home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2011 January / maximum-cd-2011-01.iso / DiscContents / calibre-0.7.26.msi / file_3992 < prev    next >
Encoding:
Text File  |  2010-09-30  |  4.4 KB  |  120 lines

  1. #!/usr/bin/env  python
  2. __license__   = 'GPL v3'
  3. __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
  4. __docformat__ = 'restructuredtext en'
  5.  
  6. '''
  7. espn.com
  8. '''
  9. import re
  10. from calibre.web.feeds.news import BasicNewsRecipe
  11. from calibre.ptempfile import TemporaryFile
  12.  
  13. class ESPN(BasicNewsRecipe):
  14.  
  15.     title       = 'ESPN'
  16.     description = 'Sports news'
  17.     __author__  = 'Kovid Goyal and Sujata Raman'
  18.     language = 'en'
  19.     no_stylesheets = True
  20.  
  21.     use_embedded_content = False
  22.     remove_javascript     = True
  23.     needs_subscription = True
  24.     encoding= 'ISO-8859-1'
  25.  
  26.     remove_tags_before = dict(name='font', attrs={'class':'date'})
  27.     center_navbar = False
  28.     remove_tags = [
  29.                     dict(name='font', attrs={'class':'footer'}), dict(name='hr', noshade='noshade'),
  30.                     dict(name = 'img', src ='/winnercomm/horseracing/DRF.jpg')
  31.                    ]
  32.  
  33.  
  34.     extra_css = '''
  35.                 body{font-family:Verdana,Arial,Helvetica,sans-serif; font-size:x-small; font-weight:normal;}
  36.                 .subhead{color:#666666;font-family:Verdana,sans-serif; font-size:x-small; font-weight:bold;}
  37.                 .clearfix{font-family:Verdana,sans-serif; font-size:xx-small; }
  38.                 .date{ font-family:Verdana,Arial,Helvetica,sans-serif ; font-size:xx-small;color:#7A7A7A;}
  39.                 .byline{ font-family:Verdana,Arial,Helvetica,sans-serif ; font-size:xx-small;color:#666666;}
  40.                 .headline{font-family:Verdana,Arial,Helvetica,sans-serif ; font-size:large; font-weight:bold;}
  41.                 '''
  42.  
  43.  
  44.     feeds = [('Top Headlines', 'http://sports.espn.go.com/espn/rss/news'),
  45.              'http://sports.espn.go.com/espn/rss/nfl/news',
  46.              'http://sports.espn.go.com/espn/rss/nba/news',
  47.              'http://sports.espn.go.com/espn/rss/mlb/news',
  48.              'http://sports.espn.go.com/espn/rss/nhl/news',
  49.              'http://sports.espn.go.com/espn/rss/golf/news',
  50.              'http://sports.espn.go.com/espn/rss/rpm/news',
  51.              'http://sports.espn.go.com/espn/rss/tennis/news',
  52.              'http://sports.espn.go.com/espn/rss/boxing/news',
  53.              'http://soccernet.espn.go.com/rss/news',
  54.              'http://sports.espn.go.com/espn/rss/ncb/news',
  55.              'http://sports.espn.go.com/espn/rss/ncf/news',
  56.              'http://sports.espn.go.com/espn/rss/ncaa/news',
  57.              'http://sports.espn.go.com/espn/rss/outdoors/news',
  58.              #'http://sports.espn.go.com/espn/rss/bassmaster/news',
  59.              'http://sports.espn.go.com/espn/rss/oly/news',
  60.              'http://sports.espn.go.com/espn/rss/horse/news'
  61.              ]
  62.  
  63.  
  64.     def preprocess_html(self, soup):
  65.         for div in soup.findAll('div'):
  66.             if div.has_key('style') and 'px' in div['style']:
  67.                 div['style'] = ''
  68.  
  69.         return soup
  70.  
  71.     def postprocess_html(self, soup, first_fetch):
  72.         for div in soup.findAll('div', style=True):
  73.             div['style'] = div['style'].replace('center', 'left')
  74.  
  75.         return soup
  76.  
  77.  
  78.  
  79.     def get_browser(self):
  80.         br = BasicNewsRecipe.get_browser()
  81.         br.set_handle_refresh(False)
  82.         url = ('https://r.espn.go.com/members/v3_1/login')
  83.         raw = br.open(url).read()
  84.         raw = re.sub(r'(?s)<form>.*?id="regsigninbtn".*?</form>', '', raw)
  85.         with TemporaryFile(suffix='.htm') as fname:
  86.             with open(fname, 'wb') as f:
  87.                 f.write(raw)
  88.             br.open_local_file(fname)
  89.  
  90.         br.form = br.forms().next()
  91.         br.form.find_control(name='username', type='text').value = self.username
  92.         br.form['password'] = self.password
  93.         br.submit().read()
  94.         br.open('http://espn.go.com').read()
  95.         br.set_handle_refresh(True)
  96.         return br
  97.  
  98.     def get_article_url(self, article):
  99.         return article.get('guid',  None)
  100.  
  101.     def print_version(self, url):
  102.  
  103.         if 'eticket' in url:
  104.             return url.partition('&')[0].replace('story?', 'print?')
  105.         match = re.search(r'story\?(id=\d+)', url)
  106.  
  107.         if match and 'soccernet'  not in url and 'bassmaster' not in url:
  108.             return 'http://sports.espn.go.com/espn/print?'+match.group(1)+'&type=story'
  109.         else:
  110.             if match and 'soccernet' in url:
  111.                 splitlist = url.split("&", 5)
  112.                 newurl =  'http://soccernet.espn.go.com/print?'+match.group(1)+'&type=story' + '&' + str(splitlist[2] )
  113.                 return newurl
  114.             #else:
  115.             #    if 'bassmaster' in url:
  116.             #        return url
  117.  
  118.         return None
  119.  
  120.