home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3744 < prev    next >
Encoding:
Text File  |  2009-12-10  |  4.2 KB  |  112 lines

  1. #!/usr/bin/env  python
  2. __license__   = 'GPL v3'
  3. __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
  4. __docformat__ = 'restructuredtext en'
  5.  
  6. '''
  7. espn.com
  8. '''
  9. import re
  10. from calibre.web.feeds.news import BasicNewsRecipe
  11.  
  12. class ESPN(BasicNewsRecipe):
  13.  
  14.     title       = 'ESPN'
  15.     description = 'Sports news'
  16.     __author__  = 'Kovid Goyal and Sujata Raman'
  17.     language = 'en'
  18.     no_stylesheets = True
  19.  
  20.     use_embedded_content = False
  21.     remove_javascript     = True
  22.     needs_subscription = True
  23.     encoding= 'ISO-8859-1'
  24.  
  25.     remove_tags_before = dict(name='font', attrs={'class':'date'})
  26.     center_navbar = False
  27.     remove_tags = [
  28.                     dict(name='font', attrs={'class':'footer'}), dict(name='hr', noshade='noshade'),
  29.                     dict(name = 'img', src ='/winnercomm/horseracing/DRF.jpg')
  30.                    ]
  31.  
  32.  
  33.     extra_css = '''
  34.                 body{font-family:Verdana,Arial,Helvetica,sans-serif; font-size:x-small; font-weight:normal;}
  35.                 .subhead{color:#666666;font-family:Verdana,sans-serif; font-size:x-small; font-weight:bold;}
  36.                 .clearfix{font-family:Verdana,sans-serif; font-size:xx-small; }
  37.                 .date{ font-family:Verdana,Arial,Helvetica,sans-serif ; font-size:xx-small;color:#7A7A7A;}
  38.                 .byline{ font-family:Verdana,Arial,Helvetica,sans-serif ; font-size:xx-small;color:#666666;}
  39.                 .headline{font-family:Verdana,Arial,Helvetica,sans-serif ; font-size:large; font-weight:bold;}
  40.                 '''
  41.  
  42.  
  43.     feeds = [('Top Headlines', 'http://sports.espn.go.com/espn/rss/news'),
  44.              'http://sports.espn.go.com/espn/rss/nfl/news',
  45.              'http://sports.espn.go.com/espn/rss/nba/news',
  46.              'http://sports.espn.go.com/espn/rss/mlb/news',
  47.              'http://sports.espn.go.com/espn/rss/nhl/news',
  48.              'http://sports.espn.go.com/espn/rss/golf/news',
  49.              'http://sports.espn.go.com/espn/rss/rpm/news',
  50.              'http://sports.espn.go.com/espn/rss/tennis/news',
  51.              'http://sports.espn.go.com/espn/rss/boxing/news',
  52.              'http://soccernet.espn.go.com/rss/news',
  53.              'http://sports.espn.go.com/espn/rss/ncb/news',
  54.              'http://sports.espn.go.com/espn/rss/ncf/news',
  55.              'http://sports.espn.go.com/espn/rss/ncaa/news',
  56.              'http://sports.espn.go.com/espn/rss/outdoors/news',
  57.              #'http://sports.espn.go.com/espn/rss/bassmaster/news',
  58.              'http://sports.espn.go.com/espn/rss/oly/news',
  59.              'http://sports.espn.go.com/espn/rss/horse/news'
  60.              ]
  61.  
  62.  
  63.     def preprocess_html(self, soup):
  64.         for div in soup.findAll('div'):
  65.             if div.has_key('style') and 'px' in div['style']:
  66.                 div['style'] = ''
  67.  
  68.         return soup
  69.  
  70.     def postprocess_html(self, soup, first_fetch):
  71.         for div in soup.findAll('div', style=True):
  72.             div['style'] = div['style'].replace('center', 'left')
  73.  
  74.         return soup
  75.  
  76.  
  77.  
  78.     def get_browser(self):
  79.         br = BasicNewsRecipe.get_browser()
  80.         br.set_handle_refresh(False)
  81.         if self.username is not None and self.password is not None:
  82.             br.open('http://espn.com')#('http://espn.go.com/#myespn')
  83.             br.select_form(nr=1)
  84.             br.form.find_control(name='username', type='text').value = self.username
  85.             br.form['password'] = self.password
  86.             br.submit()
  87.         br.set_handle_refresh(True)
  88.         return br
  89.  
  90.     def get_article_url(self, article):
  91.         return article.get('guid',  None)
  92.  
  93.     def print_version(self, url):
  94.  
  95.         if 'eticket' in url:
  96.             return url.partition('&')[0].replace('story?', 'print?')
  97.         match = re.search(r'story\?(id=\d+)', url)
  98.  
  99.         if match and 'soccernet'  not in url and 'bassmaster' not in url:
  100.             return 'http://sports.espn.go.com/espn/print?'+match.group(1)+'&type=story'
  101.         else:
  102.             if match and 'soccernet' in url:
  103.                 splitlist = url.split("&", 5)
  104.                 newurl =  'http://soccernet.espn.go.com/print?'+match.group(1)+'&type=story' + '&' + str(splitlist[2] )
  105.                 return newurl
  106.             #else:
  107.             #    if 'bassmaster' in url:
  108.             #        return url
  109.  
  110.         return None
  111.  
  112.