home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_4131 < prev    next >
Encoding:
Text File  |  2010-01-29  |  2.2 KB  |  56 lines

  1. import re
  2. from calibre.web.feeds.news import BasicNewsRecipe
  3.  
  4.  
  5. class WashingtonPost(BasicNewsRecipe):
  6.  
  7.     title = 'Washington Post'
  8.     description = 'US political news'
  9.     __author__ = 'Kovid Goyal and Sujata Raman'
  10.     use_embedded_content   = False
  11.     max_articles_per_feed = 20
  12.     language = 'en'
  13.  
  14.  
  15.     remove_javascript = True
  16.     no_stylesheets = True
  17.  
  18.     extra_css       = '''
  19.                         #articleCopyright { font-family:Arial,helvetica,sans-serif ; font-weight:bold ; font-size:x-small ;}
  20.                         p { font-family:"Times New Roman",times,serif ; font-weight:normal ; font-size:small ;}
  21.                         body{font-family:arial,helvetica,sans-serif}
  22.                             '''
  23.  
  24.     feeds = [    ('Today\'s Highlights', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/03/24/LI2005032400102.xml'),
  25.                   ('Politics', 'http://www.washingtonpost.com/wp-dyn/rss/politics/index.xml'),
  26.                   ('Nation', 'http://www.washingtonpost.com/wp-dyn/rss/nation/index.xml'),
  27.                   ('World', 'http://www.washingtonpost.com/wp-dyn/rss/world/index.xml'),
  28.                   ('Business', 'http://www.washingtonpost.com/wp-dyn/rss/business/index.xml'),
  29.                   ('Technology', 'http://www.washingtonpost.com/wp-dyn/rss/technology/index.xml'),
  30.                   ('Health', 'http://www.washingtonpost.com/wp-dyn/rss/health/index.xml'),
  31.                   ('Education', 'http://www.washingtonpost.com/wp-dyn/rss/education/index.xml'),
  32.                   ('Editorials', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/05/30/LI2005053000331.xml'),
  33.          ]
  34.  
  35.     remove_tags = [{'id':['pfmnav', 'ArticleCommentsWrapper']}]
  36.  
  37.  
  38.     def get_article_url(self, article):
  39.         return article.get('guid', article.get('link', None))
  40.  
  41.     def print_version(self, url):
  42.         return url.rpartition('.')[0] + '_pf.html'
  43.  
  44.     def postprocess_html(self, soup, first):
  45.         for div in soup.findAll(name='div', style=re.compile('margin')):
  46.             div['style'] = ''
  47.         return soup
  48.  
  49.     def preprocess_html(self, soup):
  50.         for tag in soup.findAll('font'):
  51.             if tag.has_key('size'):
  52.                 if tag['size'] == '+2':
  53.                     if tag.b:
  54.                         return soup
  55.         return None
  56.