home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3935 < prev    next >
Encoding:
Text File  |  2010-04-07  |  1.6 KB  |  51 lines

  1. #!/usr/bin/env  python
  2.  
  3. __license__   = 'GPL v3'
  4. __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
  5. '''
  6. newyorker.com
  7. '''
  8.  
  9. from calibre.web.feeds.news import BasicNewsRecipe
  10. from calibre.ebooks.BeautifulSoup import Tag
  11.  
  12. class NewYorker(BasicNewsRecipe):
  13.     title                 = 'The New Yorker'
  14.     __author__            = 'Darko Miletic'
  15.     description           = 'The best of US journalism'
  16.     oldest_article        = 15
  17.     language = 'en'
  18.  
  19.     max_articles_per_feed = 100
  20.     no_stylesheets        = True
  21.     use_embedded_content  = False
  22.     publisher             = 'Conde Nast Publications'
  23.     category              = 'news, politics, USA'
  24.     encoding              = 'cp1252'
  25.  
  26.     keep_only_tags = [dict(name='div', attrs={'id':'printbody'})]
  27.     remove_tags_after = dict(name='div',attrs={'id':'articlebody'})
  28.     remove_tags = [
  29.                      dict(name='div', attrs={'class':['utils','articleRailLinks','icons'] })
  30.                     ,dict(name='link')
  31.                   ]
  32.  
  33.     feeds          = [(u'The New Yorker', u'http://feeds.newyorker.com/services/rss/feeds/everything.xml')]
  34.  
  35.     def print_version(self, url):
  36.         return url + '?printable=true'
  37.  
  38.     def get_article_url(self, article):
  39.         return article.get('guid',  None)
  40.  
  41.     def postprocess_html(self, soup, x):
  42.         body = soup.find('body')
  43.         if body:
  44.             html = soup.find('html')
  45.             if html:
  46.                 body.extract()
  47.                 html.insert(2, body)
  48.         mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
  49.         soup.head.insert(1,mcharset)
  50.         return soup
  51.