home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3789 < prev    next >
Encoding:
Text File  |  2009-11-23  |  2.5 KB  |  69 lines

  1. #!/usr/bin/env  python
  2.  
  3. __license__   = 'GPL v3'
  4. __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
  5. '''
  6. harpers.org
  7. '''
  8. from calibre.web.feeds.news import BasicNewsRecipe
  9. from calibre.ebooks.BeautifulSoup import Tag
  10.  
  11. class Harpers(BasicNewsRecipe):
  12.     title                 = u"Harper's Magazine"
  13.     __author__            = u'Darko Miletic'
  14.     language = 'en'
  15.  
  16.     description           = u"Harper's Magazine: Founded June 1850."
  17.     publisher             = "Harper's Magazine "
  18.     category              = 'news, politics, USA'
  19.     oldest_article        = 30
  20.     max_articles_per_feed = 100
  21.     no_stylesheets        = True
  22.     use_embedded_content  = False
  23.  
  24.     html2lrf_options = [
  25.                           '--comment', description
  26.                         , '--category', category
  27.                         , '--publisher', publisher
  28.                         ]
  29.  
  30.     html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
  31.  
  32.     extra_css = '''
  33.                 h1{ font-family:georgia ; color:#111111; font-size:large;}                
  34.                 .box-of-helpful{ font-family:arial ; font-size:x-small;}
  35.                 p{font-family:georgia ;}
  36.                 .caption{font-family:Verdana,sans-serif;font-size:x-small;color:#666666;}                                
  37.                 '''
  38.               
  39.     keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ]
  40.     remove_tags = [
  41.                      dict(name='table', attrs={'class':['rcnt','rcnt topline']})
  42.                     ,dict(name=['link','object','embed'])
  43.                   ]
  44.  
  45.     feeds       = [(u"Harper's Magazine", u'http://www.harpers.org/rss/frontpage-rss20.xml')]
  46.  
  47.     def get_cover_url(self):
  48.         cover_url = None
  49.         index = 'http://harpers.org/'
  50.         soup = self.index_to_soup(index)
  51.         link_item = soup.find(name = 'img',attrs= {'class':"cover"})
  52.         print link_item
  53.         if link_item:
  54.            cover_url = 'http://harpers.org' + link_item['src'] 
  55.         print cover_url   
  56.         return cover_url
  57.     
  58.     def preprocess_html(self, soup):
  59.         mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
  60.         soup.head.insert(1,mcharset)
  61.         for item in soup.findAll(style=True):
  62.             del item['style']
  63.         for item in soup.findAll(xmlns=True):
  64.             del item['xmlns']
  65.         return soup
  66.  
  67.     
  68.  
  69.