home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_4024 < prev    next >
Encoding:
Text File  |  2009-11-18  |  2.8 KB  |  62 lines

  1. #!/usr/bin/env  python
  2.  
  3. __license__   = 'GPL v3'
  4. __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
  5. '''
  6. sciencenews.org
  7. '''
  8. from calibre.web.feeds.news import BasicNewsRecipe
  9.  
  10. class Sciencenews(BasicNewsRecipe):
  11.     title                 = u'ScienceNews'
  12.     __author__            = u'Darko Miletic and Sujata Raman'
  13.     description           = u"Science News is an award-winning weekly newsmagazine covering the most important research in all fields of science. Its 16 pages each week are packed with short, accurate articles that appeal to both general readers and scientists. Published since 1922, the magazine now reaches about 150,000 subscribers and more than 1 million readers. These are the latest News Items from Science News."
  14.     oldest_article        = 30
  15.     language = 'en'
  16.  
  17.     max_articles_per_feed = 100
  18.     no_stylesheets        = True
  19.     use_embedded_content  = False
  20.     timefmt               = ' [%A, %d %B, %Y]'
  21.  
  22.     extra_css = '''
  23.                 .content_description{font-family:georgia ;font-size:x-large; color:#646464 ; font-weight:bold;}
  24.                 .content_summary{font-family:georgia ;font-size:small ;color:#585858 ; font-weight:bold;}
  25.                 .content_authors{font-family:helvetica,arial ;font-size: xx-small ;color:#14487E ;}
  26.                 .content_edition{font-family:helvetica,arial ;font-size: xx-small ;}
  27.                 .exclusive{color:#FF0000 ;}
  28.                 .anonymous{color:#14487E ;}
  29.                 .content_content{font-family:helvetica,arial ;font-size: x-small ; color:#000000;}
  30.                 .description{color:#585858;font-family:helvetica,arial ;font-size: xx-small ;}
  31.                 .credit{color:#A6A6A6;font-family:helvetica,arial ;font-size: xx-small ;}
  32.                 '''
  33.  
  34.     keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ]
  35.     remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'})
  36.     remove_tags = [
  37.                      dict(name='ul', attrs={'id':'content_functions_bottom'})
  38.                     ,dict(name='div', attrs={'id':['content_functions_top','breadcrumb_content']})
  39.                     ,dict(name='img', attrs={'class':'icon'})
  40.                     ,dict(name='div', attrs={'class': 'embiggen'})
  41.                   ]
  42.  
  43.     feeds       = [(u"Science News / News Items", u'http://sciencenews.org/view/feed/type/news/name/news.rss')]
  44.  
  45.     def get_cover_url(self):
  46.         cover_url = None
  47.         index = 'http://www.sciencenews.org/view/home'
  48.         soup = self.index_to_soup(index)
  49.         link_item = soup.find(name = 'img',alt = "issue")
  50.         print link_item
  51.         if link_item:
  52.            cover_url = 'http://www.sciencenews.org' + link_item['src'] + '.jpg'
  53.  
  54.         return cover_url
  55.  
  56.     def preprocess_html(self, soup):
  57.  
  58.             for tag in soup.findAll(name=['span']):
  59.                 tag.name = 'div'
  60.  
  61.             return soup
  62.