home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2011 January / maximum-cd-2011-01.iso / DiscContents / calibre-0.7.26.msi / file_4323 < prev    next >
Encoding:
Text File  |  2010-10-01  |  3.5 KB  |  79 lines

  1. #!/usr/bin/env  python
  2.  
  3. __license__   = 'GPL v3'
  4. '''
  5. sciencenews.org
  6. '''
  7. from calibre.web.feeds.news import BasicNewsRecipe
  8.  
  9. class ScienceNewsIssue(BasicNewsRecipe):
  10.     title                 = u'Science News Recent Issues'
  11.     __author__            = u'Darko Miletic, Sujata Raman and Starson17'
  12.     description           = u'''Science News is an award-winning weekly
  13.     newsmagazine covering the most important research in all fields of science.
  14.     Its 16 pages each week are packed with short, accurate articles that appeal
  15.     to both general readers and scientists. Published since 1922, the magazine
  16.     now reaches about 150,000 subscribers and more than 1 million readers.
  17.     These are the latest News Items from Science News. This recipe downloads
  18.     the last 30 days worth of articles.'''
  19.     category              = u'Science, Technology, News'
  20.     publisher             = u'Society for Science & the Public'
  21.     oldest_article        = 30
  22.     language = 'en'
  23.     max_articles_per_feed = 100
  24.     no_stylesheets        = True
  25.     use_embedded_content  = False
  26.     timefmt               = ' [%A, %d %B, %Y]'
  27.     recursions = 1
  28.     remove_attributes = ['style']
  29.  
  30.     conversion_options = {'linearize_tables'  : True
  31.                         , 'comment'           : description
  32.                         , 'tags'              : category
  33.                         , 'publisher'         : publisher
  34.                         , 'language'          : language
  35.                         }
  36.  
  37.     extra_css = '''
  38.                 .content_description{font-family:georgia ;font-size:x-large; color:#646464 ; font-weight:bold;}
  39.                 .content_summary{font-family:georgia ;font-size:small ;color:#585858 ; font-weight:bold;}
  40.                 .content_authors{font-family:helvetica,arial ;font-size: xx-small ;color:#14487E ;}
  41.                 .content_edition{font-family:helvetica,arial ;font-size: xx-small ;}
  42.                 .exclusive{color:#FF0000 ;}
  43.                 .anonymous{color:#14487E ;}
  44.                 .content_content{font-family:helvetica,arial ;font-size: medium ; color:#000000;}
  45.                 .description{color:#585858;font-family:helvetica,arial ;font-size: large ;}
  46.                 .credit{color:#A6A6A6;font-family:helvetica,arial ;font-size: xx-small ;}
  47.                 '''
  48.  
  49.     keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ]
  50.     remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'})
  51.     remove_tags = [
  52.                      dict(name='ul', attrs={'id':'content_functions_bottom'})
  53.                     ,dict(name='div', attrs={'id':['content_functions_top','breadcrumb_content']})
  54.                     ,dict(name='img', attrs={'class':'icon'})
  55.                     ,dict(name='div', attrs={'class': 'embiggen'})
  56.                   ]
  57.  
  58.     feeds       = [(u"Science News Current Issues", u'http://www.sciencenews.org/view/feed/type/edition/name/issues.rss')]
  59.  
  60.     match_regexps = [
  61.             r'www.sciencenews.org/view/feature/id/',
  62.             r'www.sciencenews.org/view/generic/id'
  63.             ]
  64.  
  65.     def get_cover_url(self):
  66.         cover_url = None
  67.         index = 'http://www.sciencenews.org/view/home'
  68.         soup = self.index_to_soup(index)
  69.         link_item = soup.find(name = 'img',alt = "issue")
  70.         if link_item:
  71.            cover_url = 'http://www.sciencenews.org' + link_item['src'] + '.jpg'
  72.  
  73.         return cover_url
  74.  
  75.     def preprocess_html(self, soup):
  76.         for tag in soup.findAll(name=['span']):
  77.             tag.name = 'div'
  78.         return soup
  79.