home *** CD-ROM | disk | FTP | other *** search
- #!/usr/bin/env python
-
- __license__ = 'GPL v3'
- '''
- sciencenews.org
- '''
- from calibre.web.feeds.news import BasicNewsRecipe
-
class ScienceNewsIssue(BasicNewsRecipe):
    """Calibre recipe that downloads recent issues of Science News.

    Articles are taken from the sciencenews.org "issues" RSS feed. Links on
    the fetched pages that match ``match_regexps`` are followed one level
    deep (``recursions = 1``), and each page is reduced to the
    ``column_action`` container before conversion.
    """

    title = u'Science News Recent Issues'
    __author__ = u'Darko Miletic, Sujata Raman and Starson17'
    # Built from adjacent literals with explicit newlines so the text does
    # not pick up the class-body indentation.
    description = (
        u'Science News is an award-winning weekly\n'
        u'newsmagazine covering the most important research in all fields of science.\n'
        u'Its 16 pages each week are packed with short, accurate articles that appeal\n'
        u'to both general readers and scientists. Published since 1922, the magazine\n'
        u'now reaches about 150,000 subscribers and more than 1 million readers.\n'
        u'These are the latest News Items from Science News. This recipe downloads\n'
        u'the last 30 days worth of articles.'
    )
    category = u'Science, Technology, News'
    publisher = u'Society for Science & the Public'
    oldest_article = 30  # days; matches the "last 30 days" in the description
    language = 'en'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    timefmt = ' [%A, %d %B, %Y]'
    recursions = 1  # follow matching links one level below each feed article
    remove_attributes = ['style']

    # Metadata handed to the conversion pipeline; reuses the values above so
    # the two cannot drift apart.
    conversion_options = {
        'linearize_tables': True,
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    extra_css = '''
        .content_description{font-family:georgia ;font-size:x-large; color:#646464 ; font-weight:bold;}
        .content_summary{font-family:georgia ;font-size:small ;color:#585858 ; font-weight:bold;}
        .content_authors{font-family:helvetica,arial ;font-size: xx-small ;color:#14487E ;}
        .content_edition{font-family:helvetica,arial ;font-size: xx-small ;}
        .exclusive{color:#FF0000 ;}
        .anonymous{color:#14487E ;}
        .content_content{font-family:helvetica,arial ;font-size: medium ; color:#000000;}
        .description{color:#585858;font-family:helvetica,arial ;font-size: large ;}
        .credit{color:#A6A6A6;font-family:helvetica,arial ;font-size: xx-small ;}
    '''

    # Keep only the main content column, cut everything after the bottom
    # toolbar, then strip the toolbars, breadcrumb, icons and zoom widgets.
    keep_only_tags = [dict(name='div', attrs={'id': 'column_action'})]
    remove_tags_after = dict(name='ul', attrs={'id': 'content_functions_bottom'})
    remove_tags = [
        dict(name='ul', attrs={'id': 'content_functions_bottom'}),
        dict(name='div', attrs={'id': ['content_functions_top', 'breadcrumb_content']}),
        dict(name='img', attrs={'class': 'icon'}),
        dict(name='div', attrs={'class': 'embiggen'}),
    ]

    feeds = [
        (u"Science News Current Issues",
         u'http://www.sciencenews.org/view/feed/type/edition/name/issues.rss'),
    ]

    # Links to follow while recursing. The dots in the host name are escaped
    # so they match a literal '.' rather than any character.
    match_regexps = [
        r'www\.sciencenews\.org/view/feature/id/',
        r'www\.sciencenews\.org/view/generic/id',
    ]

    def get_cover_url(self):
        """Return the cover image URL for the current issue, or None.

        Loads the site's home page and looks for the ``<img>`` whose ``alt``
        attribute is "issue". The image's ``src`` is treated as a
        site-relative path and ``.jpg`` is appended to it. None is returned
        when no such image is found or it has no ``src``.
        """
        site = 'http://www.sciencenews.org'
        soup = self.index_to_soup(site + '/view/home')
        cover_img = soup.find(name='img', alt="issue")
        if cover_img is None:
            return None
        # .get() avoids a KeyError when the tag carries no src attribute.
        src = cover_img.get('src')
        if not src:
            return None
        return site + src + '.jpg'

    def preprocess_html(self, soup):
        """Rename every ``<span>`` in *soup* to ``<div>`` and return *soup*."""
        for tag in soup.findAll(name=['span']):
            tag.name = 'div'
        return soup
-