home *** CD-ROM | disk | FTP | other *** search
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- __license__ = 'GPL v3'
- __copyright__ = '2011, gagsays <gagsays at gmail dot com>'
- '''
- nationalgeographic.com
- '''
- from calibre.web.feeds.news import BasicNewsRecipe
class NatGeo(BasicNewsRecipe):
    '''Recipe for the National Geographic "Daily News" feed.

    Articles are taken from a single RSS feed.  Feed entries whose title
    contains ``Presented`` or ``Pictures`` are dropped, each page is cut
    down to its header and main content containers, and hyperlinks are
    flattened to plain text.
    '''

    # --- Publication metadata -------------------------------------------
    title = u'National Geographic'
    __author__ = 'gagsays'
    # ``description`` used to be assigned twice in this class; only the
    # later assignment ever took effect, so that is the one kept here.
    description = 'Inspiring people to care about the planet since 1888'
    publisher = 'nationalgeographic.com'
    category = 'science, nat geo'
    language = 'en'
    masthead_url = 'http://s.ngeo.com/wpf/sites/themes/global/i/presentation/ng_logo_small.png'
    timefmt = ' [%a, %d %b, %Y]'

    # --- Download behaviour ---------------------------------------------
    oldest_article = 20
    max_articles_per_feed = 25
    encoding = 'utf8'
    no_stylesheets = True
    use_embedded_content = False

    feeds = [
        (u'Daily News', u'http://feeds.nationalgeographic.com/ng/News/News_Main'),
    ]

    # --- Page clean-up rules --------------------------------------------
    remove_tags_before = dict(id='page_head')
    keep_only_tags = [
        dict(name='div', attrs={'id': ['page_head', 'content_mainA']}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'class': ['article_text', 'promo_collection']}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': ['aside', 'primary full_width']}),
        dict(name='div', attrs={'id': ['header_search', 'navigation_mainB_wrap']}),
    ]

    # --- Styling ----------------------------------------------------------
    # The stylesheet text is kept flush-left so the literal carries exactly
    # the rules below and no extra indentation.
    extra_css = '''
body {color: #000000;font-size: medium;}
h1 {color: #222222; font-size: large; font-weight:lighter; text-decoration:none; text-align: center;font-family:Georgia,Times New Roman,Times,serif;}
h2 {color: #454545; font-size: small; font-weight:lighter; text-decoration:none; text-align: justify; font-style:italic;font-family :Georgia,Times New Roman,Times,serif;}
h3 {color: #555555; font-size: small; font-style:italic; margin-top: 10px;}
img{margin-bottom: 0.25em;display:block;margin-left: auto;margin-right: auto;}
a:link,a,.a,href {text-decoration: none;color: #000000;}
.caption{color: #000000;font-size: xx-small;text-align: justify;font-weight:normal;}
.credit{color: #555555;font-size: xx-small;text-align: left;font-weight:lighter;}
p.author,p.publication{color: #000000;font-size: xx-small;text-align: left;display:inline;}
p.publication_time{color: #000000;font-size: xx-small;text-align: right;text-decoration: underline;}
p {margin-bottom: 0;}
p + p {text-indent: 1.5em;margin-top: 0;}
.hidden{display:none;}
#page_head{text-transform:uppercase;}
'''

    def parse_feeds(self):
        '''Return the parsed feeds with unwanted articles filtered out.

        Delegates to ``BasicNewsRecipe.parse_feeds`` and then drops every
        article whose title contains ``Presented`` or ``Pictures``
        (presumably sponsored items and photo galleries -- confirm).
        '''
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            # Slice assignment filters in one pass while keeping the same
            # list object that the feed already holds.
            feed.articles[:] = [
                article for article in feed.articles
                if 'Presented' not in article.title
                and 'Pictures' not in article.title
            ]
        return feeds

    def preprocess_html(self, soup):
        '''Replace text-only links in ``soup`` with their text.

        Every ``<a>`` element whose ``.string`` is not ``None`` is replaced
        by that string; other ``<a>`` elements are left in place.  The
        same ``soup`` object is returned.
        '''
        for alink in soup.findAll('a'):
            link_text = alink.string
            if link_text is None:
                continue
            alink.replaceWith(link_text)
        return soup
-
-