home *** CD-ROM | disk | FTP | other *** search
- __license__ = 'GPL v3'
- __copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
- '''
- pressthink.org
- '''
-
- from calibre.web.feeds.recipes import BasicNewsRecipe
-
class PressThink(BasicNewsRecipe):
    """Calibre news recipe for the PressThink blog (pressthink.org).

    Everything except :meth:`preprocess_html` is declarative configuration
    read by the ``BasicNewsRecipe`` base class.
    """

    # --- Publication metadata -------------------------------------------
    title = 'PressThink'
    __author__ = 'Darko Miletic'
    description = 'Ghost of democracy in the media machine'
    publisher = 'Arthur L. Carter Journalism Institute'
    category = 'news, USA, world, economy, politics, media'
    language = 'en'
    publication_type = 'blog'

    # --- Download behaviour ---------------------------------------------
    oldest_article = 60
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf8'

    # Single feed supplying the article list.
    feeds = [(u'Articles', u'http://pressthink.org/feed/')]

    # --- Page clean-up ---------------------------------------------------
    # Only elements carrying one of these class values are kept.
    keep_only_tags = [
        dict(
            attrs={
                'class': [
                    'post-title',
                    'post-author',
                    'entry',
                    'postmetadata alt',
                    'commentlist',
                ]
            }
        )
    ]
    # Elements with these tag names are dropped.
    remove_tags = [
        dict(
            name=[
                'form',
                'iframe',
                'embed',
                'object',
                'link',
                'base',
                'table',
                'meta',
            ]
        )
    ]

    # --- Output styling and metadata -------------------------------------
    # Replacement stylesheet for the generated book.
    extra_css = """
        body{ font-family: Helvetica,Arial,sans-serif }
        img{display: block; margin-bottom: 0.5em}
        h6{font-size: 1.1em; font-weight: bold}
        .post-author{font-family: Georgia,serif}
        .post-title{color: #AB0000}
        .says{color: gray}
        .comment {
        border-bottom: 1px dotted #555555;
        border-top: 1px dotted #DDDDDD;
        margin-left: 10px;
        min-height: 100px;
        padding: 15px 0 20px;
        }
        """

    # Mirrors the metadata attributes above; they must already be defined
    # at this point in the class body.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    def preprocess_html(self, soup):
        """Tidy a parsed article page in place and return it.

        Three passes are made over ``soup``:

        1. the ``style`` attribute is deleted from every element found
           with ``style=True``;
        2. every ``<img>`` selected by ``alt=False`` gets the placeholder
           ``alt`` text ``'image'``;
        3. every ``<a>`` whose ``.string`` is not ``None`` is replaced by
           that string; anchors whose ``.string`` is ``None`` are left
           untouched.

        :param soup: parsed HTML tree of the downloaded page.
        :return: the same ``soup`` object, modified.
        """
        for styled in soup.findAll(style=True):
            del styled['style']

        for picture in soup.findAll('img', alt=False):
            picture['alt'] = 'image'

        for anchor in soup.findAll('a'):
            label = anchor.string
            if label is None:
                # No single string child to substitute; keep the link.
                continue
            anchor.replaceWith(label)

        return soup
-
-
-