Maximum CD 2010 November

home *** CD-ROM | disk | FTP | other *** search

/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3986 < prev next >

Wrap

Text File | 2009-12-17 | 3.2 KB | 89 lines

#!/usr/bin/env python
# -*- coding: cp1252 -*-

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
politico.com
'''

import re
import traceback

from calibre.web.feeds.news import BasicNewsRecipe


class Politico(BasicNewsRecipe):
    '''
    News recipe for politico.com.

    Declares the Politico RSS feeds to download, rewrites each article URL
    to the "print" link found on the article page, and cleans the fetched
    HTML (inline styles removed, table markup turned into ``div`` elements).
    '''

    title = 'Politico'
    __author__ = 'Darko Miletic and Sujata Raman'
    description = 'Political news from USA'
    publisher = 'Capitol News Company, LLC'
    category = 'news, politics, USA'
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    encoding = 'UTF-8'
    language = 'en'

    # Conversion options built from the metadata above.
    # NOTE(review): presumably consumed by calibre's LRF/EPUB output
    # pipelines -- confirm against the calibre version in use.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]

    html2epub_options = (
        'publisher="' + publisher
        + '"\ncomments="' + description
        + '"\ntags="' + category
        + '"\nlinearize_tables=True'
    )

    # Tag names listed here are matched for removal from the article markup.
    remove_tags = [
        dict(name=['notags', 'embed', 'object', 'link', 'img']),
    ]

    extra_css = '''
                body{font-family:Arial,Sans-serif;}
                element.style{color:#FF0000;font-family:Arial,Sans-serif;}
                .author{color:#808080;font-size:x-small;}
                a{ color:#003399;}
                .byline{color:#696969 ; font-size:x-small;}
                .story{color:#000000;}
                td{color:#000000;}
                '''

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'Top Stories', u'http://www.politico.com/rss/politicopicks.xml'),
        (u'Congress', u'http://www.politico.com/rss/congress.xml'),
        (u'Ideas', u'http://www.politico.com/rss/ideas.xml'),
        (u'Life', u'http://www.politico.com/rss/life.xml'),
        (u'Lobbyists', u'http://www.politico.com/rss/lobbyists.xml'),
        (u'Pitboss', u'http://www.politico.com/rss/pitboss.xml'),
        (u'Politics', u'http://www.politico.com/rss/politics.xml'),
        (u'Roger Simon', u'http://www.politico.com/rss/rogersimon.xml'),
        (u'Suite Talk', u'http://www.politico.com/rss/suitetalk.xml'),
        (u'Playbook', u'http://www.politico.com/rss/playbook.xml'),
        # (u'The Huddle', u'http://www.politico.com/rss/huddle.xml'),
    ]

    def preprocess_html(self, soup):
        '''
        Tag the document as English and strip inline ``style`` attributes.

        :param soup: parsed article document
                     (presumably a BeautifulSoup tree -- confirm).
        :return: the same ``soup`` object, modified in place.
        '''
        mtag = '<meta http-equiv="Content-Language" content="en"/>'
        # A page without a <head> yields None here; skip the insert rather
        # than fail on attribute access.
        if soup.head is not None:
            soup.head.insert(0, mtag)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    # Captures the href of an anchor whose URL contains "print" followed by
    # ".cfm". The middle part is restricted to non-quote characters so the
    # capture cannot run past the end of the href attribute value.
    url_pat = re.compile(r'<a href="([^"]+print[^"]*\.cfm[^"]+)"')

    def postprocess_html(self, soup, first):
        '''
        Turn table markup into plain ``div`` elements.

        :param soup: parsed article document.
        :param first: unused here; kept for the hook signature.
        :return: the same ``soup`` object, modified in place.
        '''
        for tag in soup.findAll(name=['table', 'tr', 'td']):
            tag.name = 'div'
        return soup

    def print_version(self, url):
        '''
        Return the print-friendly URL linked from the article page.

        The article page is fetched raw and searched with ``url_pat``.

        :param url: URL of the regular article page.
        :return: the captured print URL, or ``None`` when no link matches
                 (the traceback is printed for diagnosis).
        '''
        raw = self.index_to_soup(url, raw=True)
        try:
            url = self.url_pat.search(raw).group(1)
        except Exception:
            # Best effort: a missing print link must not abort the download,
            # but KeyboardInterrupt/SystemExit are allowed to propagate.
            traceback.print_exc()
            url = None
        return url