home *** CD-ROM | disk | FTP | other *** search
Wrap
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
balkaninsight.com
'''

import re

from calibre.web.feeds.news import BasicNewsRecipe


class BalkanInsight(BasicNewsRecipe):
    """Calibre news recipe for the per-country feeds of balkaninsight.com.

    The class only declares recipe settings (metadata, styling, the feed
    list and the tag filters) and one hook, ``preprocess_html``, which
    strips inline styles from each downloaded page.
    """

    # --- Publication metadata -------------------------------------------
    title = 'Balkan Insight'
    __author__ = 'Darko Miletic'
    description = 'Get exclusive news and in depth information on business, politics, events and lifestyle in the Balkans. Free and exclusive premium content.'
    publisher = 'BalkanInsight.com'
    # Reused below as the 'tags' conversion option, so the spelling matters
    # ("Balkans", not "Balcans").
    category = 'news, politics, Balkans'
    language = 'en'
    publication_type = 'newsportal'
    masthead_url = 'http://www.balkaninsight.com/templates/balkaninsight/images/aindex_02.jpg'

    # --- Download settings ----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = False
    use_embedded_content = False
    encoding = 'utf-8'
    remove_empty_feeds = True

    # --- Styling --------------------------------------------------------
    # The @font-face rules point at font files under /opt/sony/ebook/FONT/
    # and expose them as the "serif1" / "sans1" fallback families.
    extra_css = """
        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
        .article_description,body{font-family: Arial,Verdana,Helvetica,sans1,sans-serif}
        img{margin-bottom: 0.8em}
        h1,h2,h3,h4{font-family: Times,Georgia,serif1,serif; color: #24569E}
        .article-deck {color:#777777; font-size: small;}
        .main_news_img{font-size: small}
    """

    # Metadata handed to the conversion step; every value mirrors one of
    # the class attributes declared above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- Content clean-up -----------------------------------------------
    # Replace U+0110 (D with stroke) with U+00D0 (Eth) in the raw page text.
    # NOTE(review): presumably a workaround for reader fonts that lack the
    # U+0110 glyph -- confirm before changing.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    # Only <div id="article"> is kept; embedded objects, <link> elements
    # and iframes are removed.
    keep_only_tags = [dict(name='div', attrs={'id': 'article'})]
    remove_tags = [dict(name=['object', 'link', 'iframe'])]

    # --- Feeds: one (title, URL) pair per country -----------------------
    feeds = [
        (u'Albania', u'http://www.balkaninsight.com/?tpl=653&tpid=144'),
        (u'Bosnia', u'http://www.balkaninsight.com/?tpl=653&tpid=145'),
        (u'Bulgaria', u'http://www.balkaninsight.com/?tpl=653&tpid=146'),
        (u'Croatia', u'http://www.balkaninsight.com/?tpl=653&tpid=147'),
        (u'Kosovo', u'http://www.balkaninsight.com/?tpl=653&tpid=148'),
        (u'Macedonia', u'http://www.balkaninsight.com/?tpl=653&tpid=149'),
        (u'Montenegro', u'http://www.balkaninsight.com/?tpl=653&tpid=150'),
        (u'Romania', u'http://www.balkaninsight.com/?tpl=653&tpid=151'),
        (u'Serbia', u'http://www.balkaninsight.com/?tpl=653&tpid=152'),
    ]

    def preprocess_html(self, soup):
        """Strip inline styles from *soup* and pass it to ``adeify_images``.

        Every tag carrying a ``style`` attribute has that attribute deleted
        in place, so only the recipe's own CSS is left to style the page.

        :param soup: parsed page, searched with ``findAll(style=True)``.
        :return: whatever ``self.adeify_images(soup)`` returns.
        """
        for styled_tag in soup.findAll(style=True):
            del styled_tag['style']
        return self.adeify_images(soup)