Chip 2011 November

home *** CD-ROM | disk | FTP | other *** search

/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / technology_review.recipe < prev next >

Wrap

Text File | 2011-09-09 | 2.4 KB | 67 lines

import string  # kept for compatibility; no longer used by this recipe
from calibre.web.feeds.news import BasicNewsRecipe


class TechnologyReview(BasicNewsRecipe):
    """Calibre news recipe for the MIT Technology Review RSS feeds.

    Each article is fetched through the site's printer-friendly page and
    then cleaned up: header image, close button, banner advertisement and
    all hyperlinks are stripped from the resulting HTML.
    """

    title = u'Technology Review'
    __author__ = 'rty'
    description = 'MIT Technology Magazine'
    publisher = 'Technology Review Inc.'
    category = 'Technology, Innovation, R&D'
    language = 'en'
    oldest_article = 14
    max_articles_per_feed = 100
    # Previously spelled ``No_stylesheets``; the recipe framework documents
    # the lowercase ``no_stylesheets`` option, so the capitalised spelling
    # was silently ignored.
    no_stylesheets = True

    extra_css = """
        .ArticleBody {font: normal; text-align: justify}
        .headline {font: bold x-large}
        .subheadline {font: italic large}
    """

    feeds = [
        (u'Computing', u'http://feeds.technologyreview.com/technology_review_Computing'),
        (u'Web', u'http://feeds.technologyreview.com/technology_review_Web'),
        (u'Communications', u'http://feeds.technologyreview.com/technology_review_Communications'),
        (u'Energy', u'http://feeds.technologyreview.com/technology_review_Energy'),
        (u'Materials', u'http://feeds.technologyreview.com/technology_review_Materials'),
        (u'Biomedicine', u'http://feeds.technologyreview.com/technology_review_Biotech'),
        (u'Business', u'http://feeds.technologyreview.com/technology_review_Biztech'),
    ]

    remove_attributes = ['width', 'align', 'cellspacing']

    remove_tags = [
        dict(name='div', attrs={'id': ['CloseLink', 'footerAdDiv', 'copyright']}),
    ]

    remove_tags_after = [dict(name='div', attrs={'id': 'copyright'})]

    def get_article_url(self, article):
        """Return the feed entry's ``guid``, falling back to ``id``, else None."""
        return article.get('guid', article.get('id', None))

    def print_version(self, url):
        """Build the printer-friendly URL for an article URL.

        The article id is taken from the fifth ``/``-separated component of
        ``url`` (index 4), i.e. the second path segment of an absolute
        ``http://host/section/id/...`` URL.

        Raises:
            IndexError: if ``url`` has fewer than five ``/``-separated parts.
        """
        baseurl = 'http://www.technologyreview.com/printer_friendly_article.aspx?id='
        # str.split works on both Python 2 and 3; string.split() is gone in
        # Python 3. The segment cannot contain "/" so no second split is needed.
        article_id = url.split('/')[4]
        return baseurl + article_id

    def postprocess_html(self, soup, first_fetch):
        """Strip page chrome and hyperlinks from the parsed article.

        Args:
            soup: parsed article document; modified in place.
            first_fetch: flag supplied by the recipe framework; unused here.
                (Formerly named ``True``, which is a syntax error on
                Python 3.)

        Returns:
            The same ``soup`` object after cleanup.
        """
        # Remove the first element carrying each of these classes:
        # picture header, close button, banner advertisement. A page may
        # lack any of them, in which case find() yields None and we skip it.
        for css_class in ('header', 'close', 'bannerad'):
            element = soup.find(True, {'class': css_class})
            if element is not None:
                element.replaceWith("")

        # Thanks kiklop74! Replace every link that has a single text child
        # with that plain text, so the article body contains no hyperlinks.
        for alink in soup.findAll('a'):
            if alink.string is not None:
                alink.replaceWith(alink.string)
        return soup