home *** CD-ROM | disk | FTP | other *** search
- import string
- from calibre.web.feeds.news import BasicNewsRecipe
-
class TechnologyReview(BasicNewsRecipe):
    """Calibre news recipe for MIT Technology Review.

    Articles are collected from the per-topic RSS feeds listed in ``feeds``,
    fetched through the site's printer-friendly page (see ``print_version``),
    and then stripped of page chrome and hyperlinks (see
    ``postprocess_html``).
    """

    # --- Publication metadata -------------------------------------------
    title = u'Technology Review'
    __author__ = 'rty'
    description = 'MIT Technology Magazine'
    publisher = 'Technology Review Inc.'
    category = 'Technology, Innovation, R&D'
    language = 'en'

    # --- Fetch limits ---------------------------------------------------
    oldest_article = 14
    max_articles_per_feed = 100

    # --- Styling --------------------------------------------------------
    # NOTE: this was previously spelled ``No_stylesheets``; the recipe
    # attribute documented by calibre is the lower-case ``no_stylesheets``,
    # so the mis-capitalised name was never picked up.
    no_stylesheets = True
    extra_css = """
        .ArticleBody {font: normal; text-align: justify}
        .headline {font: bold x-large}
        .subheadline {font: italic large}
        """

    # --- Sources: (section title, feed URL) pairs -----------------------
    feeds = [
        (u'Computing', u'http://feeds.technologyreview.com/technology_review_Computing'),
        (u'Web', u'http://feeds.technologyreview.com/technology_review_Web'),
        (u'Communications', u'http://feeds.technologyreview.com/technology_review_Communications'),
        (u'Energy', u'http://feeds.technologyreview.com/technology_review_Energy'),
        (u'Materials', u'http://feeds.technologyreview.com/technology_review_Materials'),
        (u'Biomedicine', u'http://feeds.technologyreview.com/technology_review_Biotech'),
        (u'Business', u'http://feeds.technologyreview.com/technology_review_Biztech'),
    ]

    # --- Markup clean-up rules ------------------------------------------
    remove_attributes = ['width', 'align', 'cellspacing']

    remove_tags = [
        dict(name='div', attrs={'id': ['CloseLink', 'footerAdDiv', 'copyright']}),
    ]
    remove_tags_after = [dict(name='div', attrs={'id': 'copyright'})]

    def get_article_url(self, article):
        """Return the article's ``guid``, else its ``id``, else ``None``.

        ``article`` only needs to provide a dict-style ``get`` method.
        """
        return article.get('guid', article.get('id', None))

    def print_version(self, url):
        """Return the printer-friendly URL for the article at ``url``.

        The article id is taken to be the fifth ``/``-separated component
        of ``url`` (index 4); for a URL shaped like
        ``http://host/section/ID/...`` that component is ``ID``.

        Raises:
            IndexError: if ``url`` has fewer than five ``/``-separated
                components.
        """
        baseurl = 'http://www.technologyreview.com/printer_friendly_article.aspx?id='
        # The original code split this component on "/" a second time; a
        # piece produced by splitting on "/" cannot contain "/", so that
        # step was a no-op and has been dropped.
        article_id = url.split('/')[4]
        return baseurl + article_id

    def postprocess_html(self, soup, first_fetch):
        """Strip page chrome and hyperlinks from a fetched page.

        Args:
            soup: parsed page; must provide ``find`` and ``findAll``.
            first_fetch: accepted for the hook signature; not used here.

        Returns:
            The same ``soup`` object, modified in place.
        """
        # Drop the first element of each of these classes: the header
        # picture, the close button and the banner advertisement.  Pages
        # that lack one of them are left alone rather than crashing on a
        # ``None`` result from ``find``.
        for css_class in ('header', 'close', 'bannerad'):
            node = soup.find(True, {'class': css_class})
            if node is not None:
                node.replaceWith("")

        # thanks kiklop74! This code removes all links from the text.
        # Only anchors with a non-None ``.string`` are replaced by that
        # text; the others are kept unchanged.
        for alink in soup.findAll('a'):
            link_text = alink.string
            if link_text is not None:
                alink.replaceWith(link_text)

        return soup
-