Chip 2011 November

home *** CD-ROM | disk | FTP | other *** search

/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / drivelry.recipe < prev next >

Wrap

Text File | 2011-09-09 | 1.3 KB | 42 lines

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup


class drivelrycom(BasicNewsRecipe):
    """Calibre news recipe for the drivelry.com blog (via its FeedBurner feed)."""

    title = u'drivelry.com'
    language = 'en'
    description = 'A blog by Mike Abrahams'
    __author__ = 'Krittika Goyal'
    oldest_article = 60  # days
    max_articles_per_feed = 25

    remove_stylesheets = True
    remove_tags_after = dict(name='div', attrs={'id': 'bookmark'})
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class': ['sidebar']}),
        dict(name='div', attrs={'id': ['bookmark']}),
    ]

    feeds = [
        ('drivelry.com', 'http://feeds.feedburner.com/drivelry'),
    ]

    def preprocess_html(self, soup):
        """Rebuild the page around the article's main container.

        The ``<div id="main">`` element is moved out of the downloaded
        page and placed at the top of a fresh, minimal document whose
        body also carries a donation link for the blog.

        :param soup: parsed HTML of the downloaded article page.
        :return: the new minimal document containing the story, or the
            original ``soup`` unchanged when the page has no
            ``<div id="main">`` element.
        """
        story = soup.find(name='div', attrs={'id': 'main'})
        if story is None:
            # Nothing to extract: inserting None into the wrapper body
            # would fail, so hand the page back untouched instead.
            return soup

        wrapper = BeautifulSoup(
            ' <html><head><title>t</title></head><body> '
            '<p>To donate to this blog: '
            '<a href="http://www.drivelry.com/thank-you/">click here</a></p> '
            '</body></html> '
        )
        body = wrapper.find(name='body')
        # Position 0 puts the story ahead of the donation paragraph.
        body.insert(0, story)
        return wrapper