Maximum CD 2010 November

home *** CD-ROM | disk | FTP | other *** search

/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3705 < prev next >

Wrap

Text File | 2010-04-03 | 1.9 KB | 49 lines

'''
dnaindia.com
'''

import re

from calibre.web.feeds.news import BasicNewsRecipe


class DNAIndia(BasicNewsRecipe):
    '''
    calibre news recipe for DNA India (dnaindia.com).

    Articles are discovered through the RSS feeds listed in ``feeds``.
    Each article URL is rewritten to the site's print view by
    ``print_version`` and the downloaded HTML is tidied up by
    ``postprocess_html``.
    '''

    title = 'DNA India'
    description = 'Mumbai news, India news, World news, breaking news'
    __author__ = 'Kovid Goyal'
    language = 'en_IN'
    encoding = 'cp1252'

    # (section title, feed URL) pairs.
    feeds = [
        ('Top News', 'http://www.dnaindia.com/syndication/rss_topnews.xml'),
        ('Popular News', 'http://www.dnaindia.com/syndication/rss_popular.xml'),
        ('Recent Columns', 'http://www.dnaindia.com/syndication/rss_column.xml'),
        ('Mumbai', 'http://www.dnaindia.com/syndication/rss,catid-1.xml'),
        ('India', 'http://www.dnaindia.com/syndication/rss,catid-2.xml'),
        ('World', 'http://www.dnaindia.com/syndication/rss,catid-9.xml'),
        ('Money', 'http://www.dnaindia.com/syndication/rss,catid-4.xml'),
        ('Sports', 'http://www.dnaindia.com/syndication/rss,catid-6.xml'),
        ('After Hours', 'http://www.dnaindia.com/syndication/rss,catid-7.xml'),
        ('Digital Life', 'http://www.dnaindia.com/syndication/rss,catid-1089741.xml'),
    ]

    # Elements selected by id / CSS class that are stripped from each page.
    remove_tags = [
        {'id': ['footer', 'lhs-col']},
        {'class': ['bottom', 'categoryHead', 'article_tools']},
    ]
    # Written as a list of tag specifications (the documented form, same
    # shape as remove_tags) rather than a bare dict.
    keep_only_tags = [dict(id='middle-col')]
    remove_tags_after = [dict(attrs={'id': 'story'})]
    remove_attributes = ['style']
    no_stylesheets = True

    def print_version(self, url):
        '''
        Map an article URL to the URL of its print view.

        :param url: Article URL; expected to contain ``newsid=<digits>``.
        :return: The ``dnaprint.asp`` URL for the same ``newsid``, or
                 ``url`` unchanged when no ``newsid`` is present.
        '''
        match = re.search(r'newsid=(\d+)', url)
        if not match:
            return url
        return 'http://www.dnaindia.com/dnaprint.asp?newsid=' + match.group(1)

    def postprocess_html(self, soup, first_fetch):
        '''
        Flatten table markup and drop the syndication link block.

        :param soup: Parsed article HTML (BeautifulSoup tree); modified
                     in place.
        :param first_fetch: Unused by this recipe.
        :return: The same ``soup`` object.
        '''
        # Rename table scaffolding to plain <div>s; the content is kept.
        for tag in soup.findAll(['table', 'tr', 'td']):
            tag.name = 'div'
        # Remove the element that wraps the 3dsyndication.com link, if any.
        link = soup.find(href='http://www.3dsyndication.com/')
        if link is not None:
            link.parent.extract()
        return soup