home *** CD-ROM | disk | FTP | other *** search
- from __future__ import with_statement
- __license__ = 'GPL 3'
- __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
-
- import re
- from calibre.web.feeds.news import BasicNewsRecipe
-
class TheHindu(BasicNewsRecipe):
    """Calibre news recipe for 'The Business Line'.

    Articles are pulled from the thehindubusinessline.com RSS feeds listed
    in ``feeds``; the remaining attributes configure how BasicNewsRecipe
    trims and styles each downloaded page.
    """

    # --- Publication metadata -------------------------------------------
    title = u'The Business Line'
    __author__ = 'Dhiru'
    language = 'en_IN'

    # --- Download limits ------------------------------------------------
    # NOTE(review): units/semantics are defined by BasicNewsRecipe
    # (presumably days and a per-feed article cap) -- confirm in its docs.
    oldest_article = 7
    max_articles_per_feed = 100

    # --- Page clean-up and styling ---------------------------------------
    no_stylesheets = True

    # Tag spec matching <font class="storyhead">; presumably everything
    # before that tag is dropped by the base class -- confirm in its docs.
    remove_tags_before = {'name': 'font', 'class': 'storyhead'}

    # Replace the '<!-- story ends -->' marker and everything after it
    # (DOTALL lets '.' span newlines) with the closing body/html tags.
    preprocess_regexps = [
        (
            re.compile(r'<!-- story ends -->.*', flags=re.DOTALL),
            lambda _match: '</body></html>',
        ),
    ]

    extra_css = '''
        .storyhead{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#000099;}
        body{font-family:Verdana,Arial,Helvetica,sans-serif; font-size:x-small; text-align:left;}
        '''

    # --- Feeds: (section title, RSS URL) pairs, in output order ---------
    feeds = [
        (u'Main - Latest News',
         u'http://www.thehindubusinessline.com/rss/blnus.xml'),
        (u'Main - Front Page',
         u'http://www.thehindubusinessline.com/rss/14hdline.xml'),
        (u'Main - Corporate',
         u'http://www.thehindubusinessline.com/rss/02hdline.xml'),
        (u'Main - Market',
         u'http://www.thehindubusinessline.com/rss/05hdline.xml'),
        (u'Main - Opinion',
         u'http://www.thehindubusinessline.com/rss/04hdline.xml'),
        (u'Main - Infotech',
         u'http://www.thehindubusinessline.com/rss/15hdline.xml'),
        (u'Main - Marketing',
         u'http://www.thehindubusinessline.com/rss/19hdline.xml'),
        (u'Main - Money & banking',
         u'http://www.thehindubusinessline.com/rss/06hdline.xml'),
        (u'Main - Agri & Commodities',
         u'http://www.thehindubusinessline.com/rss/07hdline.xml'),
        (u'Industry',
         u'http://www.thehindubusinessline.com/rss/03hdline.xml'),
        (u'Logistic',
         u'http://www.thehindubusinessline.com/rss/09hdline.xml'),
        (u'Result',
         u'http://www.thehindubusinessline.com/rss/26hdline.xml'),
        (u'Government',
         u'http://www.thehindubusinessline.com/rss/27hdline.xml'),
        (u'Investment World',
         u'http://www.thehindubusinessline.com/rss/iw20hdline.xml'),
        (u'Supplement - Life',
         u'http://www.thehindubusinessline.com/rss/lf10hdline.xml'),
    ]

    def postprocess_html(self, soup, first_fetch):
        """Rename table-layout tags in *soup* to ``div`` and return *soup*.

        Every ``table``, ``tr``, ``td`` and ``center`` element found in the
        parsed page has its tag name changed in place; attributes and
        children are left as they are. *first_fetch* is not used.
        """
        layout_tags = ['table', 'tr', 'td', 'center']
        for node in soup.findAll(layout_tags):
            node.name = 'div'
        return soup
-