home *** CD-ROM | disk | FTP | other *** search
- from calibre.web.feeds.news import BasicNewsRecipe
-
class TheNewsRecipe(BasicNewsRecipe):
    """Recipe for 'The News' (Jang Group), an English newspaper from Pakistan.

    Articles are collected from the RSS feeds listed in ``feeds``; each
    article link is rewritten to the site's print page by ``print_version``
    and the downloaded markup is tidied by ``preprocess_html``.
    """

    # --- Recipe metadata -------------------------------------------------
    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en_PK'
    version = 1

    title = u'The News'
    publisher = u'Jang Group'
    category = u'News, Pakistan'
    description = u'English Newspaper from Pakistan'

    # --- Download settings -----------------------------------------------
    use_embedded_content = False
    remove_empty_feeds = True
    oldest_article = 2
    max_articles_per_feed = 100

    no_stylesheets = True
    remove_javascript = True
    encoding = 'iso-8859-1'

    # Images matched by their exact ``src`` attribute are removed.
    remove_tags = [
        dict(name='img', attrs={'src': 'images/thenews.gif'}),
        dict(name='img', attrs={'src': 'images/shim.gif'}),
    ]

    # Feeds from http://thenews.com.pk/rss.asp
    feeds = [
        (u'Latest Stories', u'http://www.thenews.com.pk/rss/thenews_updates.xml'),
        (u'Top Stories', u'http://www.thenews.com.pk/rss/thenews_topstories.xml'),
        (u'World News', u'http://www.thenews.com.pk/rss/thenews_world.xml'),
        (u'National News', u'http://www.thenews.com.pk/rss/thenews_national.xml'),
        (u'Business News', u'http://www.thenews.com.pk/rss/thenews_business.xml'),
        (u'Karachi News', u'http://www.thenews.com.pk/rss/thenews_karachi.xml'),
        (u'Lahore News', u'http://www.thenews.com.pk/rss/thenews_lahore.xml'),
        (u'Islamabad News', u'http://www.thenews.com.pk/rss/thenews_islamabad.xml'),
        (u'Peshawar News', u'http://www.thenews.com.pk/rss/thenews_peshawar.xml'),
        (u'Editorial', u'http://www.thenews.com.pk/rss/thenews_editorial.xml'),
        (u'Opinion', u'http://www.thenews.com.pk/rss/thenews_opinion.xml'),
        (u'Sports News', u'http://www.thenews.com.pk/rss/thenews_sports.xml'),
        (u'Newspost', u'http://www.thenews.com.pk/rss/thenews_newspost.xml'),
    ]

    # Built from the metadata attributes defined above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': 'en',
        'publisher': publisher,
        'linearize_tables': True,
    }

    # ``.dateline`` is the class that preprocess_html() assigns to the
    # date cell; the other selectors match classes already in the markup.
    extra_css = '''
        body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
        .heading_txt {font-size: x-large; font-weight: bold; text-align: left;}
        .small_txt {text-align: left;}
        .dateline {font-size: x-small; color: #696969; margin-top: 1em; margin-bottom: 1em}
        '''

    def print_version(self, url):
        """Return the print-page URL for an article URL.

        The text after the last ``/`` in ``url`` selects the rewrite:
        ``updates.asp`` -> ``print.asp``, ``top_story_detail.asp`` ->
        ``print3.asp`` and ``daily_detail.asp`` -> ``print1.asp``.
        Returns ``None`` when the page name starts with none of these.
        """
        page = url.rpartition('/')[2]

        # Checked in order; the first page name that matches wins.
        rewrites = (
            ('updates.asp', 'print.asp'),
            ('top_story_detail.asp', 'print3.asp'),
            ('daily_detail.asp', 'print1.asp'),
        )
        for article_page, print_page in rewrites:
            if page.startswith(article_page):
                return url.replace(article_page, print_page)

        return None

    def preprocess_html(self, soup):
        """Strip row background colours and re-label the dateline cell.

        Every ``<tr>`` carrying a ``bgcolor`` attribute loses it.  The first
        ``<td class="small_txt" height="20">`` found, if any, has its
        ``height`` removed and its class changed to ``dateline``.  The
        modified ``soup`` is returned.
        """
        for row in soup.findAll('tr', attrs={'bgcolor': True}):
            del row['bgcolor']

        cell = soup.find('td', attrs={'class': 'small_txt', 'height': '20'})
        if not cell:
            return soup

        del cell['height']
        cell['class'] = 'dateline'
        return soup
-
-
-
-