home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_4076 < prev    next >
Encoding:
Text File  |  2009-12-28  |  3.2 KB  |  79 lines

  1. from calibre.web.feeds.news import BasicNewsRecipe
  2.  
  3. class TheNewsRecipe(BasicNewsRecipe):
  4.     __license__  = 'GPL v3'
  5.     __author__ = 'kwetal'
  6.     language = 'en_PK'
  7.     version = 1
  8.  
  9.     title = u'The News'
  10.     publisher = u'Jang Group'
  11.     category = u'News, Pakistan'
  12.     description = u'English Newspaper from Pakistan'
  13.  
  14.     use_embedded_content = False
  15.     remove_empty_feeds = True
  16.     oldest_article = 2
  17.     max_articles_per_feed = 100
  18.  
  19.     no_stylesheets = True
  20.     remove_javascript = True
  21.     encoding = 'iso-8859-1'
  22.  
  23.     remove_tags = []
  24.     remove_tags.append(dict(name = 'img', attrs = {'src': 'images/thenews.gif'}))
  25.     remove_tags.append(dict(name = 'img', attrs = {'src': 'images/shim.gif'}))
  26.  
  27.     # Feeds from http://thenews.com.pk/rss.asp
  28.     feeds = []
  29.     feeds.append((u'Latest Stories', u'http://www.thenews.com.pk/rss/thenews_updates.xml'))
  30.     feeds.append((u'Top Stories', u'http://www.thenews.com.pk/rss/thenews_topstories.xml'))
  31.     feeds.append((u'World News', u'http://www.thenews.com.pk/rss/thenews_world.xml'))
  32.     feeds.append((u'National News', u'http://www.thenews.com.pk/rss/thenews_national.xml'))
  33.     feeds.append((u'Business News', u'http://www.thenews.com.pk/rss/thenews_business.xml'))
  34.     feeds.append((u'Karachi News', u'http://www.thenews.com.pk/rss/thenews_karachi.xml'))
  35.     feeds.append((u'Lahore News', u'http://www.thenews.com.pk/rss/thenews_lahore.xml'))
  36.     feeds.append((u'Islamabad News', u'http://www.thenews.com.pk/rss/thenews_islamabad.xml'))
  37.     feeds.append((u'Peshawar News', u'http://www.thenews.com.pk/rss/thenews_peshawar.xml'))
  38.     feeds.append((u'Editorial', u'http://www.thenews.com.pk/rss/thenews_editorial.xml'))
  39.     feeds.append((u'Opinion', u'http://www.thenews.com.pk/rss/thenews_opinion.xml'))
  40.     feeds.append((u'Sports News', u'http://www.thenews.com.pk/rss/thenews_sports.xml'))
  41.     feeds.append((u'Newspost', u'http://www.thenews.com.pk/rss/thenews_newspost.xml'))
  42.  
  43.     conversion_options = {'comments': description, 'tags': category, 'language': 'en',
  44.                           'publisher': publisher, 'linearize_tables': True}
  45.  
  46.     extra_css = '''
  47.                 body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
  48.                 .heading_txt {font-size: x-large; font-weight: bold; text-align: left;}
  49.                 .small_txt {text-align: left;}
  50.                 .dateline {font-size: x-small; color: #696969; margin-top: 1em; margin-bottom: 1em}
  51.                 '''
  52.  
  53.  
  54.     def print_version(self, url):
  55.         ignore, sep, main = url.rpartition('/')
  56.  
  57.         if main.startswith('updates.asp'):
  58.             return url.replace('updates.asp', 'print.asp')
  59.         elif main.startswith('top_story_detail.asp'):
  60.             return url.replace('top_story_detail.asp', 'print3.asp')
  61.         elif main.startswith('daily_detail.asp'):
  62.             return url.replace('daily_detail.asp', 'print1.asp')
  63.         else:
  64.             return None
  65.  
  66.     def preprocess_html(self, soup):
  67.         for tr in soup.findAll('tr', attrs = {'bgcolor': True}):
  68.             del tr['bgcolor']
  69.  
  70.         td = soup.find('td', attrs = {'class': 'small_txt', 'height': '20'})
  71.         if td:
  72.             del td['height']
  73.             td['class'] = 'dateline'
  74.  
  75.         return soup
  76.  
  77.  
  78.  
  79.