home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2011 January / maximum-cd-2011-01.iso / DiscContents / calibre-0.7.26.msi / file_4380 < prev    next >
Encoding:
Text File  |  2010-10-21  |  2.1 KB  |  55 lines

  1. __license__   = 'GPL v3'
  2. __copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
  3. '''
  4. economictimes.indiatimes.com
  5. '''
  6.  
  7. from calibre.web.feeds.news import BasicNewsRecipe
  8.  
  9. class TheEconomicTimes(BasicNewsRecipe):
  10.     title                  = 'The Economic Times India'
  11.     __author__             = 'Darko Miletic'
  12.     description            = 'Financial news from India'
  13.     publisher              = 'economictimes.indiatimes.com'
  14.     category               = 'news, finances, politics, India'
  15.     oldest_article         = 2
  16.     max_articles_per_feed  = 100
  17.     no_stylesheets         = True
  18.     use_embedded_content   = False
  19.     simultaneous_downloads = 1
  20.     encoding               = 'utf-8'
  21.     language               = 'en_IN'
  22.     publication_type       = 'newspaper'
  23.     masthead_url           = 'http://economictimes.indiatimes.com/photo/2676871.cms'
  24.     extra_css              = """
  25.                                  body{font-family: Arial,Helvetica,sans-serif}
  26.                              """
  27.  
  28.     conversion_options = {
  29.                           'comment'          : description
  30.                         , 'tags'             : category
  31.                         , 'publisher'        : publisher
  32.                         , 'language'         : language
  33.                         }
  34.  
  35.     keep_only_tags = [dict(attrs={'class':'printdiv'})]
  36.     remove_tags    = [dict(name=['object','link','embed','iframe','base','table','meta'])]
  37.     remove_attributes = ['name']
  38.  
  39.     feeds          = [(u'All articles', u'http://economictimes.indiatimes.com/rssfeedsdefault.cms')]
  40.  
  41.     def print_version(self, url):
  42.         rest, sep, art = url.rpartition('/articleshow/')
  43.         return 'http://economictimes.indiatimes.com/articleshow/' + art + '?prtpage=1'
  44.  
  45.     def get_article_url(self, article):
  46.         rurl = article.get('link',  None)
  47.         if (rurl.find('/quickieslist/') > 0) or (rurl.find('/quickiearticleshow/') > 0):
  48.            return None
  49.         return rurl
  50.  
  51.     def preprocess_html(self, soup):
  52.         for item in soup.findAll(style=True):
  53.             del item['style']
  54.         return self.adeify_images(soup)
  55.