home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3888 < prev    next >
Encoding:
Text File  |  2009-10-14  |  1.4 KB  |  42 lines

  1. #!/usr/bin/env  python
  2.  
  3. __license__   = 'GPL v3'
  4. __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
  5. '''
  6. www.livemint.com
  7. '''
  8.  
  9. from calibre.web.feeds.news import BasicNewsRecipe
  10.  
  11. class LiveMint(BasicNewsRecipe):
  12.     title                 = u'Livemint'
  13.     __author__            = 'Darko Miletic'
  14.     description           = 'The Wall Street Journal'
  15.     publisher             = 'The Wall Street Journal'
  16.     category              = 'news, games, adventure, technology'
  17.     language = 'en'
  18.  
  19.     oldest_article        = 15
  20.     max_articles_per_feed = 100
  21.     no_stylesheets        = True
  22.     encoding              = 'utf-8'
  23.     use_embedded_content  = False
  24.     extra_css             = ' #dvArtheadline{font-size: x-large} #dvArtAbstract{font-size: large} '
  25.  
  26.     keep_only_tags = [dict(name='div', attrs={'class':'innercontent'})]
  27.  
  28.     remove_tags = [dict(name=['object','link','embed','form','iframe'])]
  29.  
  30.     feeds = [(u'Articles', u'http://www.livemint.com/SectionRssfeed.aspx?Mid=1')]
  31.  
  32.     def print_version(self, url):
  33.         link = url
  34.         msoup = self.index_to_soup(link)
  35.         mlink = msoup.find(attrs={'id':'ctl00_bodyplaceholdercontent_cntlArtTool_printUrl'})
  36.         if mlink:
  37.            link = 'http://www.livemint.com/Articles/' + mlink['href'].rpartition('/Articles/')[2]
  38.         return link
  39.  
  40.     def preprocess_html(self, soup):
  41.         return self.adeify_images(soup)
  42.