home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3749 < prev    next >
Encoding:
Text File  |  2009-10-14  |  2.1 KB  |  60 lines

  1. #!/usr/bin/env  python
  2. # -*- coding: utf-8 -*-
  3.  
  4. __license__   = 'GPL v3'
  5. __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
  6. '''
  7. www.expansion.com
  8. '''
  9.  
  10. from calibre.web.feeds.news import BasicNewsRecipe
  11. from calibre.ebooks.BeautifulSoup import Tag
  12.  
  13. class Expansion(BasicNewsRecipe):
  14.     title                 = 'Diario Expansion'
  15.     __author__            = 'Darko Miletic'
  16.     description           = 'Lider de informacion de mercados, economica y politica'
  17.     publisher             = 'expansion.com'
  18.     category              = 'news, politics, Spain'
  19.     oldest_article        = 2
  20.     max_articles_per_feed = 100
  21.     no_stylesheets        = True
  22.     use_embedded_content  = False
  23.     delay                 = 1
  24.     encoding              = 'iso-8859-15'
  25.     language = 'es'
  26.  
  27.     direction             = 'ltr'
  28.  
  29.     html2lrf_options = [
  30.                           '--comment'  , description
  31.                         , '--category' , category
  32.                         , '--publisher', publisher
  33.                         ]
  34.  
  35.     html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
  36.  
  37.     feeds              = [
  38.                             (u'Ultimas noticias', u'http://rss.expansion.com/rss/descarga.htm?data2=178')
  39.                            ,(u'Temas del dia'   , u'http://rss.expansion.com/rss/descarga.htm?data2=178')
  40.                          ]
  41.  
  42.  
  43.     keep_only_tags = [dict(name='div', attrs={'id':'principal'})]
  44.  
  45.     remove_tags        = [
  46.                              dict(name=['object','link','script'])
  47.                             ,dict(name='div', attrs={'class':['utilidades','tit_relacionadas']})
  48.                          ]
  49.  
  50.     remove_tags_after = [dict(name='div', attrs={'class':'tit_relacionadas'})]
  51.  
  52.     def preprocess_html(self, soup):
  53.         soup.html['dir' ] = self.direction
  54.         mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
  55.         soup.head.insert(0,mcharset)
  56.         for item in soup.findAll(style=True):
  57.             del item['style']
  58.         return soup
  59.  
  60.