home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3735 < prev    next >
Encoding:
Text File  |  2009-10-14  |  2.4 KB  |  62 lines

  1. #!/usr/bin/env  python
  2.  
  3. __license__   = 'GPL v3'
  4. __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
  5. '''
  6. emol.com
  7. '''
  8.  
  9. from calibre.web.feeds.news import BasicNewsRecipe
  10.  
  11. class ElMercurio(BasicNewsRecipe):
  12.     title                 = 'El Mercurio online'
  13.     __author__            = 'Darko Miletic'
  14.     description           = 'El sitio de noticias online de Chile'
  15.     publisher             = 'El Mercurio'
  16.     category              = 'news, politics, Chile'
  17.     oldest_article        = 2
  18.     max_articles_per_feed = 100
  19.     no_stylesheets        = True
  20.     use_embedded_content  = False
  21.     encoding              = 'cp1252'
  22.     cover_url             = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif'
  23.     remove_javascript     = True
  24.     use_embedded_content  = False
  25.  
  26.     html2lrf_options = [
  27.                           '--comment', description
  28.                         , '--category', category
  29.                         , '--publisher', publisher
  30.                         ]
  31.  
  32.     html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
  33.  
  34.     keep_only_tags = [
  35.                         dict(name='div', attrs={'class':'despliegue-txt_750px'})
  36.                        ,dict(name='div', attrs={'id':'div_cuerpo_participa'})
  37.                      ]
  38.  
  39.     remove_tags = [
  40.                      dict(name='div', attrs={'class':'contenedor_despliegue-col-left300'})
  41.                     ,dict(name='div', attrs={'id':['div_centro_dn_opc','div_cabezera','div_secciones','div_contenidos','div_pie','nav']})
  42.                     ]
  43.  
  44.     feeds = [
  45.                (u'Noticias de ultima hora', u'http://www.emol.com/rss20/rss.asp?canal=0')
  46.               ,(u'Nacional', u'http://www.emol.com/rss20/rss.asp?canal=1')
  47.               ,(u'Mundo', u'http://www.emol.com/rss20/rss.asp?canal=2')
  48.               ,(u'Deportes', u'http://www.emol.com/rss20/rss.asp?canal=4')
  49.               ,(u'Magazine', u'http://www.emol.com/rss20/rss.asp?canal=6')
  50.               ,(u'Tecnologia', u'http://www.emol.com/rss20/rss.asp?canal=5')
  51.               ,(u'La Musica', u'http://www.emol.com/rss20/rss.asp?canal=7')
  52.             ]
  53.  
  54.     def preprocess_html(self, soup):
  55.         mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
  56.         soup.head.insert(0,mtag)
  57.         for item in soup.findAll(style=True):
  58.             del item['style']
  59.         return soup
  60.  
  61.     language = 'es'
  62.