home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3874 < prev    next >
Encoding:
Text File  |  2010-02-28  |  2.7 KB  |  59 lines

  1. __license__   = 'GPL v3'
  2. __copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
  3. '''
  4. latercera.com
  5. '''
  6.  
  7. from calibre.web.feeds.news import BasicNewsRecipe
  8.  
  9. class LaTercera(BasicNewsRecipe):
  10.     title                 = 'La Tercera'
  11.     __author__            = 'Darko Miletic'
  12.     description           = 'El sitio de noticias online de Chile'
  13.     publisher             = 'La Tercera'
  14.     category              = 'news, politics, Chile'
  15.     oldest_article        = 2
  16.     max_articles_per_feed = 100
  17.     no_stylesheets        = True
  18.     encoding              = 'cp1252'
  19.     use_embedded_content  = False
  20.     remove_empty_feeds    = True
  21.     language              = 'es'
  22.  
  23.     conversion_options = {
  24.                           'comment'          : description
  25.                         , 'tags'             : category
  26.                         , 'publisher'        : publisher
  27.                         , 'language'         : language
  28.                         , 'linearize_tables' : True
  29.                         }
  30.  
  31.     keep_only_tags = [dict(name='div', attrs={'class':['span-16 articulo border','span-16 border','span-16']}) ]
  32.  
  33.     remove_tags = [
  34.                      dict(name=['ul','input','base'])
  35.                     ,dict(name='div', attrs={'id':['boxComentarios','shim','enviarAmigo']})
  36.                     ,dict(name='div', attrs={'class':['ad640','span-10 imgSet A','infoRelCol']})
  37.                     ,dict(name='p', attrs={'id':['mensajeError','mensajeEnviandoNoticia','mensajeExito']})
  38.                   ]
  39.  
  40.  
  41.     feeds = [
  42.                (u'Noticias de ultima hora', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&ul=1')
  43.               ,(u'Nacional', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=680')
  44.               ,(u'Politica', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=674')              
  45.               ,(u'Mundo', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=678')
  46.               ,(u'Deportes', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=656')
  47.               ,(u'Negocios', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=655')
  48.               ,(u'Entretenimiento', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=661')
  49.               ,(u'Motores', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=665')
  50.               ,(u'Tendencias', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=659')
  51.               ,(u'Estilo', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=660')
  52.               ,(u'Educacion', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=657')
  53.             ]
  54.  
  55.     def preprocess_html(self, soup):
  56.         for item in soup.findAll(style=True):
  57.             del item['style']
  58.         return soup
  59.