home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2011 January / maximum-cd-2011-01.iso / DiscContents / calibre-0.7.26.msi / file_4146 < prev    next >
Encoding:
Text File  |  2010-09-30  |  3.0 KB  |  65 lines

  1. __license__   = 'GPL v3'
  2. __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
  3. '''
  4. www.la-razon.com
  5. '''
  6.  
  7. from calibre import strftime
  8. from calibre.web.feeds.news import BasicNewsRecipe
  9.  
  class LaRazon_Bol(BasicNewsRecipe):
      """Calibre news recipe for the Bolivian daily La Razón (www.la-razon.com).

      Articles are discovered through the site's per-section RSS feeds. From
      each article page only the headline and body containers are kept, and
      inline styling is stripped so that ``extra_css`` controls the layout.
      """

      # --- Publication metadata -------------------------------------------
      title                 = 'La Razón - Bolivia'
      __author__            = 'Darko Miletic'
      description           = 'El diario nacional de Bolivia'
      publisher             = 'Praxsis S.R.L.'
      category              = 'news, politics, Bolivia'
      language              = 'es'
      publication_type      = 'newspaper'

      # --- Fetch behaviour --------------------------------------------------
      oldest_article        = 1
      max_articles_per_feed = 200
      no_stylesheets        = True
      encoding              = 'cp1252'
      use_embedded_content  = False
      delay                 = 1
      remove_empty_feeds    = True

      # NOTE: strftime() runs once, when the class body is evaluated, so the
      # cover URL carries the date on which the recipe was loaded.
      cover_url             = strftime('http://www.la-razon.com/portadas/%Y%m%d_LaRazon.jpg')
      masthead_url          = 'http://www.la-razon.com/imagenes/logo.jpg'

      # Stylesheet applied to the generated book in place of the site's own
      # CSS (which is dropped via no_stylesheets).
      extra_css             = """ body{font-family: Arial,Helvetica,sans-serif }
                                  img{margin-bottom: 0.4em}
                                  .noticia-titulo{font-family: Georgia,"Times New Roman",Times,serif}
                                  .lead{font-weight: bold; font-size: 0.8em}
                                  """

      # Metadata handed to the conversion pipeline; reuses the class-level
      # values defined above so the two never drift apart.
      conversion_options = {
          'comment'   : description,
          'tags'      : category,
          'publisher' : publisher,
          'language'  : language,
      }

      # --- Page clean-up ----------------------------------------------------
      # Keep only the headline and article-body containers.
      keep_only_tags    = [dict(name='div', attrs={'class':['noticia-titulo','noticia-desarrollo']})]
      # Drop head-level and embedded/interactive elements.
      remove_tags       = [dict(name=['meta','link','form','iframe','embed','object'])]
      # Fixed pixel dimensions are removed from every remaining tag.
      remove_attributes = ['width','height']

      # --- Feeds: (section title, RSS URL) ----------------------------------
      feeds = [
          (u'Editorial'     , u'http://www.la-razon.com/rss_editorial.php' ),
          (u'Opinión'       , u'http://www.la-razon.com/rss_opinion.php'   ),
          (u'Nacional'      , u'http://www.la-razon.com/rss_nacional.php'  ),
          (u'Economia'      , u'http://www.la-razon.com/rss_economia.php'  ),
          (u'Ciudades'      , u'http://www.la-razon.com/rss_ciudades.php'  ),
          (u'Sociedad'      , u'http://www.la-razon.com/rss_sociedad.php'  ),
          # NOTE(review): this entry previously reused rss_sociedad.php, which
          # duplicated the "Sociedad" section under the "Mundo" heading.
          # rss_mundo.php follows the naming pattern of every other feed here;
          # confirm the endpoint against the live site.
          (u'Mundo'         , u'http://www.la-razon.com/rss_mundo.php'     ),
          (u'La Revista'    , u'http://www.la-razon.com/rss_larevista.php' ),
          (u'Sociales'      , u'http://www.la-razon.com/rss_sociales.php'  ),
          (u'Mia'           , u'http://www.la-razon.com/rss_mia.php'       ),
          (u'Marcas'        , u'http://www.la-razon.com/rss_marcas.php'    ),
          (u'Escape'        , u'http://www.la-razon.com/rss_escape.php'    ),
          (u'El Financiero' , u'http://www.la-razon.com/rss_financiero.php'),
          (u'Tendencias'    , u'http://www.la-razon.com/rss_tendencias.php'),
      ]

      def preprocess_html(self, soup):
          """Strip every inline ``style`` attribute from the parsed page.

          :param soup: parsed article document; must support
                       ``findAll(style=True)`` and item deletion on tags.
          :return: the same ``soup`` object, modified in place.
          """
          for item in soup.findAll(style=True):
              del item['style']
          return soup
  65.