home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3659 < prev    next >
Encoding:
Text File  |  2010-06-04  |  2.6 KB  |  63 lines

  1.  
  2. __license__   = 'GPL v3'
  3. __copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
  4. '''
  5. clarin.com
  6. '''
  7.  
  8. from calibre.web.feeds.news import BasicNewsRecipe
  9.  
  10. class Clarin(BasicNewsRecipe):
  11.     title                 = 'Clarin'
  12.     __author__            = 'Darko Miletic'
  13.     description           = 'Noticias de Argentina y mundo'
  14.     publisher             = 'Grupo Clarin'
  15.     category              = 'news, politics, Argentina'
  16.     oldest_article        = 2
  17.     max_articles_per_feed = 100
  18.     use_embedded_content  = False
  19.     no_stylesheets        = True
  20.     encoding              = 'utf8'
  21.     language              = 'es_AR'
  22.     publication_type      = 'newspaper'
  23.     INDEX                 = 'http://www.clarin.com'
  24.     masthead_url          = 'http://www.clarin.com/static/CLAClarin/images/logo-clarin-print.jpg'
  25.     extra_css             = ' body{font-family: Arial,Helvetica,sans-serif} h2{font-family: Georgia,serif; font-size: xx-large} .hora{font-weight:bold} .hd p{font-size: small} .nombre-autor{color: #0F325A} '
  26.  
  27.     conversion_options = {
  28.                           'comment'  : description
  29.                         , 'tags'     : category
  30.                         , 'publisher': publisher
  31.                         , 'language' : language
  32.                         }
  33.  
  34.     keep_only_tags = [dict(attrs={'class':['hd','mt']})]
  35.  
  36.     feeds = [
  37.                (u'Pagina principal', u'http://www.clarin.com/rss/'             )
  38.               ,(u'Politica'        , u'http://www.clarin.com/rss/politica/'    )
  39.               ,(u'Deportes'        , u'http://www.clarin.com/rss/deportes/'    )
  40.               ,(u'Economia'        , u'http://www.clarin.com/economia/'        )
  41.               ,(u'Mundo'           , u'http://www.clarin.com/rss/mundo/'       )
  42.               ,(u'Espectaculos'    , u'http://www.clarin.com/rss/espectaculos/')
  43.               ,(u'Sociedad'        , u'http://www.clarin.com/rss/sociedad/'    )
  44.               ,(u'Ciudades'        , u'http://www.clarin.com/rss/ciudades/'    )
  45.               ,(u'Policiales'      , u'http://www.clarin.com/rss/policiales/'  )
  46.               ,(u'Internet'        , u'http://www.clarin.com/rss/internet/'    )
  47.               ,(u'Ciudades'        , u'http://www.clarin.com/rss/ciudades/'    )
  48.             ]
  49.  
  50.     def print_version(self, url):
  51.         return url + '?print=1'
  52.  
  53.     def get_cover_url(self):
  54.         cover_url = None
  55.         soup = self.index_to_soup(self.INDEX)
  56.         cover_item = soup.find('div',attrs={'class':'bb-md bb-md-edicion_papel'})
  57.         if cover_item:
  58.            ap = cover_item.find('a',attrs={'href':'/edicion-impresa/'})
  59.            if ap:
  60.               cover_url = self.INDEX + ap.img['src']
  61.         return cover_url
  62.  
  63.