home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3727 < prev    next >
Encoding:
Text File  |  2010-07-12  |  4.6 KB  |  87 lines

  1. # -*- coding: utf-8 -*-
  2. __license__   = 'GPL v3'
  3. __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
  4. '''
  5. www.elpais.com/diario/
  6. '''
  7.  
  8. from calibre import strftime
  9. from calibre.web.feeds.news import BasicNewsRecipe
  10.  
  11. class ElPaisImpresa(BasicNewsRecipe):
  12.     title                 = 'El Pa├¡s - edicion impresa'
  13.     __author__            = 'Darko Miletic'
  14.     description           = 'el periodico global en Espa├▒ol'
  15.     publisher             = 'EDICIONES EL PAIS, S.L.'
  16.     category              = 'news, politics,Spain,actualidad,noticias,informacion,videos,fotografias,audios,graficos,nacional,internacional,deportes,economia,tecnologia,cultura,gente,television,sociedad,opinion,blogs,foros,chats,encuestas,entrevistas,participacion'
  17.     no_stylesheets        = True
  18.     encoding              = 'latin1'
  19.     use_embedded_content  = False
  20.     language              = 'es'
  21.     publication_type      = 'newspaper'
  22.     masthead_url          = 'http://www.elpais.com/im/tit_logo_global.gif'
  23.     index                 = 'http://www.elpais.com/diario/'
  24.     extra_css             = ' p{text-align: justify} body{ text-align: left; font-family: Georgia,"Times New Roman",Times,serif } h2{font-family: Arial,Helvetica,sans-serif} img{margin-bottom: 0.4em} '
  25.  
  26.     conversion_options = {
  27.                           'comment'      : description
  28.                         , 'tags'         : category
  29.                         , 'publisher'    : publisher
  30.                         , 'language'     : language
  31.                         }
  32.  
  33.     feeds          = [
  34.                         (u'Internacional'     , index + u'internacional/'     )
  35.                        ,(u'Espa├▒a'            , index + u'espana/'            )
  36.                        ,(u'Economia'          , index + u'economia/'          )
  37.                        ,(u'Opinion'           , index + u'opinion/'           )
  38.                        ,(u'Vi├▒etas'           , index + u'vineta/'            )
  39.                        ,(u'Sociedad'          , index + u'sociedad/'          )
  40.                        ,(u'Cultura'           , index + u'cultura/'           )
  41.                        ,(u'Tendencias'        , index + u'tendencias/'        )
  42.                        ,(u'Gente'             , index + u'gente/'             )
  43.                        ,(u'Obituarios'        , index + u'obituarios/'        )
  44.                        ,(u'Deportes'          , index + u'deportes/'          )
  45.                        ,(u'Pantallas'         , index + u'radioytv/'          )
  46.                        ,(u'Ultima'            , index + u'ultima/'            )
  47.                        ,(u'Educacion'         , index + u'educacion/'         )
  48.                        ,(u'Saludo'            , index + u'salud/'             )
  49.                        ,(u'Ciberpais'         , index + u'ciberpais/'         )
  50.                        ,(u'EP3'               , index + u'ep3/'               )
  51.                        ,(u'Cine'              , index + u'cine/'              )
  52.                        ,(u'Babelia'           , index + u'babelia/'           )
  53.                        ,(u'El viajero'        , index + u'viajero/'           )
  54.                        ,(u'Negocios'          , index + u'negocios/'          )
  55.                        ,(u'Domingo'           , index + u'domingo/'           )
  56.                        ,(u'El Pais semanal'   , index + u'eps/'               )
  57.                        ,(u'Quadern Catalunya' , index + u'quadern-catalunya/' )
  58.                      ]
  59.  
  60.     keep_only_tags=[dict(attrs={'class':['cabecera_noticia','contenido_noticia']})]
  61.     remove_attributes=['width','height']
  62.     remove_tags=[dict(name='link')]
  63.  
  64.     def parse_index(self):
  65.         totalfeeds = []
  66.         lfeeds = self.get_feeds()
  67.         for feedobj in lfeeds:
  68.             feedtitle, feedurl = feedobj
  69.             self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
  70.             articles = []
  71.             soup = self.index_to_soup(feedurl)
  72.             for item in soup.findAll('a',attrs={'class':['g19r003','g19i003','g17r003','g17i003']}):
  73.                 url   = 'http://www.elpais.com' + item['href'].rpartition('/')[0]
  74.                 title = self.tag_to_string(item)
  75.                 date  = strftime(self.timefmt)
  76.                 articles.append({
  77.                                       'title'      :title
  78.                                      ,'date'       :date
  79.                                      ,'url'        :url
  80.                                      ,'description':''
  81.                                     })
  82.             totalfeeds.append((feedtitle, articles))
  83.         return totalfeeds
  84.  
  85.     def print_version(self, url):
  86.         return url + '?print=1'
  87.