home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3676 < prev    next >
Encoding:
Text File  |  2009-11-11  |  3.2 KB  |  70 lines

  1. #!/usr/bin/env  python
  2.  
  3. __license__   = 'GPL v3'
  4. __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
  5. '''
  6. criticadigital.com
  7. '''
  8.  
  9. from calibre.web.feeds.news import BasicNewsRecipe
  10.  
  11. class CriticaDigital(BasicNewsRecipe):
  12.     title                 = 'Critica de la Argentina'
  13.     __author__            = 'Darko Miletic and Sujata Raman'
  14.     description           = 'Noticias de Argentina'
  15.     oldest_article        = 2
  16.     max_articles_per_feed = 100
  17.     language = 'es'
  18.  
  19.     no_stylesheets        = True
  20.     use_embedded_content  = False
  21.     encoding              = 'cp1252'
  22.  
  23.     extra_css = '''
  24.                 h1{font-family:"Trebuchet MS";}
  25.                 h3{color:#9A0000; font-family:Tahoma; font-size:x-small;}
  26.                 h2{color:#504E53; font-family:Arial,Helvetica,sans-serif ;font-size:small;}
  27.                 #epigrafe{font-family:Arial,Helvetica,sans-serif ;color:#666666 ; font-size:x-small;}
  28.                 p {font-family:Arial,Helvetica,sans-serif;}
  29.                 #fecha{color:#858585; font-family:Tahoma; font-size:x-small;}
  30.                 #autor{color:#858585; font-family:Tahoma; font-size:x-small;}
  31.                 #hora{color:#F00000;font-family:Tahoma; font-size:x-small;}
  32.                 '''
  33.     keep_only_tags = [
  34.                         dict(name='div', attrs={'class':['bloqueTitulosNoticia','cfotonota']})
  35.                        ,dict(name='div', attrs={'id':'boxautor'})
  36.                         ,dict(name='p', attrs={'id':'textoNota'})
  37.                      ]
  38.  
  39.     remove_tags = [
  40.                      dict(name='div', attrs={'class':'box300'       })
  41.                     ,dict(name='div', style=True                     )
  42.                     ,dict(name='div', attrs={'class':'titcomentario'})
  43.                     ,dict(name='div', attrs={'class':'comentario'   })
  44.                     ,dict(name='div', attrs={'class':'paginador'    })
  45.                   ]
  46.  
  47.     feeds = [
  48.                (u'Politica', u'http://www.criticadigital.com/herramientas/rss.php?ch=politica'        )
  49.               ,(u'Economia', u'http://www.criticadigital.com/herramientas/rss.php?ch=economia'        )
  50.               ,(u'Deportes', u'http://www.criticadigital.com/herramientas/rss.php?ch=deportes'        )
  51.               ,(u'Espectaculos', u'http://www.criticadigital.com/herramientas/rss.php?ch=espectaculos')
  52.               ,(u'Mundo', u'http://www.criticadigital.com/herramientas/rss.php?ch=mundo'              )
  53.               ,(u'Policiales', u'http://www.criticadigital.com/herramientas/rss.php?ch=policiales'    )
  54.               ,(u'Sociedad', u'http://www.criticadigital.com/herramientas/rss.php?ch=sociedad'        )
  55.               ,(u'Salud', u'http://www.criticadigital.com/herramientas/rss.php?ch=salud'              )
  56.               ,(u'Tecnologia', u'http://www.criticadigital.com/herramientas/rss.php?ch=tecnologia'    )
  57.               ,(u'Santa Fe', u'http://www.criticadigital.com/herramientas/rss.php?ch=santa_fe'        )
  58.             ]
  59.  
  60.     def get_cover_url(self):
  61.         cover_url = None
  62.         index = 'http://www.criticadigital.com/impresa/'
  63.         soup = self.index_to_soup(index)
  64.         link_item = soup.find('div',attrs={'class':'tapa'})
  65.         if link_item:
  66.            cover_url = index + link_item.img['src']
  67.         return cover_url
  68.  
  69.  
  70.