home *** CD-ROM | disk | FTP | other *** search
- #!/usr/bin/env python
-
- __license__ = 'GPL v3'
- __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
- '''
- cronista.com
- '''
-
- from calibre.web.feeds.news import BasicNewsRecipe
-
- class ElCronista(BasicNewsRecipe):
- title = 'El Cronista'
- __author__ = 'Darko Miletic'
- description = 'Noticias de Argentina'
- oldest_article = 2
- language = 'es'
-
- max_articles_per_feed = 100
- no_stylesheets = True
- use_embedded_content = False
- encoding = 'cp1252'
-
- html2lrf_options = [
- '--comment' , description
- , '--category' , 'news, Argentina'
- , '--publisher' , title
- ]
-
- keep_only_tags = [
- dict(name='table', attrs={'width':'100%' })
- ,dict(name='h1' , attrs={'class':'Arialgris16normal'})
- ]
-
- remove_tags = [dict(name='a', attrs={'class':'Arialazul12'})]
-
- feeds = [
- (u'Economia' , u'http://www.cronista.com/adjuntos/8/rss/Economia_EI.xml' )
- ,(u'Negocios' , u'http://www.cronista.com/adjuntos/8/rss/negocios_EI.xml' )
- ,(u'Ultimo momento' , u'http://www.cronista.com/adjuntos/8/rss/ultimo_momento.xml' )
- ,(u'Finanzas y Mercados' , u'http://www.cronista.com/adjuntos/8/rss/Finanzas_Mercados_EI.xml' )
- ,(u'Financial Times' , u'http://www.cronista.com/adjuntos/8/rss/FT_EI.xml' )
- ,(u'Opinion edicion impresa' , u'http://www.cronista.com/adjuntos/8/rss/opinion_edicion_impresa.xml' )
- ,(u'Socialmente Responsables', u'http://www.cronista.com/adjuntos/8/rss/Socialmente_Responsables.xml')
- ,(u'Asuntos Legales' , u'http://www.cronista.com/adjuntos/8/rss/asuntoslegales.xml' )
- ,(u'IT Business' , u'http://www.cronista.com/adjuntos/8/rss/itbusiness.xml' )
- ,(u'Management y RR.HH.' , u'http://www.cronista.com/adjuntos/8/rss/management.xml' )
- ,(u'Inversiones Personales' , u'http://www.cronista.com/adjuntos/8/rss/inversionespersonales.xml' )
- ]
-
- def print_version(self, url):
- main, sep, rest = url.partition('.com/notas/')
- article_id, lsep, rrest = rest.partition('-')
- return 'http://www.cronista.com/interior/index.php?p=imprimir_nota&idNota=' + article_id
-
- def preprocess_html(self, soup):
- mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
- soup.head.insert(0,mtag)
- soup.head.base.extract()
- htext = soup.find('h1',attrs={'class':'Arialgris16normal'})
- htext.name = 'p'
- soup.prettify()
- return soup
-
- def get_cover_url(self):
- cover_url = None
- index = 'http://www.cronista.com/contenidos/'
- soup = self.index_to_soup(index + 'ee.html')
- link_item = soup.find('a',attrs={'href':"javascript:Close()"})
- if link_item:
- cover_url = index + link_item.img['src']
- return cover_url
-
-