home *** CD-ROM | disk | FTP | other *** search
- #!/usr/bin/env python
- __license__ = 'GPL v3'
- __author__ = 'Lorenzo Vigentini'
- __copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
- __version__ = 'v1.02'
- __date__ = '10, January 2010'
- __description__ = 'Sport news from the most read sport newspaper in Italy'
-
- '''www.gazzetta.it'''
-
- from calibre.web.feeds.news import BasicNewsRecipe
-
class laGazzetta(BasicNewsRecipe):
    """Calibre news recipe for La Gazzetta dello Sport (www.gazzetta.it).

    The class is purely declarative apart from :meth:`print_version`: the
    attributes below configure the ``BasicNewsRecipe`` base class, which
    downloads the RSS feeds listed in ``feeds`` and cleans each article.
    """

    __author__ = 'Lorenzo Vigentini'
    description = 'Sport news from the most read sport newspaper in Italy'

    cover_url = 'http://www.gazzetta.it/primapagina/images/prima_pagina_grande.png'
    title = 'La Gazzetta dello Sport '
    publisher = 'RCS Digital'
    category = 'Sport News'

    language = 'it'
    encoding = 'cp1252'
    timefmt = '[%a, %d %b, %Y]'

    oldest_article = 2
    max_articles_per_feed = 20
    use_embedded_content = False
    # NOTE(review): the option documented by calibre is spelled "recursions";
    # as written this attribute is presumably ignored by the base class.
    # Confirm before renaming, since enabling it would change what is crawled.
    recursion = 10

    remove_javascript = True
    no_stylesheets = True

    # Only the article container is of interest on a story page.
    keep_only_tags = [dict(name='div', attrs={'id': 'articolo'})]

    # Page furniture, toolbars, ads, comment links and embedded media.
    remove_tags = [
        dict(name='ul', attrs={'id': ['service-toolbar', 'sections-menu']}),
        dict(name='div', attrs={'id': ['header', 'rightcol', 'sponsored', 'vxFlashPlayer', 'footer', 'print-box']}),
        dict(name='iframe', attrs={'id': 'mirago-feed'}),
        dict(name='a', attrs={'id': 'commenta-up'}),
        dict(name='cite', attrs={'class': ['signature', 'parag-title']}),
        dict(name='a', attrs={'class': ['last-comment', 'button-bold2']}),
        dict(name=['base', 'object', 'link', 'a', 'script', 'noscript']),
    ]

    # The CSS ``font`` shorthand requires the size before the family (with
    # weight/style ahead of both); the family-first order is invalid CSS.
    extra_css = '''
        h1 {font: large sans-serif;}
        h2 {font: medium sans-serif;}
        h3 {font: small sans-serif;}
        h4 {font: bold small sans-serif;}
        p {font:10pt helvetica}
        dd {font:8pt helvetica}
    '''

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'Calcio', u'http://www.gazzetta.it/rss/Calcio.xml'),
        (u'Formula 1', u'http://www.gazzetta.it/rss/Formula1.xml'),
        (u'Motomondiale', u'http://www.gazzetta.it/rss/Motomondiale.xml'),
        (u'Motori', u'http://www.gazzetta.it/rss/Motori.xml'),
        (u'Ciclismo', u'http://www.gazzetta.it/rss/Ciclismo.xml'),
        (u'Basket', u'http://www.gazzetta.it/rss/Basket.xml'),
        (u'Tennis', u'http://www.gazzetta.it/rss/Tennis.xml'),
        (u'Pallavolo', u'http://www.gazzetta.it/rss/Pallavolo.xml'),
        (u'Vela', u'http://www.gazzetta.it/rss/Vela.xml'),
        (u'Atletica', u'http://www.gazzetta.it/rss/Atletica.xml'),
        (u'Altri Sport', u'http://www.gazzetta.it/rss/Sport_Vari.xml'),
    ]

    def print_version(self, url):
        """Return the URL of the printer-friendly page for an article.

        The print page lives in the same directory as the article and is
        named ``<article name>_print.html``, where the article name is the
        last path segment up to its first dot.

        :param url: absolute URL of the article page.
        :return: URL of the print page, or ``url`` unchanged when it has no
            file name to derive a print page from (no ``/`` or a trailing
            ``/``).
        """
        # Split on the last slash instead of counting path segments, so the
        # result is right for any directory depth, not just four levels.
        directory, slash, filename = url.rpartition('/')
        # Take the name up to the first dot rather than chopping a fixed six
        # characters, which only worked for the ".shtml" extension.
        stem = filename.split('.')[0]
        if not slash or not stem:
            return url

        print_url = directory + '/' + stem + '_print.html'
        # Single-argument call form prints the same text on Python 2 and 3.
        print('this is the url: ' + print_url)
        return print_url
-