home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3870 < prev    next >
Encoding:
Text File  |  2010-01-10  |  3.3 KB  |  80 lines

  1. #!/usr/bin/env  python
  2. __license__     = 'GPL v3'
  3. __author__      = 'Lorenzo Vigentini'
  4. __copyright__   = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
  5. __version__     = 'v1.02'
  6. __date__        = '10, January 2010'
  7. __description__ = 'Sport news from the most read sport newspaper in Italy'
  8.  
  9. '''www.gazzetta.it'''
  10.  
  11. from calibre.web.feeds.news import BasicNewsRecipe
  12.  
  13. class laGazzetta(BasicNewsRecipe):
  14.     __author__        = 'Lorenzo Vigentini'
  15.     description   = 'Sport news from the most read sport newspaper in Italy'
  16.  
  17.     cover_url      = 'http://www.gazzetta.it/primapagina/images/prima_pagina_grande.png'
  18.     title          = 'La Gazzetta dello Sport '
  19.     publisher      = 'RCS Digital'
  20.     category       = 'Sport News'
  21.  
  22.     language       = 'it'
  23.     encoding       = 'cp1252'
  24.     timefmt        = '[%a, %d %b, %Y]'
  25.  
  26.     oldest_article = 2
  27.     max_articles_per_feed = 20
  28.     use_embedded_content  = False
  29.     recursion             = 10
  30.  
  31.     remove_javascript = True
  32.     no_stylesheets = True
  33.  
  34.     keep_only_tags = [ dict(name='div', attrs={'id':'articolo'})]
  35.  
  36.     remove_tags = [
  37.                 dict(name='ul',attrs={'id':['service-toolbar','sections-menu']}),
  38.                 dict(name='div',attrs={'id':['header','rightcol','sponsored','vxFlashPlayer','footer','print-box']}),
  39.                 dict(name='iframe',attrs={'id':'mirago-feed'}),
  40.                 dict(name='a',attrs={'id':'commenta-up'}),
  41.                 dict(name='cite',attrs={'class':['signature','parag-title']}),
  42.                 dict(name='a',attrs={'class':['last-comment','button-bold2']}),
  43.                 dict(name=['base','object','link','a','script','noscript'])
  44.             ]
  45.  
  46.     extra_css      = '''
  47.                         h1 {font: sans-serif large;}
  48.                         h2 {font: sans-serif medium;}
  49.                         h3 {font: sans-serif small;}
  50.                         h4 {font: sans-serif bold small;}
  51.                         p  {font:10pt helvetica}
  52.                         dd {font:8pt helvetica}
  53.                       '''
  54.  
  55.     feeds       = [
  56.                    (u'Calcio',u'http://www.gazzetta.it/rss/Calcio.xml'),
  57.                    (u'Formula 1',u'http://www.gazzetta.it/rss/Formula1.xml'),
  58.                    (u'Motomodiale',u'http://www.gazzetta.it/rss/Motomondiale.xml'),
  59.                    (u'Motori',u'http://www.gazzetta.it/rss/Motori.xml'),
  60.                    (u'Ciclismo',u'http://www.gazzetta.it/rss/Ciclismo.xml'),
  61.                    (u'Basket',u'http://www.gazzetta.it/rss/Basket.xml'),
  62.                    (u'Tennis',u'http://www.gazzetta.it/rss/Tennis.xml'),
  63.                    (u'Pallavolo',u'http://www.gazzetta.it/rss/Pallavolo.xml'),
  64.                    (u'Vela',u'http://www.gazzetta.it/rss/Vela.xml'),
  65.                    (u'Atletica',u'http://www.gazzetta.it/rss/Atletica.xml'),
  66.                    (u'Altri Sport',u'http://www.gazzetta.it/rss/Sport_Vari.xml')
  67.                  ]
  68.  
  69.     def print_version(self,url):
  70.         segments = url.split('/')
  71.         basename = '/'.join(segments[:3])+'/'
  72.         subPath= '/'.join(segments[3:7])+'/'
  73.         articleURL=(segments[len(segments)-1])[:-6]
  74.         myArticleSegs=articleURL.split('.')
  75.         myArticle=myArticleSegs[0]
  76.         printVerString=myArticle+ '_print.html'
  77.         myURL = basename + subPath + printVerString
  78.         print 'this is the url: ' + myURL
  79.         return basename + subPath + printVerString
  80.