home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3671 < prev    next >
Encoding:
Text File  |  2010-05-27  |  3.6 KB  |  89 lines

  1. #!/usr/bin/env  python
  2. __license__     = 'GPL v3'
  3. __author__      = 'Lorenzo Vigentini, based on Darko Miletic'
  4. __copyright__   = '2009, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
  5. __version__     = 'v1.01'
  6. __date__        = '10, January 2010'
  7. __description__ = 'Italian daily newspaper'
  8.  
  9. '''
  10. http://www.corriere.it/
  11. '''
  12. import time
  13. from calibre.web.feeds.news import BasicNewsRecipe
  14.  
  15. class ilCorriere(BasicNewsRecipe):
  16.     __author__     = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini'
  17.     description    = 'Italian daily newspaper'
  18.  
  19. #    cover_url      = 'http://images.corriereobjects.it/images/static/common/logo_home.gif?v=200709121520
  20.  
  21.  
  22.     title          = u'Il Corriere della sera'
  23.     publisher      = 'RCS Digital'
  24.     category       = 'News, politics, culture, economy, general interest'
  25.  
  26.     encoding       = 'cp1252'
  27.     language       = 'it'
  28.     timefmt        = '[%a, %d %b, %Y]'
  29.  
  30.     oldest_article = 10
  31.     max_articles_per_feed = 100
  32.     use_embedded_content  = False
  33.     recursion             = 10
  34.  
  35.     remove_javascript = True
  36.     no_stylesheets = True
  37.  
  38.     html2lrf_options = [
  39.                           '--comment', description
  40.                         , '--category', category
  41.                         , '--publisher', publisher
  42.                         , '--ignore-tables'
  43.                         ]
  44.  
  45.     html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
  46.  
  47.     keep_only_tags = [dict(name='div', attrs={'class':['news-dettaglio article','article']})]
  48.  
  49.     remove_tags = [
  50.                    dict(name=['base','object','link','embed']),
  51.                    dict(name='div', attrs={'class':'news-goback'}),
  52.                    dict(name='ul', attrs={'class':'toolbar'})
  53.                   ]
  54.  
  55.     remove_tags_after = dict(name='p', attrs={'class':'footnotes'})
  56.  
  57.     def get_cover_url(self):
  58.         cover = None
  59.         st = time.localtime()
  60.         year = str(st.tm_year)
  61.         month = "%.2d" % st.tm_mon
  62.         day = "%.2d" % st.tm_mday
  63.         #http://images.corriere.it/primapagina/storico/2010_05_17/images/prima_pagina_grande.png
  64.         cover='http://images.corriere.it/primapagina/storico/'+ year + '_' +  month +'_' + day +'/images/prima_pagina_grande.png'
  65.         br = BasicNewsRecipe.get_browser()
  66.         try:
  67.             br.open(cover)
  68.         except:
  69.             self.log("\nCover unavailable")
  70.             cover ='http://images.corriereobjects.it/images/static/common/logo_home.gif?v=200709121520'
  71.         return cover
  72.  
  73.     feeds = [
  74.              (u'Ultimora'   ,  u'http://www.corriere.it/rss/ultimora.xml'  ),
  75.              (u'Editoriali' ,  u'http://www.corriere.it/rss/editoriali.xml'),
  76.              (u'Cronache'   ,  u'http://www.corriere.it/rss/cronache.xml'  ),
  77.              (u'Politica'   ,  u'http://www.corriere.it/rss/politica.xml'  ),
  78.              (u'Esteri'     ,  u'http://www.corriere.it/rss/esteri.xml'    ),
  79.              (u'Economia'   ,  u'http://www.corriere.it/rss/economia.xml'  ),
  80.              (u'Cultura'    ,  u'http://www.corriere.it/rss/cultura.xml'   ),
  81.              (u'Scienze'    ,  u'http://www.corriere.it/rss/scienze.xml'   ),
  82.              (u'Salute'     ,  u'http://www.corriere.it/rss/salute.xml'    ),
  83.              (u'Spettacolo' ,  u'http://www.corriere.it/rss/spettacoli.xml'),
  84.              (u'Cinema e TV',  u'http://www.corriere.it/rss/cinema.xml'    ),
  85.              (u'Sport'      ,  u'http://www.corriere.it/rss/sport.xml'     ),
  86.              (u'Roma'      ,   u'http://www.corriere.it/rss/homepage_roma.xml'),
  87.              (u'Milano'      , u'http://www.corriere.it/rss/homepage_milano.xml')
  88.             ]
  89.