home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3729 < prev    next >
Encoding:
Text File  |  2009-10-14  |  1.9 KB  |  57 lines

  1. #!/usr/bin/env  python
  2. # -*- coding: utf-8 -*-
  3.  
  4. __license__   = 'GPL v3'
  5. __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
  6. '''
  7. elperiodico.cat
  8. '''
  9.  
  10. from calibre.web.feeds.news import BasicNewsRecipe
  11. from calibre.ebooks.BeautifulSoup import Tag
  12.  
  13. class ElPeriodico_cat(BasicNewsRecipe):
  14.     title                 = 'El Periodico de Catalunya'
  15.     __author__            = 'Darko Miletic'
  16.     description           = 'Noticias desde Catalunya'
  17.     publisher             = 'elperiodico.cat'
  18.     category              = 'news, politics, Spain, Catalunya'
  19.     oldest_article        = 2
  20.     max_articles_per_feed = 100
  21.     no_stylesheets        = True
  22.     use_embedded_content  = False
  23.     delay                 = 1
  24.     encoding              = 'cp1252'
  25.     language = 'ca'
  26.  
  27.  
  28.     html2lrf_options = [
  29.                           '--comment'  , description
  30.                         , '--category' , category
  31.                         , '--publisher', publisher
  32.                         ]
  33.  
  34.     html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
  35.  
  36.     feeds              = [(u"Tota l'edici├│", u'http://www.elperiodico.cat/rss.asp?id=46')]
  37.  
  38.  
  39.     keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]
  40.  
  41.     remove_tags        = [
  42.                               dict(name=['object','link','script'])
  43.                              ,dict(name='ul',attrs={'class':'herramientasDeNoticia'})
  44.                              ,dict(name='div', attrs={'id':'inferiores'})
  45.                          ]
  46.  
  47.     def print_version(self, url):
  48.         return url.replace('/default.asp?','/print.asp?')
  49.  
  50.     def preprocess_html(self, soup):
  51.         mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
  52.         soup.head.insert(0,mcharset)
  53.         for item in soup.findAll(style=True):
  54.             del item['style']
  55.         return soup
  56.  
  57.