home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3900 < prev    next >
Encoding:
Text File  |  2009-10-14  |  1.9 KB  |  57 lines

  1. #!/usr/bin/env  python
  2. # -*- coding: utf-8 -*-
  3.  
  4. __license__   = 'GPL v3'
  5. __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
  6. '''
  7. www.marca.com
  8. '''
  9.  
  10. from calibre.web.feeds.news import BasicNewsRecipe
  11. from calibre.ebooks.BeautifulSoup import Tag
  12.  
  13. class Marca(BasicNewsRecipe):
  14.     title                 = 'Marca'
  15.     __author__            = 'Darko Miletic'
  16.     description           = 'Noticias deportivas'
  17.     publisher             = 'marca.com'
  18.     category              = 'news, sports, Spain'
  19.     oldest_article        = 2
  20.     max_articles_per_feed = 100
  21.     no_stylesheets        = True
  22.     use_embedded_content  = False
  23.     delay                 = 1
  24.     encoding              = 'iso-8859-15'
  25.     language = 'es'
  26.  
  27.     direction             = 'ltr'
  28.  
  29.     html2lrf_options = [
  30.                           '--comment'  , description
  31.                         , '--category' , category
  32.                         , '--publisher', publisher
  33.                         ]
  34.  
  35.     html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
  36.  
  37.     feeds              = [(u'Portada', u'http://rss.marca.com/rss/descarga.htm?data2=425')]
  38.  
  39.     keep_only_tags = [dict(name='div', attrs={'class':['cab_articulo','col_izq']})]
  40.  
  41.     remove_tags        = [
  42.                              dict(name=['object','link','script'])
  43.                             ,dict(name='div', attrs={'class':['colC','peu']})
  44.                             ,dict(name='div', attrs={'class':['utilidades estirar','bloque_int_corr estirar']})
  45.                          ]
  46.  
  47.     remove_tags_after = [dict(name='div', attrs={'class':'bloque_int_corr estirar'})]
  48.  
  49.     def preprocess_html(self, soup):
  50.         soup.html['dir' ] = self.direction
  51.         mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
  52.         soup.head.insert(0,mcharset)
  53.         for item in soup.findAll(style=True):
  54.             del item['style']
  55.         return soup
  56.  
  57.