home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3703 < prev    next >
Encoding:
Text File  |  2010-02-10  |  1.5 KB  |  49 lines

  1. __license__   = 'GPL v3'
  2. __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
  3. '''
  4. http://www.dilbert.com
  5. '''
  6. import re
  7.  
  8. from calibre.web.feeds.recipes import BasicNewsRecipe
  9.  
  10. class DosisDiarias(BasicNewsRecipe):
  11.     title                  = 'Dilbert'
  12.     __author__             = 'Darko Miletic'
  13.     description            = 'Dilbert'
  14.     oldest_article         = 5
  15.     max_articles_per_feed  = 100
  16.     no_stylesheets         = True
  17.     use_embedded_content   = True
  18.     encoding               = 'utf-8'
  19.     publisher              = 'UNITED FEATURE SYNDICATE, INC.'
  20.     category               = 'comic'
  21.     language               = 'en'
  22.  
  23.     conversion_options = {
  24.                              'comments'        : description
  25.                             ,'tags'            : category
  26.                             ,'language'        : language
  27.                             ,'publisher'       : publisher
  28.                          }
  29.  
  30.     feeds = [(u'Dilbert', u'http://feeds.dilbert.com/DilbertDailyStrip' )]
  31.  
  32.     preprocess_regexps = [
  33.                     (re.compile('strip\..*\.gif', re.DOTALL|re.IGNORECASE),
  34.                         lambda match: 'strip.zoom.gif')
  35.                             ]
  36.  
  37.  
  38.     def get_article_url(self, article):
  39.         return article.get('feedburner_origlink', None)
  40.  
  41.     def preprocess_html(self, soup):
  42.        for tag in soup.findAll(name='a'):
  43.            if tag['href'].find('http://feedads') >= 0:
  44.               tag.extract()
  45.        return soup
  46.  
  47.  
  48.  
  49.