home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3602 < prev    next >
Encoding:
Text File  |  2009-10-14  |  1.7 KB  |  53 lines

  1. #!/usr/bin/env  python
  2.  
  3. __license__   = 'GPL v3'
  4. __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
  5.  
  6. '''
  7. aljazeera.net
  8. '''
  9. from calibre.web.feeds.news import BasicNewsRecipe
  10.  
  11. class AlJazeera(BasicNewsRecipe):
  12.     title                  = 'Al Jazeera in English'
  13.     __author__             = 'Darko Miletic'
  14.     description            = 'News from Middle East'
  15.     language = 'en'
  16.  
  17.     publisher              = 'Al Jazeera'
  18.     category               = 'news, politics, middle east'
  19.     simultaneous_downloads = 1
  20.     delay                  = 4
  21.     oldest_article         = 1
  22.     max_articles_per_feed  = 100
  23.     no_stylesheets         = True
  24.     encoding               = 'iso-8859-1'
  25.     remove_javascript      = True
  26.     use_embedded_content   = False
  27.  
  28.     html2lrf_options = [
  29.                           '--comment', description
  30.                         , '--category', category
  31.                         , '--publisher', publisher
  32.                         , '--ignore-tables'
  33.                         ]
  34.  
  35.     html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_table=True'
  36.  
  37.     keep_only_tags = [dict(name='div', attrs={'id':'ctl00_divContent'})]
  38.  
  39.     remove_tags = [
  40.                      dict(name=['object','link'])
  41.                     ,dict(name='td', attrs={'class':['MostActiveDescHeader','MostActiveDescBody']})
  42.                   ]
  43.  
  44.     feeds = [(u'AL JAZEERA ENGLISH (AJE)', u'http://english.aljazeera.net/Services/Rss/?PostingId=2007731105943979989' )]
  45.  
  46.     def preprocess_html(self, soup):
  47.         for item in soup.findAll(style=True):
  48.             del item['style']
  49.         for item in soup.findAll(face=True):
  50.             del item['face']
  51.         return soup
  52.  
  53.