home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3655 < prev    next >
Encoding:
Text File  |  2010-05-15  |  4.9 KB  |  93 lines

  1. #!/usr/bin/env  python
  2. __license__     = 'GPL v3'
  3. __author__      = 'Kovid Goyal and Sujata Raman, Lorenzo Vigentini'
  4. __copyright__   = '2009, Kovid Goyal and Sujata Raman'
  5. __version__     = 'v1.02'
  6. __date__        = '10, January 2010'
  7. __description__ = 'Providing context and clarity on national and international news, peoples and cultures'
  8.  
  9. '''csmonitor.com'''
  10.  
  11.  
  12. import re
  13. from calibre.web.feeds.news import BasicNewsRecipe
  14.  
  15. class ChristianScienceMonitor(BasicNewsRecipe):
  16.  
  17.     author        = 'Kovid Goyal, Sujata Raman and Lorenzo Vigentini'
  18.     description   = 'Providing context and clarity on national and international news, peoples and cultures'
  19.  
  20.     cover_url      = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif'
  21.     title          = 'Christian Science Monitor'
  22.     publisher      = 'The Christian Science Monitor'
  23.     category       = 'News, politics, culture, economy, general interest'
  24.  
  25.     language = 'en'
  26.     encoding = 'utf-8'
  27.     timefmt        = '[%a, %d %b, %Y]'
  28.  
  29.     oldest_article        = 16
  30.     max_articles_per_feed = 20
  31.     use_embedded_content  = False
  32.     recursion             = 10
  33.  
  34.     remove_javascript     = True
  35.     no_stylesheets = True
  36.  
  37.  
  38.     preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
  39.         [
  40.             (r'<!--.*?-->', lambda match : ''),
  41.         (r'<body.*?<div id="story"', lambda match : '<body><div id="story"'),
  42.         (r'<div class="pubdate">.*?</div>', lambda m: ''),
  43.         (r'Full HTML version of this story which may include photos, graphics, and related links.*</body>',
  44.               lambda match : '</body>'),
  45.         ]]
  46.  
  47.     extra_css      = '''
  48.                         h1{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: large}
  49.                         .sub{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: small;}
  50.                         .byline{ font-family:Arial,Helvetica,sans-serif ; color:#999999; font-size: x-small;}
  51.                         .postdate{color:#999999 ;  font-family:Arial,Helvetica,sans-serif ; font-size: x-small; }
  52.                         h3{color:#999999 ;  font-family:Arial,Helvetica,sans-serif ; font-size: x-small; }
  53.                         .photoCutline{ color:#333333 ; font-family:Arial,Helvetica,sans-serif ; font-size: x-small; }
  54.                         .photoCredit{ color:#999999 ; font-family:Arial,Helvetica,sans-serif ; font-size: x-small; }
  55.                         #story{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: small; }
  56.                         #main{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: small; }
  57.                         #photo-details{ font-family:Arial,Helvetica,sans-serif ; color:#999999; font-size: x-small;}
  58.                         span.name{color:#205B87;font-family: Georgia,Times,"Times New Roman",serif; font-size: x-small}
  59.                         p#dateline{color:#444444 ;  font-family:Arial,Helvetica,sans-serif ; font-style:italic;}
  60.                         '''
  61.     feeds          = [
  62.                         (u'Top Stories' , u'http://rss.csmonitor.com/feeds/top'),
  63.                         (u'World' , u'http://rss.csmonitor.com/feeds/world'),
  64.                         (u'USA' , u'http://rss.csmonitor.com/feeds/usa'),
  65.                         (u'Commentary' , u'http://rss.csmonitor.com/feeds/commentary'),
  66.                         (u'Money' , u'http://rss.csmonitor.com/feeds/wam'),
  67.                         (u'Learning' , u'http://rss.csmonitor.com/feeds/learning'),
  68.                         (u'Living', u'http://rss.csmonitor.com/feeds/living'),
  69.                         (u'Innovation', u'http://rss.csmonitor.com/feeds/scitech'),
  70.                         (u'Gardening', u'http://rss.csmonitor.com/feeds/gardening'),
  71.                         (u'Environment',u'http://rss.csmonitor.com/feeds/environment'),
  72.                         (u'Arts', u'http://rss.csmonitor.com/feeds/arts'),
  73.                         (u'Books', u'http://rss.csmonitor.com/feeds/books'),
  74.                         (u'Home Forum' , u'http://rss.csmonitor.com/feeds/homeforum')
  75.                      ]
  76.  
  77.     keep_only_tags = [
  78.                         dict(name='div', attrs={'id':'mainColumn'}),
  79.                         ]
  80.  
  81.     remove_tags    = [
  82.                         dict(name='div', attrs={'id':['story-tools','videoPlayer','storyRelatedBottom','enlarge-photo','photo-paginate']}),
  83.                         dict(name=['div','a'], attrs={'class':
  84.                             ['storyToolbar cfx','podStoryRel','spacer3',
  85.                                 'divvy spacer7','comment','storyIncludeBottom',
  86.                                 'hide', 'podBrdr']}),
  87.                         dict(name='ul', attrs={'class':[ 'centerliststories']}) ,
  88.                         dict(name='form', attrs={'id':[ 'commentform']}) ,
  89.                     ]
  90.  
  91.     remove_tags_after = [ dict(name='div', attrs={'class':[ 'ad csmAd']})]
  92.  
  93.