home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_4007 < prev    next >
Encoding:
Text File  |  2010-04-29  |  2.7 KB  |  58 lines

  1. import re
  2.  
  3. from calibre.web.feeds.news import BasicNewsRecipe
  4.  
  5.  
  6. class Reuters(BasicNewsRecipe):
  7.  
  8.     title = 'Reuters'
  9.     description = 'Global news'
  10.     __author__ = 'Kovid Goyal and Sujata Raman'
  11.     use_embedded_content   = False
  12.     language = 'en'
  13.  
  14.     max_articles_per_feed = 10
  15.     no_stylesheets = True
  16.     remove_javascript = True
  17.  
  18.     extra_css      = '''
  19.                          body{font-family:arial,helvetica,sans;}
  20.                         h1{ font-size:larger ; font-weight:bold;  }
  21.                         .byline{color:#006E97;font-size:x-small; font-weight:bold;}
  22.                         .location{font-size:x-small; font-weight:bold;}
  23.                         .timestamp{font-size:x-small; }
  24.                         '''
  25.  
  26.     keep_only_tags = [dict(name='div', attrs={'class':'column2 gridPanel grid8'})]
  27.  
  28.  
  29.     remove_tags = [dict(name='div', attrs={'id':['recommendedArticles','relatedNews','relatedVideo','relatedFactboxes']}),
  30.                    dict(name='p', attrs={'class':['relatedTopics']}),
  31.                     dict(name='a', attrs={'id':['fullSizeLink']}),
  32.                    dict(name='div', attrs={'class':['photoNav','relatedTopicButtons','articleComments','gridPanel grid8','footerHalf gridPanel grid1','gridPanel grid2','gridPanel grid3']}),]
  33.  
  34.     preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
  35. [
  36.         ##(r'<HEAD>.*?</HEAD>' , lambda match : '<HEAD></HEAD>'),
  37.         (r'<div id="apple-rss-sidebar-background">.*?<!-- start Entries -->', lambda match : ''),
  38.         (r'<!-- end apple-rss-content-area -->.*?</body>', lambda match : '</body>'),
  39.         (r'<script.*?>.*?</script>', lambda match : ''),
  40.         (r'<body>.*?<div class="contentBand">', lambda match : '<body>'),
  41.         (r'<h3>Share:</h3>.*?</body>', lambda match : '<!-- END:: Shared Module id=36615 --></body>'),
  42.         (r'<div id="atools" class="articleTools">.*?<div class="linebreak">', lambda match : '<div class="linebreak">'),
  43.     ]
  44.     ]
  45.  
  46.  
  47.  
  48.     feeds = [ ('Top Stories', 'http://feeds.reuters.com/reuters/topNews?format=xml'),
  49.                   ('US News', 'http://feeds.reuters.com/reuters/domesticNews?format=xml'),
  50.                   ('World News', 'http://feeds.reuters.com/reuters/worldNews?format=xml'),
  51.                   ('Politics News', 'http://feeds.reuters.com/reuters/politicsNews?format=xml'),
  52.                   ('Science News', 'http://feeds.reuters.com/reuters/scienceNews?format=xml'),
  53.                   ('Environment News', 'http://feeds.reuters.com/reuters/Environment?format=xml'),
  54.                   ('Technology News', 'http://feeds.reuters.com/reuters/technologyNews?format=xml'),
  55.                   ('Oddly Enough News', 'http://feeds.reuters.com/reuters/oddlyEnoughNews?format=xml')
  56.          ]
  57.  
  58.