home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3776 < prev    next >
Encoding:
Text File  |  2010-01-21  |  3.4 KB  |  78 lines

  1. #!/usr/bin/env  python
  2. __license__   = 'GPL v3'
  3.  
  4. __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
  5. __docformat__ = 'restructuredtext en'
  6.  
  7. '''
  8. globeandmail.com
  9. '''
  10.  
  11. from calibre.web.feeds.news import BasicNewsRecipe
  12.  
  13. class GlobeAndMail(BasicNewsRecipe):
  14.     title = u'Globe and Mail'
  15.     language = 'en_CA'
  16.  
  17.     __author__ = 'Kovid Goyal'
  18.     oldest_article = 2
  19.     max_articles_per_feed = 10
  20.     no_stylesheets = True
  21.     extra_css = '''
  22.     h3 {font-size: 22pt; font-weight:bold; margin:0px; padding:0px 0px 8pt 0px;}
  23.     h4 {margin-top: 0px;}
  24.     #byline { font-family: monospace; font-weight:bold; }
  25.     #placeline {font-weight:bold;}
  26.     #credit {margin-top:0px;}
  27.     .tag {font-size: 22pt;}'''
  28.     description = 'Canada\'s national newspaper'
  29.     remove_tags_before = dict(id="article-top")
  30.     remove_tags = [
  31.         {'id':['util', 'article-tabs', 'comments', 'article-relations',
  32.         'gallery-controls', 'video', 'galleryLoading','deck','header',
  33.         'toolsBottom'] },
  34.         {'class':['credit','inline-img-caption','tab-pointer'] },
  35.         dict(name='div', attrs={'id':['lead-photo', 'most-popular-story']}),
  36.         dict(name='div', attrs={'class':'right'}),
  37.         dict(name='div', attrs={'id':'footer'}),
  38.         dict(name='div', attrs={'id':'beta-msg'}),
  39.         dict(name='img', attrs={'class':'headshot'}),
  40.         dict(name='div', attrs={'class':'brand'}),
  41.         dict(name='div', attrs={'id':'nav-wrap'}),
  42.         dict(name='div', attrs={'id':'featureTopics'}),
  43.         dict(name='div', attrs={'id':'videoNav'}),
  44.         dict(name='div', attrs={'id':'blog-header'}),
  45.         dict(name='div', attrs={'id':'right-rail'}),
  46.         dict(name='div', attrs={'id':'group-footer-container'}),
  47.         dict(name=['iframe', 'style'])
  48.         ]
  49.     remove_attributes = ['style']
  50.     remove_tags_after = [{'id':['article-content']},
  51.         {'class':['pull','inline-img'] },
  52.         dict(name='img', attrs={'class':'inline-media-embed'}),
  53.         ]
  54.     feeds = [
  55.             (u'Latest headlines', u'http://www.theglobeandmail.com/?service=rss'),
  56.             (u'Top stories', u'http://www.theglobeandmail.com/?service=rss&feed=topstories'),
  57.             (u'National', u'http://www.theglobeandmail.com/news/national/?service=rss'),
  58.             (u'Politics', u'http://www.theglobeandmail.com/news/politics/?service=rss'),
  59.             (u'World', u'http://www.theglobeandmail.com/news/world/?service=rss'),
  60.             (u'Business', u'http://www.theglobeandmail.com/report-on-business/?service=rss'),
  61.             (u'Opinions', u'http://www.theglobeandmail.com/news/opinions/?service=rss'),
  62.             (u'Columnists', u'http://www.theglobeandmail.com/news/opinions/columnists/?service=rss'),
  63.             (u'Globe Investor', u'http://www.theglobeandmail.com/globe-investor/?service=rss'),
  64.             (u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss'),
  65.             (u'Technology', u'http://www.theglobeandmail.com/news/technology/?service=rss'),
  66.             (u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
  67.             (u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
  68.             (u'Blogs', u'http://www.theglobeandmail.com/blogs/?service=rss'),
  69.             (u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'),
  70.             (u'Auto', u'http://www.theglobeandmail.com/auto/?service=rss')
  71.             ]
  72.  
  73.     def get_article_url(self, article):
  74.         url = BasicNewsRecipe.get_article_url(self, article)
  75.         if '/video/' not in url:
  76.             return url
  77.  
  78.