home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3816 < prev    next >
Encoding:
Text File  |  2010-01-01  |  2.1 KB  |  56 lines

  1. from calibre.web.feeds.news import BasicNewsRecipe
  2. from calibre.ebooks.BeautifulSoup import BeautifulSoup
  3.  
  4. class TheIndependent(BasicNewsRecipe):
  5.     title          = u'The Independent'
  6.     language       = 'en_GB'
  7.     __author__     = 'Krittika Goyal'
  8.     oldest_article = 1 #days
  9.     max_articles_per_feed = 25
  10.     encoding = 'latin1'
  11.  
  12.     no_stylesheets = True
  13.     #remove_tags_before = dict(name='h1', attrs={'class':'heading'})
  14.     #remove_tags_after  = dict(name='td', attrs={'class':'newptool1'})
  15.     remove_tags = [
  16.        dict(name='iframe'),
  17.        dict(name='div', attrs={'class':'related-articles'}),
  18.        dict(name='div', attrs={'id':['qrformdiv', 'inSection', 'alpha-inner']}),
  19.        dict(name='ul', attrs={'class':'article-tools'}),
  20.        dict(name='ul', attrs={'class':'articleTools'}),
  21.     ]
  22.  
  23.     feeds          = [
  24.             ('UK',
  25.             'http://www.independent.co.uk/news/uk/rss'),
  26.             ('World',
  27.             'http://www.independent.co.uk/news/world/rss'),
  28.             ('Sport',
  29.             'http://www.independent.co.uk/sport/rss'),
  30.             ('Arts and Entertainment',
  31.             'http://www.independent.co.uk/arts-entertainment/rss'),
  32.             ('Business',
  33.             'http://www.independent.co.uk/news/business/rss'),
  34.             ('Life and Style',
  35.             'http://www.independent.co.uk/life-style/gadgets-and-tech/news/rss'),
  36.             ('Science',
  37.             'http://www.independent.co.uk/news/science/rss'),
  38.             ('People',
  39.             'http://www.independent.co.uk/news/people/rss'),
  40.             ('Media',
  41.             'http://www.independent.co.uk/news/media/rss'),
  42.             ('Health and Families',
  43.             'http://www.independent.co.uk/life-style/health-and-families/rss'),
  44.             ('Obituaries',
  45.             'http://www.independent.co.uk/news/obituaries/rss'),
  46.     ]
  47.  
  48.     def preprocess_html(self, soup):
  49.         story = soup.find(name='div', attrs={'id':'mainColumn'})
  50.         #td = heading.findParent(name='td')
  51.         #td.extract()
  52.         soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
  53.         body = soup.find(name='body')
  54.         body.insert(0, story)
  55.         return soup
  56.