home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3810 < prev    next >
Encoding:
Text File  |  2009-10-14  |  2.5 KB  |  60 lines

  1. __license__   = 'GPL v3'
  2. __copyright__ = '2008, Derry FitzGerald'
  3. '''
  4. iht.com
  5. '''
  6. import re
  7.  
  8. from calibre.web.feeds.news import BasicNewsRecipe
  9. from calibre.ptempfile import PersistentTemporaryFile
  10.  
  11.  
  12. class InternationalHeraldTribune(BasicNewsRecipe):
  13.     title          = u'The International Herald Tribune'
  14.     __author__     = 'Derry FitzGerald'
  15.     language = 'en'
  16.  
  17.     oldest_article = 1
  18.     max_articles_per_feed = 10
  19.     no_stylesheets = True
  20.  
  21.     remove_tags    = [dict(name='div', attrs={'class':'footer'}),
  22.                       dict(name=['form'])]
  23.     preprocess_regexps = [
  24.             (re.compile(r'<!-- webtrends.*', re.DOTALL),
  25.              lambda m:'</body></html>')
  26.                           ]
  27.     extra_css      = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt  }'
  28.  
  29.     feeds          = [
  30.                       (u'Frontpage', u'http://www.iht.com/rss/frontpage.xml'),
  31.                       (u'Business', u'http://www.iht.com/rss/business.xml'),
  32.                       (u'Americas', u'http://www.iht.com/rss/america.xml'),
  33.                       (u'Europe', u'http://www.iht.com/rss/europe.xml'),
  34.                       (u'Asia', u'http://www.iht.com/rss/asia.xml'),
  35.                       (u'Africa and Middle East', u'http://www.iht.com/rss/africa.xml'),
  36.                       (u'Opinion', u'http://www.iht.com/rss/opinion.xml'),
  37.                       (u'Technology', u'http://www.iht.com/rss/technology.xml'),
  38.                       (u'Health and Science', u'http://www.iht.com/rss/healthscience.xml'),
  39.                       (u'Sports', u'http://www.iht.com/rss/sports.xml'),
  40.                       (u'Culture', u'http://www.iht.com/rss/arts.xml'),
  41.                       (u'Style and Design', u'http://www.iht.com/rss/style.xml'),
  42.                       (u'Travel', u'http://www.iht.com/rss/travel.xml'),
  43.                       (u'At Home Abroad', u'http://www.iht.com/rss/athome.xml'),
  44.                       (u'Your Money', u'http://www.iht.com/rss/yourmoney.xml'),
  45.                       (u'Properties', u'http://www.iht.com/rss/properties.xml')
  46.                     ]
  47.     temp_files = []
  48.     articles_are_obfuscated = True
  49.  
  50.     def get_obfuscated_article(self, url, logger):
  51.         br = self.get_browser()
  52.         br.open(url)
  53.         br.select_form(name='printFriendly')
  54.         res = br.submit()
  55.         html = res.read()
  56.         self.temp_files.append(PersistentTemporaryFile('_iht.html'))
  57.         self.temp_files[-1].write(html)
  58.         self.temp_files[-1].close()
  59.         return self.temp_files[-1].name
  60.