home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2011 January / maximum-cd-2011-01.iso / DiscContents / calibre-0.7.26.msi / file_4005 < prev    next >
Encoding:
Text File  |  2010-10-12  |  2.7 KB  |  75 lines

  1. __license__   = 'GPL v3'
  2. __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
  3. '''
  4. ft.com
  5. '''
  6. from calibre import strftime
  7. from calibre.web.feeds.news import BasicNewsRecipe
  8.  
  9. class FinancialTimes(BasicNewsRecipe):
  10.     title                 = u'Financial Times - UK printed edition'
  11.     __author__            = 'Darko Miletic'
  12.     description           = 'Financial world news'
  13.     oldest_article        = 2
  14.     language              = 'en_GB'
  15.     max_articles_per_feed = 250
  16.     no_stylesheets        = True
  17.     use_embedded_content  = False
  18.     needs_subscription    = True
  19.     encoding              = 'utf8'
  20.     simultaneous_downloads= 1
  21.     delay                 = 1
  22.     LOGIN                 = 'https://registration.ft.com/registration/barrier/login'
  23.     INDEX                 = 'http://www.ft.com/uk-edition'
  24.     PREFIX                = 'http://www.ft.com'
  25.  
  26.     def get_browser(self):
  27.         br = BasicNewsRecipe.get_browser()
  28.         if self.username is not None and self.password is not None:
  29.             br.open(self.LOGIN)
  30.             br.select_form(name='loginForm')
  31.             br['username'] = self.username
  32.             br['password'] = self.password
  33.             br.submit()
  34.         return br
  35.  
  36.     keep_only_tags    = [ dict(name='div', attrs={'id':'cont'}) ]
  37.     remove_tags_after = dict(name='p', attrs={'class':'copyright'})
  38.     remove_tags = [
  39.                       dict(name='div', attrs={'id':'floating-con'})
  40.                      ,dict(name=['meta','iframe','base','object','embed','link'])
  41.                   ]
  42.     remove_attributes = ['width','height','lang']
  43.  
  44.     extra_css = """
  45.                 body{font-family:Arial,Helvetica,sans-serif;}
  46.                 h2{font-size:large;}
  47.                 .ft-story-header{font-size:xx-small;}
  48.                 .ft-story-body{font-size:small;}
  49.                 a{color:#003399;}
  50.                 .container{font-size:x-small;}
  51.                 h3{font-size:x-small;color:#003399;}
  52.                 .copyright{font-size: x-small}
  53.                 """
  54.  
  55.     def parse_index(self):
  56.         articles = []
  57.         soup = self.index_to_soup(self.INDEX)
  58.         wide = soup.find('div',attrs={'class':'wide'})
  59.         if wide:
  60.             for item in wide.findAll('a',href=True):
  61.                 url   = self.PREFIX + item['href']
  62.                 title = self.tag_to_string(item)
  63.                 date = strftime(self.timefmt)
  64.                 articles.append({
  65.                                   'title'      :title
  66.                                  ,'date'       :date
  67.                                  ,'url'        :url
  68.                                  ,'description':''
  69.                                 })
  70.         return [('FT UK edition',articles)]
  71.  
  72.     def preprocess_html(self, soup):
  73.         return self.adeify_images(soup)
  74.  
  75.