home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / jbpress.recipe < prev    next >
Encoding:
Text File  |  2011-09-09  |  1.8 KB  |  52 lines

  1. import urllib2, re
  2. from calibre.web.feeds.news import BasicNewsRecipe
  3.  
  4. class JBPress(BasicNewsRecipe):
  5.     title          = u'JBPress'
  6.     language = 'ja'
  7.     description = u'Japan Business Press New articles (using small print version)'
  8.     __author__    = 'Ado Nishimura'
  9.     needs_subscription = True
  10.     oldest_article = 7
  11.     max_articles_per_feed = 100
  12.     remove_tags_before = dict(id='wrapper')
  13.     no_stylesheets         = True
  14.  
  15.     feeds = [('JBPress new article', 'http://feed.ismedia.jp/rss/jbpress/all.rdf')]
  16.  
  17.  
  18.     def get_cover_url(self):
  19.         return 'http://www.jbpress.co.jp/common/images/v1/jpn/common/logo.gif'
  20.  
  21.     def get_browser(self):
  22.         html = '''<form action="https://jbpress.ismedia.jp/auth/dologin/http://jbpress.ismedia.jp/articles/print/5549" method="post">
  23. <input id="login" name="login" type="text"/>
  24. <input id="password" name="password" type="password"/>
  25. <input id="rememberme" name="rememberme" type="checkbox"/>
  26. </form>
  27. '''
  28.         br = BasicNewsRecipe.get_browser()
  29.         if self.username is not None and self.password is not None:
  30.             br.open('http://jbpress.ismedia.jp/articles/print/5549')
  31.             response = br.response()
  32.             response.set_data(html)
  33.             br.set_response(response)
  34.             br.select_form(nr=0)
  35.             br["login"]   = self.username
  36.             br['password'] = self.password
  37.             br.submit()
  38.         return br
  39.  
  40.     def print_version(self, url):
  41.         url = urllib2.urlopen(url).geturl() # resolve redirect.
  42.         return url.replace('/-/', '/print/')
  43.  
  44.     def preprocess_html(self, soup):
  45.             # remove breadcrumb
  46.             h3s = soup.findAll('h3')
  47.             for h3 in h3s:
  48.                 if re.compile('^JBpress>').match(h3.string):
  49.                     h3.extract()
  50.             return soup
  51.  
  52.