home *** CD-ROM | disk | FTP | other *** search
- # -*- coding: utf-8 -*-
- from calibre.web.feeds.recipes import BasicNewsRecipe
-
class NYTimes(BasicNewsRecipe):
    """Calibre recipe for the New England Journal of Medicine.

    Logs in with the subscriber's credentials and builds the feed list from
    the journal's current-issue table of contents. The class name does not
    match the publication; it is kept unchanged so existing references to
    the recipe keep working.
    """

    title = 'New England Journal of Medicine'
    __author__ = 'Kovid Goyal'
    description = 'Medical news'
    timefmt = ' [%d %b, %Y]'
    needs_subscription = True
    language = 'en'

    no_stylesheets = True
    keep_only_tags = dict(id='content')

    def get_browser(self):
        """Return a browser that has been logged in to nejm.org.

        Fills the ``frmLogin`` form with ``self.username`` and
        ``self.password`` and submits it.

        Raises:
            Exception: if the page returned after submitting the form does
                not contain a "Sign Out" link, i.e. the login failed.
        """
        # get_browser is an instance method on the base class, so it must
        # be given ``self`` when called through the class.
        br = BasicNewsRecipe.get_browser(self)
        br.open('http://www.nejm.org/action/showLogin?uri=http://www.nejm.org/')
        br.select_form(name='frmLogin')
        br['login'] = self.username
        br['password'] = self.password
        response = br.submit()
        raw = response.read()
        # The response body is bytes on Python 3; compare like with like so
        # the membership test cannot raise TypeError.
        marker = '>Sign Out<'
        if isinstance(raw, bytes):
            marker = marker.encode('ascii')
        if marker not in raw:
            raise Exception('Login failed. Check your username and password')
        return br

    def nejm_get_index(self):
        """Return the soup for the current issue's table-of-contents page."""
        return self.index_to_soup('http://content.nejm.org/current.dtl')

    def parse_index(self):
        """Parse the current-issue table of contents into feeds.

        Returns:
            A list of ``(section_title, articles)`` tuples. Each article is
            a dict with ``title``, ``url`` and ``date`` keys, plus ``author``
            and ``description`` when the page provides them. Sections with
            no usable articles are omitted.

        Raises:
            ValueError: if the page has no ``tocContent`` element.
        """
        parse_soup = self.nejm_get_index()

        feeds = []

        div = parse_soup.find(attrs={'class': 'tocContent'})
        if div is None:
            # Fail with a clear message instead of an AttributeError on None
            # when the page layout is not what this recipe expects.
            raise ValueError(
                'Could not find the table of contents (class "tocContent") '
                'on the current issue page')

        for group in div.findAll(attrs={'class': 'articleGrouping'}):
            feed_title = group.find(attrs={'class': 'articleType'})
            if feed_title is None:
                continue
            feed_title = self.tag_to_string(feed_title)
            articles = []
            self.log('Found section:', feed_title)
            # The class attribute may hold several class names, so match on
            # substring rather than equality.
            for art in group.findAll(
                    attrs={'class': lambda x: x and 'articleEntry' in x}):
                link = art.find(
                    attrs={'class': lambda x: x and 'articleLink' in x})
                if link is None:
                    continue
                a = link.find('a', href=True)
                if a is None:
                    continue
                url = a.get('href')
                if url.startswith('/'):
                    # Site-relative link: make it absolute.
                    url = 'http://www.nejm.org' + url
                title = self.tag_to_string(a)
                self.log.info('\tFound article:', title, 'at', url)
                article = {'title': title, 'url': url, 'date': ''}
                au = art.find(attrs={'class': 'articleAuthors'})
                if au is not None:
                    article['author'] = self.tag_to_string(au)
                desc = art.find(attrs={'class': 'hover_text'})
                if desc is not None:
                    desc = self.tag_to_string(desc)
                    if 'author' in article:
                        desc = ' by ' + article['author'] + ' ' + desc
                    article['description'] = desc
                articles.append(article)
            if articles:
                feeds.append((feed_title, articles))

        return feeds
-
-
-