home *** CD-ROM | disk | FTP | other *** search
- #!/usr/bin/env python
-
- __license__ = 'GPL v3'
- __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
- '''
- msdn.microsoft.com/en-us/magazine
- '''
- from calibre.web.feeds.news import BasicNewsRecipe
- from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
-
class MSDNMagazine_en(BasicNewsRecipe):
    '''
    Calibre recipe for the English edition of MSDN Magazine.

    The issue name and cover image are scraped from the magazine landing
    page (``base_url``); the list of articles is read from the magazine's
    RSS feed (``rss_url``).
    '''

    # Recipe metadata.
    title = 'MSDN Magazine'
    __author__ = 'Darko Miletic'
    description = 'The Microsoft Journal for Developers'
    masthead_url = 'http://i3.msdn.microsoft.com/Platform/MasterPages/MsdnMagazine/smalllogo.png'
    publisher = 'Microsoft Press'
    category = 'news, IT, Microsoft, programming, windows'

    # Download/conversion options read by BasicNewsRecipe
    # (see the calibre recipe API for their exact meaning).
    oldest_article = 31
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'en'

    # Landing page (issue name + cover) and RSS feed (article list).
    base_url = 'http://msdn.microsoft.com/en-us/magazine/default.aspx'
    rss_url = 'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1'

    # Keep only the main content container of each article page.
    keep_only_tags = [dict(name='div', attrs={'id': 'MainContent'})]

    # Drop the rating and sharing widgets from the kept content.
    remove_tags = [
        dict(name='div', attrs={'class': 'DivRatingsOnly'}),
        dict(name='div', attrs={'class': 'ShareThisButton4'}),
    ]

    def find_articles(self):
        '''
        Yield one article dict per usable ``<item>`` of the RSS feed.

        Each dict has the keys ``title``, ``url``, ``description`` and
        ``date``, all as plain text. Items whose ``<link>`` is missing or
        empty are skipped, since there is nothing to download for them.
        '''
        raw_feed = self.browser.open(self.rss_url).read()
        feed = BeautifulStoneSoup(
            raw_feed, convertEntities=BeautifulStoneSoup.XML_ENTITIES)

        for item in feed.findAll('item'):
            url = self.tag_to_string(item.find('link')).strip()
            if not url:
                continue

            # The description is flattened twice: first to get the text of
            # the <description> element, then that text is parsed as HTML
            # and flattened again so no markup is left in the summary.
            desc_html = self.tag_to_string(item.find('description'))
            description = self.tag_to_string(BeautifulSoup(desc_html))

            yield {
                'title': self.tag_to_string(item.find('title')),
                'url': url,
                'description': description,
                'date': self.tag_to_string(item.find('pubdate')),
            }

    def parse_index(self):
        '''
        Build the recipe index: a single section holding all feed articles.

        The section is named after the current issue (the text of the first
        ``<h1>`` on the landing page, e.g. "August 2011"). If no issue name
        can be found, the recipe title is used instead. As a side effect,
        ``self.cover_url`` is set when a cover image is found.

        Returns a one-element list ``[(section_title, [article, ...])]``.
        '''
        soup = self.index_to_soup(self.base_url)

        # Issue name, e.g. "August 2011".
        issue_name = self.tag_to_string(soup.find('h1'))

        # The cover is the image whose alt text equals the issue name. Only
        # look for it when a name was found: matching on an empty alt text
        # could pick up an unrelated image.
        if issue_name:
            cover = soup.find('img', attrs={'alt': issue_name})
            if cover is not None and cover.get('src'):
                self.cover_url = cover['src']

        section_title = issue_name or self.title
        return [(section_title, list(self.find_articles()))]
-
-