home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3916 < prev    next >
Encoding:
Text File  |  2009-10-14  |  1.5 KB  |  44 lines

  1. #!/usr/bin/env  python
  2.  
  3. __license__   = 'GPL v3'
  4. __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
  5. '''
  6. msdn.microsoft.com/en-us/magazine
  7. '''
  8. from calibre.web.feeds.news import BasicNewsRecipe
  9.  
  10. class MSDNMagazine_en(BasicNewsRecipe):
  11.     title                 = 'MSDN Magazine'
  12.     __author__            = 'Darko Miletic'
  13.     description           = 'The Microsoft Journal for Developers'
  14.     publisher             = 'Microsoft Press'
  15.     category              = 'news, IT, Microsoft, programming, windows'
  16.     oldest_article        = 31
  17.     max_articles_per_feed = 100
  18.     no_stylesheets        = True
  19.     use_embedded_content  = False
  20.     encoding              = 'utf-8'
  21.     language              = 'en'
  22.  
  23.  
  24.  
  25.     feeds = [(u'Articles', u'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1')]
  26.  
  27.     keep_only_tags = [dict(name='div', attrs={'class':'navpage'})]
  28.  
  29.     remove_tags = [
  30.                      dict(name=['object','link','base','table'])
  31.                     ,dict(name='div', attrs={'class':'MTPS_CollapsibleRegion'})
  32.                   ]
  33.     remove_tags_after = dict(name='div', attrs={'class':'navpage'})
  34.  
  35.     def preprocess_html(self, soup):
  36.         for item in soup.findAll('div',attrs={'class':['FeatureSmallHead','ColumnTypeSubTitle']}):
  37.             item.name="h2"
  38.         for item in soup.findAll('div',attrs={'class':['FeatureHeadline','ColumnTypeTitle']}):
  39.             item.name="h1"
  40.         for item in soup.findAll('div',attrs={'class':'ArticleTypeTitle'}):
  41.             item.name="h3"
  42.         return soup
  43.  
  44.