home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3643 < prev    next >
Encoding:
Text File  |  2009-12-26  |  2.7 KB  |  65 lines

  1.  
  2. __license__   = 'GPL v3'
  3. __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
  4. '''
  5. http://www.businessweek.com/magazine/news/articles/business_news.htm
  6. '''
  7.  
  8. from calibre import strftime
  9. from calibre.web.feeds.news import BasicNewsRecipe
  10.  
  11. class BWmagazine(BasicNewsRecipe):
  12.     title                 = 'BusinessWeek Magazine'
  13.     __author__            = 'Darko Miletic'
  14.     description           = 'Stay up to date with BusinessWeek magazine articles. Read news on international business, personal finances & the economy in the BusinessWeek online magazine.'
  15.     publisher             = 'Bloomberg L.P.'
  16.     category              = 'news, International Business News, current news in international business,international business articles, personal business, business week magazine, business week magazine articles, business week magazine online, business week online magazine'
  17.     oldest_article        = 10
  18.     max_articles_per_feed = 100
  19.     no_stylesheets        = True
  20.     encoding              = 'utf-8'
  21.     use_embedded_content  = False
  22.     language              = 'en'
  23.     INDEX                 = 'http://www.businessweek.com/magazine/news/articles/business_news.htm'
  24.     cover_url             = 'http://images.businessweek.com/mz/covers/current_120x160.jpg'
  25.  
  26.  
  27.     conversion_options = {
  28.                           'comment'          : description
  29.                         , 'tags'             : category
  30.                         , 'publisher'        : publisher
  31.                         , 'language'         : language
  32.                         }
  33.  
  34.  
  35.     def parse_index(self):
  36.         articles = []
  37.         soup = self.index_to_soup(self.INDEX)
  38.         ditem = soup.find('div',attrs={'id':'column2'})
  39.         if ditem:
  40.             for item in ditem.findAll('h3'):
  41.                 title_prefix = ''
  42.                 description = ''
  43.                 feed_link = item.find('a')
  44.                 if feed_link and feed_link.has_key('href'):
  45.                     url   = 'http://www.businessweek.com/magazine/' + feed_link['href'].partition('../../')[2]
  46.                     title = title_prefix + self.tag_to_string(feed_link)
  47.                     date  = strftime(self.timefmt)
  48.                     articles.append({
  49.                                       'title'      :title
  50.                                      ,'date'       :date
  51.                                      ,'url'        :url
  52.                                      ,'description':description
  53.                                     })
  54.         return [(soup.head.title.string, articles)]
  55.  
  56.     keep_only_tags = dict(name='div', attrs={'id':'storyBody'})
  57.  
  58.     def print_version(self, url):
  59.         rurl = url.rpartition('?')[0]
  60.         if rurl == '':
  61.            rurl = url
  62.         return rurl.replace('.com/magazine/','.com/print/magazine/')
  63.  
  64.  
  65.