#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.businessworld.in
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class BusinessWorldMagazine(BasicNewsRecipe):
    title                = 'Business World Magazine'
    __author__           = 'Darko Miletic'
    description          = 'News from India'
    publisher            = 'ABP Pvt Ltd Publication'
    category             = 'news, politics, finances, India, Asia'
    delay                = 1
    no_stylesheets       = True
    # Current-issue index page; ROOT is prepended to relative article links
    INDEX                = 'http://www.businessworld.in/bw/Magazine_Current_Issue'
    ROOT                 = 'http://www.businessworld.in'
    use_embedded_content = False
    encoding             = 'utf-8'
    language             = 'en_IN'

    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
                        , 'publisher'        : publisher
                        , 'language'         : language
                        }

    def is_in_list(self, linklist, url):
        # True if the url has already been collected, to avoid duplicate articles
        for litem in linklist:
            if litem == url:
                return True
        return False

    def parse_index(self):
        # Scrape the current-issue page and build a single feed of articles
        articles = []
        linklist = []
        soup = self.index_to_soup(self.INDEX)

        for item in soup.findAll('div', attrs={'class':'nametitle'}):
            description = ''
            title_prefix = ''
            feed_link = item.find('a')
            if feed_link and feed_link.has_key('href'):
                url = self.ROOT + feed_link['href']
                # Skip links that have already been seen
                if not self.is_in_list(linklist, url):
                    title = title_prefix + self.tag_to_string(feed_link)
                    date  = strftime(self.timefmt)
                    articles.append({
                                      'title'      :title
                                     ,'date'       :date
                                     ,'url'        :url
                                     ,'description':description
                                    })
                    linklist.append(url)
        return [(soup.head.title.string, articles)]

    keep_only_tags = [dict(name='div', attrs={'id':['register-panel','printwrapper']})]
    remove_tags = [dict(name=['object','link'])]

    def print_version(self, url):
        # Rewrite article URLs to their printer-friendly versions
        return url.replace('/bw/','/bw/storyContent/')

    def get_cover_url(self):
        # The cover image on the index page is marked with the 'toughbor' class
        cover_url = None
        soup = self.index_to_soup(self.INDEX)
        cover_item = soup.find('img', attrs={'class':'toughbor'})
        if cover_item:
            cover_url = self.ROOT + cover_item['src']
        return cover_url