home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2011 January / maximum-cd-2011-01.iso / DiscContents / calibre-0.7.26.msi / file_4181 < prev    next >
Encoding:
Text File  |  2010-10-16  |  3.0 KB  |  90 lines

  1. #!/usr/bin/env  python
  2. __license__   = 'GPL v3'
  3. __author__    = 'Tony Stegall'
  4. __copyright__ = '2010, Tony Stegall or Tonythebookworm on mobiread.com'
  5. __version__   = '1'
  6. __date__      = '16, October 2010'
  7. __docformat__ = 'English'
  8.  
  9.  
  10.  
  11. from calibre.web.feeds.news import BasicNewsRecipe
  12.  
  13. class MalaysianMirror(BasicNewsRecipe):
  14.     title      = 'MalaysianMirror'
  15.     __author__ = 'Tonythebookworm'
  16.     description = 'The Pulse of the Nation'
  17.     language = 'en'
  18.     no_stylesheets = True
  19.     publisher           = 'Tonythebookworm'
  20.     category            = 'news'
  21.     use_embedded_content= False
  22.     no_stylesheets      = True
  23.     oldest_article      = 24
  24.  
  25.     remove_javascript   = True
  26.     remove_empty_feeds  = True
  27.     conversion_options = {'linearize_tables' : True}
  28.     extra_css = '''
  29.                     #content_heading{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
  30.  
  31.                     td{text-align:right; font-size:small;margin-top:0px;margin-bottom: 0px;}
  32.  
  33.                     #content_body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
  34.                 '''
  35.  
  36.     keep_only_tags     = [dict(name='table', attrs={'class':['contentpaneopen']})
  37.                           ]
  38.     remove_tags = [dict(name='table', attrs={'class':['buttonheading']})]
  39.     #######################################################################################################################
  40.  
  41.  
  42.     max_articles_per_feed = 10
  43.  
  44.     '''
  45.     Make a variable that will hold the url for the main site because our links do not include the index
  46.     '''
  47.  
  48.     INDEX = 'http://www.malaysianmirror.com'
  49.  
  50.  
  51.  
  52.  
  53.     def parse_index(self):
  54.         feeds = []
  55.         for title, url in [
  56.                             (u"Media Buzz", u"http://www.malaysianmirror.com/media-buzz-front"),
  57.                             (u"Life Style", u"http://www.malaysianmirror.com/lifestylefront"),
  58.                             (u"Features", u"http://www.malaysianmirror.com/featurefront"),
  59.  
  60.  
  61.                              ]:
  62.             articles = self.make_links(url)
  63.             if articles:
  64.                 feeds.append((title, articles))
  65.         return feeds
  66.  
  67.     def make_links(self, url):
  68.         title = 'Temp'
  69.         current_articles = []
  70.         soup = self.index_to_soup(url)
  71.        # print 'The soup is: ', soup
  72.         for item in soup.findAll('div', attrs={'class':'contentheading'}):
  73.             #print 'item is: ', item
  74.             link = item.find('a')
  75.             #print 'the link is: ', link
  76.             if link:
  77.                 url         = self.INDEX + link['href']
  78.                 title       = self.tag_to_string(link)
  79.                 #print 'the title is: ', title
  80.                 #print 'the url is: ', url
  81.                 #print 'the title is: ', title
  82.                 current_articles.append({'title': title, 'url': url, 'description':'', 'date':''}) # append all this
  83.         return current_articles
  84.  
  85.     def preprocess_html(self, soup):
  86.         for item in soup.findAll(attrs={'style':True}):
  87.             del item['style']
  88.         return soup
  89.  
  90.