home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3595 < prev    next >
Encoding:
Text File  |  2009-10-14  |  2.8 KB  |  78 lines

  1. #!/usr/bin/env  python
  2.  
  3. __license__   = 'GPL v3'
  4. __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
  5. '''
  6. www.adventuregamers.com
  7. '''
  8.  
  9. from calibre.web.feeds.news import BasicNewsRecipe
  10.  
  11. class AdventureGamers(BasicNewsRecipe):
  12.     title                 = u'Adventure Gamers'
  13.     language = 'en'
  14.  
  15.     __author__            = 'Darko Miletic'
  16.     description           = 'Adventure games portal'    
  17.     publisher             = 'Adventure Gamers'
  18.     category              = 'news, games, adventure, technology'    
  19.     language = 'en'
  20.  
  21.     oldest_article        = 10
  22.     delay                 = 10
  23.     max_articles_per_feed = 100
  24.     no_stylesheets        = True
  25.     encoding              = 'cp1252'
  26.     remove_javascript     = True
  27.     use_embedded_content  = False
  28.     INDEX                 = u'http://www.adventuregamers.com'
  29.     
  30.     html2lrf_options = [
  31.                           '--comment', description
  32.                         , '--category', category
  33.                         , '--publisher', publisher
  34.                         ]
  35.     
  36.     html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
  37.  
  38.     keep_only_tags = [
  39.                        dict(name='div', attrs={'class':'content_middle'})
  40.                      ]
  41.  
  42.     remove_tags = [
  43.                      dict(name=['object','link','embed','form'])
  44.                     ,dict(name='div', attrs={'class':['related-stories','article_leadout','prev','next','both']})
  45.                   ]
  46.                   
  47.     remove_tags_after = [dict(name='div', attrs={'class':'toolbar_fat'})]
  48.     
  49.     feeds = [(u'Articles', u'http://feeds2.feedburner.com/AdventureGamers')]
  50.     
  51.     def get_article_url(self, article):
  52.         return article.get('guid',  None)
  53.     
  54.     def append_page(self, soup, appendtag, position):
  55.         pager = soup.find('div',attrs={'class':'toolbar_fat_next'})
  56.         if pager:
  57.            nexturl = self.INDEX + pager.a['href']
  58.            soup2 = self.index_to_soup(nexturl)
  59.            texttag = soup2.find('div', attrs={'class':'bodytext'})
  60.            for it in texttag.findAll(style=True):
  61.                del it['style']
  62.            newpos = len(texttag.contents)          
  63.            self.append_page(soup2,texttag,newpos)
  64.            texttag.extract()
  65.            appendtag.insert(position,texttag)
  66.         
  67.     
  68.     def preprocess_html(self, soup):
  69.         mtag = '<meta http-equiv="Content-Language" content="en-US"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
  70.         soup.head.insert(0,mtag)    
  71.         for item in soup.findAll(style=True):
  72.             del item['style']
  73.         self.append_page(soup, soup.body, 3)
  74.         pager = soup.find('div',attrs={'class':'toolbar_fat'})
  75.         if pager:
  76.            pager.extract()        
  77.         return soup
  78.