home *** CD-ROM | disk | FTP | other *** search
- #!/usr/bin/env python
-
- __license__ = 'GPL v3'
- __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
- '''
- tomshardware.com/us
- '''
-
- import urllib
- from calibre.web.feeds.recipes import BasicNewsRecipe
-
class Tomshardware(BasicNewsRecipe):
    """Calibre news recipe for the US edition of Tom's Hardware.

    The recipe opens the US front page, submits the configured credentials
    to the member login URL, reads two Atom feeds, redirects every article
    to its print page and strips presentation markup from the result.
    """

    # Descriptive metadata; description, category and publisher are reused
    # below to build the conversion options.
    title = "Tom's Hardware US"
    __author__ = 'Darko Miletic'
    description = 'Hardware reviews and News'
    publisher = "Tom's Hardware"
    category = 'news, IT, hardware, USA'
    no_stylesheets = True
    # get_browser() below logs in with self.username / self.password.
    needs_subscription = True
    language = 'en'

    # Site root and the login endpoint derived from it.
    INDEX = 'http://www.tomshardware.com'
    LOGIN = INDEX + '/membres/'
    remove_javascript = True
    use_embedded_content = False

    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    def get_browser(self):
        """Return a browser that has opened the US front page and logged in.

        The login request is only sent when both ``self.username`` and
        ``self.password`` are set; otherwise the browser is returned after
        the initial front-page visit.
        """
        # NOTE(review): called on the class without an instance -- confirm
        # this matches the get_browser signature of the calibre version in use.
        br = BasicNewsRecipe.get_browser()
        br.open(self.INDEX + '/us/')
        if self.username is not None and self.password is not None:
            data = urllib.urlencode({
                'action': 'login_action',
                'r': self.INDEX + '/us/',
                'login': self.username,
                'mdp': self.password,
            })
            br.open(self.LOGIN, data)
        return br

    # Elements dropped from every downloaded page.
    remove_tags = [
        dict(name='div', attrs={'id': 'header'}),
        dict(name='object'),
    ]

    feeds = [
        (u'Latest Articles', u'http://www.tomshardware.com/feeds/atom/tom-s-hardware-us,18-2.xml'),
        (u'Latest News', u'http://www.tomshardware.com/feeds/atom/tom-s-hardware-us,18-1.xml'),
    ]

    def print_version(self, url):
        """Map an article URL to the URL of its print page.

        Article URLs are expected to end in ``,<article_id>.html``. When the
        part before the id contains ``/reviews/`` the review print script is
        used, otherwise the news print script.

        A URL that does not carry an article id in that form is returned
        unchanged instead of being turned into a print link with an empty or
        meaningless ``p1`` value.
        """
        main, sep, _ = url.rpartition('.html')
        rmain, rsep, article_id = main.rpartition(',')
        if not sep or not rsep or not article_id:
            # No '.html' suffix or no ',<id>' before it: nothing to rewrite.
            return url
        if '/reviews/' in rmain:
            rind = 'http://www.tomshardware.com/review_print.php?p1='
        else:
            rind = 'http://www.tomshardware.com/news_print.php?p1='
        return rind + article_id

    def cleanup_image_tags(self, soup):
        """Remove layout attributes from every ``img`` tag and return soup."""
        for item in soup.findAll('img'):
            for attrib in ['height', 'width', 'border', 'align']:
                if item.has_key(attrib):
                    del item[attrib]
        return soup

    def preprocess_html(self, soup):
        """Strip presentation markup from a parsed page and return it.

        Removes the body ``onload`` attribute, deletes every inline ``style``
        attribute, renames ``span`` tags to ``div`` and finally cleans the
        image tags via cleanup_image_tags().
        """
        body = soup.body
        # A page without a <body> yields None here; skip it rather than fail.
        if body is not None:
            del body['onload']
        for item in soup.findAll(style=True):
            del item['style']
        for it in soup.findAll('span'):
            it.name = "div"
        return self.cleanup_image_tags(soup)
-