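# calibre news-download recipe for Wired Magazine (http://www.wired.com/magazine/),
# as shipped with calibre 0.7.x; it scrapes the monthly issue index page
# rather than an RSS feed.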

__license__   = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.wired.com
'''

import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class Wired(BasicNewsRecipe):
    title                 = 'Wired Magazine'
    __author__            = 'Darko Miletic'
    description           = 'Technology, science and culture news'
    publisher             = 'Conde Nast Digital'
    category              = 'news, games, IT, gadgets'
    oldest_article        = 32
    delay                 = 1
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
    masthead_url          = 'http://www.wired.com/images/home/wired_logo.gif'
    language              = 'en'
    publication_type      = 'magazine'
    extra_css             = ' body{font-family: Arial,Verdana,sans-serif} .entryDescription li {display: inline; list-style-type: none} '
    index                 = 'http://www.wired.com/magazine/'

    # Collapse everything between the "Title" meta tag and the real <title> tag.
    preprocess_regexps = [(re.compile(r'<meta name="Title".*<title>', re.DOTALL|re.IGNORECASE),lambda match: '<title>')]
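    # Metadata embedded in the generated e-book by the conversion pipeline.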
    conversion_options = {
                          'comment'   : description
                        , 'tags'      : category
                        , 'publisher' : publisher
                        , 'language'  : language
                        }

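    # Keep only the article body; drop media embeds, navigation and share widgets.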
    keep_only_tags = [dict(name='div', attrs={'class':'post'})]
    remove_tags_after = dict(name='div', attrs={'class':'tweetmeme_button'})
    remove_tags = [
                     dict(name=['object','embed','iframe','link'])
                    ,dict(name='div', attrs={'class':['podcast_storyboard','tweetmeme_button']})
                    ,dict(attrs={'id':'ff_bottom_nav'})
                    ,dict(name='a', attrs={'href':'http://www.wired.com/app'})
                  ]
    remove_attributes = ['height','width']

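    # Build the section feeds by scraping the issue index page instead of RSS.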
    def parse_index(self):
        totalfeeds = []

        soup   = self.index_to_soup(self.index)
        # Cover story and the table-of-contents links.
        majorf = soup.find('div', attrs={'class':'index'})
        if majorf:
            pfarticles = []
            firsta = majorf.find(attrs={'class':'spread-header'})
            if firsta and firsta.a:
                pfarticles.append({
                                    'title'      :self.tag_to_string(firsta.a)
                                   ,'date'       :strftime(self.timefmt)
                                   ,'url'        :'http://www.wired.com' + firsta.a['href']
                                   ,'description':''
                                  })
            for itt in majorf.findAll('li'):
                itema = itt.find('a', href=True)
                if itema:
                    pfarticles.append({
                                        'title'      :self.tag_to_string(itema)
                                       ,'date'       :strftime(self.timefmt)
                                       ,'url'        :'http://www.wired.com' + itema['href']
                                       ,'description':''
                                      })
            totalfeeds.append(('Cover', pfarticles))
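        # Featured articles are pulled from the "my-glider" carousel.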
        features = soup.find('div', attrs={'id':'my-glider'})
        if features:
            farticles = []
            for item in features.findAll('div', attrs={'class':'section'}):
                divurl = item.find('div', attrs={'class':'feature-header'})
                if divurl and divurl.a:
                    divdesc = item.find('div', attrs={'class':'feature-text'})
                    url   = 'http://www.wired.com' + divurl.a['href']
                    title = self.tag_to_string(divurl.a)
                    # The teaser text may be missing for some features.
                    description = self.tag_to_string(divdesc) if divdesc else ''
                    date  = strftime(self.timefmt)
                    farticles.append({
                                       'title'      :title
                                      ,'date'       :date
                                      ,'url'        :url
                                      ,'description':description
                                     })
            totalfeeds.append(('Featured Articles', farticles))
        # One feed per magazine department.
        departments = ['rants','start','test','play','found']
        dept = soup.find('div', attrs={'id':'magazine-departments'})
        if dept:
            for ditem in departments:
                darticles = []
                department = dept.find('div', attrs={'id':'department-'+ditem})
                if department:
                    for item in department.findAll('div'):
                        description = ''
                        feed_link = item.find('a')
                        # .get() avoids the deprecated Tag.has_key().
                        if feed_link and feed_link.get('href'):
                            url   = feed_link['href']
                            title = self.tag_to_string(feed_link)
                            date  = strftime(self.timefmt)
                            darticles.append({
                                              'title'      :title
                                             ,'date'       :date
                                             ,'url'        :url
                                             ,'description':description
                                            })
                    totalfeeds.append((ditem.capitalize(), darticles))
        return totalfeeds

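    # The cover image comes from the same issue index page.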
    def get_cover_url(self):
        cover_url = None
        soup = self.index_to_soup(self.index)
        cover_item = soup.find('div', attrs={'class':'spread-image'})
        if cover_item and cover_item.a and cover_item.a.img:
            cover_url = 'http://www.wired.com' + cover_item.a.img['src']
        return cover_url

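    # 'all/1' serves the whole article on a single page.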
    def print_version(self, url):
        return url.rstrip('/') + '/all/1'

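    # Remove inline styles so extra_css is applied consistently.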
    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup