home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3803 < prev    next >
Encoding:
Text File  |  2010-06-27  |  2.8 KB  |  66 lines

  1. #!/usr/bin/env python
  2. # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
  3.  
  4. import string, pprint
  5.  
  6. from calibre.web.feeds.news import BasicNewsRecipe
  7.  
  8. class HoustonChronicle(BasicNewsRecipe):
  9.  
  10.     title          = u'The Houston Chronicle'
  11.     description    = 'News from Houston, Texas'
  12.     __author__       = 'Kovid Goyal'
  13.     language       = 'en'
  14.     timefmt        = ' [%a, %d %b, %Y]'
  15.     no_stylesheets = True
  16.  
  17.     keep_only_tags = [
  18.                         dict(id=['story-head', 'story'])
  19.                      ]
  20.  
  21.     remove_tags    = [
  22.                         dict(id=['share-module', 'resource-box',
  23.                         'resource-box-header'])
  24.                      ]
  25.  
  26.     extra_css      = '''
  27.                         h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
  28.                         h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
  29.                         h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
  30.                         h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
  31.                         p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
  32.                         #story-head h1{font-family :Arial,Helvetica,sans-serif; font-size: xx-large;}
  33.                         #story-head h2{font-family :Arial,Helvetica,sans-serif; font-size: small; color:#000000;}
  34.                         #story-head h3{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
  35.                         #story-head h4{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
  36.                         #story{font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
  37.                         #Text-TextSubhed BoldCond PoynterAgateZero h3{color:#444444;font-family :Arial,Helvetica,sans-serif; font-size:small;}
  38.                         .p260x p{font-family :Arial,Helvetica,serif; font-size:x-small;font-style:italic;}
  39.                         .p260x h6{color:#777777;font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
  40.                      '''
  41.  
  42.  
  43.     def parse_index(self):
  44.         categories = ['news', 'sports', 'business', 'entertainment', 'life',
  45.                 'travel']
  46.         feeds = []
  47.         for cat in categories:
  48.             articles = []
  49.             soup = self.index_to_soup('http://www.chron.com/%s/'%cat)
  50.             for elem in soup.findAll(comptype='story', storyid=True):
  51.                 a = elem.find('a', href=True)
  52.                 if a is None: continue
  53.                 url = a['href']
  54.                 if not url.startswith('http://'):
  55.                     url = 'http://www.chron.com'+url
  56.                 articles.append({'title':self.tag_to_string(a), 'url':url,
  57.                     'description':'', 'date':''})
  58.                 pprint.pprint(articles[-1])
  59.             if articles:
  60.                 feeds.append((string.capwords(cat), articles))
  61.         return feeds
  62.  
  63.  
  64.  
  65.  
  66.