home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3981 < prev    next >
Encoding:
Text File  |  2009-11-14  |  3.4 KB  |  86 lines

  1. #!/usr/bin/env  python
  2. __license__   = 'GPL v3'
  3. '''
  4. philly.com/inquirer/
  5. '''
  6. from calibre.web.feeds.recipes import BasicNewsRecipe
  7.  
  8. class Philly(BasicNewsRecipe):
  9.  
  10.     title       = 'Philadelphia Inquirer'
  11.     __author__  = 'RadikalDissent and Sujata Raman'
  12.     language = 'en'
  13.     description = 'Daily news from the Philadelphia Inquirer'
  14.     no_stylesheets        = True
  15.     use_embedded_content  = False
  16.     oldest_article = 1
  17.     max_articles_per_feed = 25
  18.  
  19.     extra_css = '''
  20.         h1{font-family:verdana,arial,helvetica,sans-serif; font-size: large;}
  21.         h2{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
  22.         .body-content{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
  23.         .byline {font-size: small; color: #666666; font-style:italic; }
  24.         .lastline {font-size: small; color: #666666; font-style:italic;}
  25.         .contact {font-size: small; color: #666666;}
  26.         .contact p {font-size: small; color: #666666;}
  27.         #photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;}
  28.         .photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;}
  29.         #photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;}
  30.         .photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;}
  31.         .article_timestamp{font-size:x-small; color:#666666;}
  32.         a {font-family:verdana,arial,helvetica,sans-serif; font-size: x-small;}
  33.                 '''
  34.  
  35.     keep_only_tags = [
  36.                dict(name='div', attrs={'class':'story-content'}),
  37.                dict(name='div', attrs={'id': 'contentinside'})
  38.                     ]
  39.  
  40.     remove_tags = [
  41.          dict(name='div', attrs={'class':['linkssubhead','post_balloon','relatedlist','pollquestion','b_sq']}),
  42.          dict(name='dl', attrs={'class':'relatedlist'}),
  43.         dict(name='div', attrs={'id':['photoNav','sidebar_adholder']}),
  44.         dict(name='a', attrs={'class': ['headlineonly','bl']}),
  45.          dict(name='img', attrs={'class':'img_noborder'})
  46.     ]
  47.    # def print_version(self, url):
  48.    #     return url + '?viewAll=y'
  49.  
  50.  
  51.     feeds = [
  52.         ('Front Page', 'http://www.philly.com/inquirer_front_page.rss'),
  53.         ('Business', 'http://www.philly.com/inq_business.rss'),
  54.         #('News', 'http://www.philly.com/inquirer/news/index.rss'),
  55.         ('Nation', 'http://www.philly.com/inq_news_world_us.rss'),
  56.         ('Local', 'http://www.philly.com/inquirer_local.rss'),
  57.         ('Health', 'http://www.philly.com/inquirer_health_science.rss'),
  58.         ('Education', 'http://www.philly.com/inquirer_education.rss'),
  59.         ('Editorial and opinion', 'http://www.philly.com/inq_news_editorial.rss'),
  60.         ('Sports', 'http://www.philly.com/inquirer_sports.rss')
  61.         ]
  62.  
  63.     def get_article_url(self, article):
  64.         ans = article.link
  65.  
  66.         try:
  67.             self.log('Looking for full story link in', ans)
  68.             soup = self.index_to_soup(ans)
  69.             x = soup.find(text="View All")
  70.  
  71.             if x is not None:
  72.                 ans = ans + '?viewAll=y'
  73.                 self.log('Found full story link', ans)
  74.         except:
  75.             pass
  76.         return ans
  77.  
  78.     def postprocess_html(self, soup,first):
  79.  
  80.          for tag in soup.findAll(name='div',attrs={'class':"container_ate_qandatitle"}):
  81.                 tag.extract()
  82.          for tag in soup.findAll(name='br'):
  83.                 tag.extract()
  84.  
  85.          return soup
  86.