home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3656 < prev    next >
Encoding:
Text File  |  2009-10-14  |  2.4 KB  |  49 lines

  1. #!/usr/bin/env python
  2. __license__   = 'GPL v3'
  3. __copyright__ = '2009 Kovid Goyal <kovid at kovidgoyal.net>'
  4.  
  5. from calibre.web.feeds.news import BasicNewsRecipe
  6.  
  7. class AdvancedUserRecipe1234144423(BasicNewsRecipe):
  8.     title          = u'Cincinnati Enquirer'
  9.     oldest_article = 7
  10.     language = 'en'
  11.  
  12.     __author__     = 'Joseph Kitzmiller and Sujata Raman'
  13.     max_articles_per_feed = 100
  14.     no_stylesheets        = True
  15.     use_embedded_content  = False
  16.     remove_javascript     = True
  17.     encoding = 'cp1252'
  18.     extra_css = '''
  19.                     h1{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#0E5398; }
  20.                     h2{color:#666666;}
  21.                    .blog_title{color:#4E0000; font-family:Georgia,"Times New Roman",Times,serif; font-size:large;}
  22.                    .sidebar-photo{font-family:Arial,Helvetica,sans-serif; color:#333333; font-size:30%;}
  23.                    .blog_post{font-family:Arial,Helvetica,sans-serif; color:#222222; font-size:xx-small;}
  24.                    .article-bodytext{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; color:#222222;font-weight:normal;}
  25.                    .blog caitlin{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; color:#222222; font-weight:normal;}
  26.                    .ratingbyline{font-family:Arial,Helvetica,sans-serif; color:#333333; font-size:50%;}
  27.                    .author{font-family:Arial,Helvetica,sans-serif; color:#777777; font-size:50%;}
  28.                    .date{font-family:Arial,Helvetica,sans-serif; color:#777777; font-size:50%;}
  29.                    .padding{font-family:Arial,Helvetica,sans-serif; font-size:70%; color:#222222; font-weight:normal;}
  30.                     '''
  31.  
  32.     keep_only_tags = [dict(name='div', attrs={'class':['padding','sidebar-photo','blog caitlin']})]
  33.  
  34.     remove_tags = [
  35.                      dict(name=['object','link','table','embed'])
  36.                     ,dict(name='div',attrs={'id':["pluckcomments","StoryChat"]})
  37.                     ,dict(name='div',attrs={'class':['articleflex-container',]})
  38.                      ,dict(name='p',attrs={'class':['posted','tags']})
  39.                   ]
  40.  
  41.     feeds          = [(u'Cincinnati Enquirer', u'http://rss.cincinnati.com/apps/pbcs.dll/section?category=rssenq01&mime=xml')]
  42.  
  43.     def preprocess_html(self, soup):
  44.         for item in soup.findAll(style=True):
  45.             del item['style']
  46.         for item in soup.findAll(face=True):
  47.             del item['face']
  48.         return soup
  49.