home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2011 January / maximum-cd-2011-01.iso / DiscContents / calibre-0.7.26.msi / file_3870 < prev    next >
Encoding:
Text File  |  2010-09-30  |  2.6 KB  |  65 lines

  1. from calibre.web.feeds.news import BasicNewsRecipe
  2. import re
  3.  
  class BigOven(BasicNewsRecipe):
      """Calibre news recipe for the BigOven '4 & 5 Star Rated Recipes' feed.

      The recipe logs in to bigoven.com when a username and password are
      supplied, then trims each downloaded page down to its title,
      ingredients and preparation sections before conversion.
      """

      title = 'BigOven'
      __author__ = 'Starson17'
      description = 'Recipes for the Foodie in us all. Registration is free. A fake username and password just gives smaller photos.'
      language = 'en'
      category = 'news, food, recipes, gourmet'
      publisher = 'Starson17'
      use_embedded_content = False
      no_stylesheets = True
      oldest_article = 24
      remove_javascript = True
      remove_empty_feeds = True
      cover_url = 'http://www.software.com/images/products/BigOven%20Logo_177_216.JPG'
      max_articles_per_feed = 30
      needs_subscription = True

      # Metadata handed to the conversion pipeline; values mirror the class
      # attributes above so the two cannot drift apart.
      conversion_options = {
          'linearize_tables': True,
          'comment': description,
          'tags': category,
          'publisher': publisher,
          'language': language,
      }

      remove_attributes = ['style', 'font']

      remove_tags = [
          dict(name='div', attrs={'class': ['ppy-caption']}),
          dict(name='div', attrs={'id': ['float_corner']}),
      ]

      feeds = [(u'4 & 5 Star Rated Recipes', u'http://feeds.feedburner.com/Bigovencom-RecipeRaves?format=xml')]

      # Compiled once at class creation instead of on every page.
      _VIEW_METRIC_RE = re.compile(r'.*View Metric.*', re.DOTALL)
      _ADD_PHOTO_RE = re.compile(r'.*Add my own photo.*', re.DOTALL)
      _INGREDIENTS_RE = re.compile(r'.*Ingredients.*', re.DOTALL)
      _PREPARATION_RE = re.compile(r'Preparation.*', re.DOTALL)

      def get_browser(self):
          """Return a browser, logged in to bigoven.com when credentials exist.

          If either ``self.username`` or ``self.password`` is ``None`` the
          browser is returned without attempting a login.
          """
          # Pass ``self`` explicitly: the bare ``BasicNewsRecipe.get_browser()``
          # call only works while get_browser is a classmethod and raises
          # TypeError once the base class defines it as an instance method.
          br = BasicNewsRecipe.get_browser(self)
          if self.username is not None and self.password is not None:
              br.open('http://www.bigoven.com/account/login?ReturnUrl=/')
              # NOTE(review): nr=1 picks the second form on the page, presumably
              # the login form -- confirm against the live site.
              br.select_form(nr=1)
              br['Email'] = self.username
              br['Password'] = self.password
              br.submit()
          return br

      def preprocess_html(self, soup):
          """Strip site chrome from a downloaded recipe page and return *soup*.

          * ``a.deflink`` anchors are replaced by their plain text.
          * For each 'View Metric' / 'Add my own photo' match, the element two
            levels above the match is removed.
          * Every ``div.container`` is removed unless it holds an ``h1`` or an
            ``h2`` matching the Ingredients / Preparation patterns.
          """
          for link in soup.findAll(name='a', attrs={'class': ['deflink']}):
              # ``link.string`` is None when the anchor has more than one child,
              # which makes replaceWith() fail; joining the text nodes yields the
              # same value in the single-string case and is safe otherwise.
              link.replaceWith(''.join(link.findAll(text=True)))

          for pattern in (self._VIEW_METRIC_RE, self._ADD_PHOTO_RE):
              for match in soup.findAll(name='a', text=pattern):
                  match.parent.parent.extract()

          for container in soup.findAll(name='div', attrs={'class': ['container']}):
              if container.find(name='h1'):
                  continue
              if container.find(name='h2', text=self._INGREDIENTS_RE):
                  # Routed through the recipe log rather than a Python 2 print
                  # statement, which is a SyntaxError on Python 3.
                  self.log.debug('tag found Ingred h2')
                  continue
              if container.find(name='h2', text=self._PREPARATION_RE):
                  self.log.debug('tag found Prep h2')
                  continue
              container.extract()
          return soup
  63.     
  64.  
  65.