home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3711 < prev    next >
Encoding:
Text File  |  2010-07-28  |  2.3 KB  |  61 lines

  1. from calibre.web.feeds.news import BasicNewsRecipe
  2.  
  3. class DrawAndCook(BasicNewsRecipe):
  4.     title               = 'DrawAndCook'
  5.     __author__          = 'Starson17'
  6.     description         = 'Drawings of recipes!'
  7.     language            = 'en'
  8.     publisher           = 'Starson17'
  9.     category            = 'news, food, recipes'
  10.     use_embedded_content= False
  11.     no_stylesheets      = True
  12.     oldest_article      = 24
  13.     remove_javascript   = True
  14.     remove_empty_feeds    = True
  15.     cover_url           = 'http://farm5.static.flickr.com/4043/4471139063_4dafced67f_o.jpg'
  16.     max_articles_per_feed = 30
  17.  
  18.     remove_attributes = ['style', 'font']
  19.  
  20.     def parse_index(self):
  21.         feeds = []
  22.         for title, url in [
  23.                             ("They Draw and Cook", "http://www.theydrawandcook.com/")
  24.                             ]:
  25.             articles = self.make_links(url)
  26.             if articles:
  27.                 feeds.append((title, articles))
  28.         print 'feeds are: ', feeds
  29.         return feeds
  30.  
  31.     def make_links(self, url):
  32.         soup = self.index_to_soup(url)
  33.         title = ''
  34.         date = ''
  35.         current_articles = []
  36.         soup = self.index_to_soup(url)
  37.         recipes = soup.findAll('div', attrs={'class': 'date-outer'})
  38.         for recipe in recipes:
  39.             title = recipe.h3.a.string
  40.             page_url = recipe.h3.a['href']
  41.             current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':date})
  42.         return current_articles
  43.  
  44.  
  45.     keep_only_tags     = [dict(name='h3', attrs={'class':'post-title entry-title'})
  46.                          ,dict(name='div', attrs={'class':'post-body entry-content'})
  47.                          ]
  48.  
  49.     remove_tags = [dict(name='div', attrs={'class':['separator']})
  50.                   ,dict(name='div', attrs={'class':['post-share-buttons']})
  51.                   ]
  52.  
  53.     extra_css = '''
  54.                     h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
  55.                     h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
  56.                     img {max-width:100%; min-width:100%;}
  57.                     p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
  58.                     body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
  59.         '''
  60.  
  61.