#!/usr/bin/env python
__license__ = 'GPL v3'
'''
www.rd.com
'''
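
# Combines scraped channel pages (parse_index) with the site's RSS feeds
# (parse_rss) into a single list of sections.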
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds import Feed


class ReadersDigest(BasicNewsRecipe):

    title = 'Readers Digest'
    __author__ = 'BrianG'
    language = 'en'
    description = 'Readers Digest Feeds'
    no_stylesheets = True
    use_embedded_content = False
    oldest_article = 60
    max_articles_per_feed = 200
    remove_javascript = True

    extra_css = ''' h1 {font-family:georgia,serif;color:#000000;}
                .mainHd{font-family:georgia,serif;color:#000000;}
                h2 {font-family:Arial,Sans-serif;}
                .name{font-family:Arial,Sans-serif; font-size:x-small;font-weight:bold;}
                .date{font-family:Arial,Sans-serif; font-size:x-small;color:#999999;}
                .byline{font-family:Arial,Sans-serif; font-size:x-small;}
                .photoBkt{font-size:x-small;}
                .vertPhoto{font-size:x-small;}
                .credits{font-family:Arial,Sans-serif; font-size:x-small;color:gray;}
                .artTxt{font-family:georgia,serif;}
                .caption{font-family:georgia,serif; font-size:x-small;color:#333333;}
                .credit{font-family:georgia,serif; font-size:x-small;color:#999999;}
                a:link{color:#CC0000;}
                .breadcrumb{font-family:Arial,Sans-serif;font-size:x-small;}
                '''

    remove_tags = [
        dict(name='h4', attrs={'class': 'close'}),
        dict(name='div', attrs={'class': 'fromLine'}),
        dict(name='img', attrs={'class': 'colorTag'}),
        dict(name='div', attrs={'id': 'sponsorArticleHeader'}),
        dict(name='div', attrs={'class': 'horizontalAd'}),
        dict(name='div', attrs={'id': 'imageCounterLeft'}),
        dict(name='div', attrs={'id': 'commentsPrint'})
    ]

    feeds = [
        ('New in RD', 'http://feeds.rd.com/ReadersDigest'),
        ('Jokes', 'http://feeds.rd.com/ReadersDigestJokes'),
        ('Cartoons', 'http://feeds.rd.com/ReadersDigestCartoons'),
        ('Blogs', 'http://feeds.rd.com/ReadersDigestBlogs')
    ]
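    # Note: because parse_index() is implemented, calibre does not fetch this
    # 'feeds' list directly; parse_rss() below runs it through
    # BasicNewsRecipe.parse_feeds() instead.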

    cover_url = 'http://www.rd.com/images/logo-main-rd.gif'

    #-------------------------------------------------------------------------------------------------

    def print_version(self, url):

        # Derive the print-friendly URL. For regular articles, pull the
        # identity number out of the article URL and append it to the print
        # URL root.
        if url.find('/article') > 0:
            ident = url[url.find('/article') + 8:url.find('.html?') - 4]
            url = 'http://www.rd.com/content/printContent.do?contentId=' + ident
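            # (The slice assumes the site's URL scheme: the id is the text
            # between '/article' and the four characters before '.html?'.)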

        elif url.find('/post') > 0:

            # Blog post URLs carry no id to work with, so fetch the page
            # itself and pull the print link out of it.
            soup = self.index_to_soup(url)
            newsoup = soup.find('ul', attrs={'class': 'printBlock'})
            url = 'http://www.rd.com' + newsoup('a')[0]['href']
            # Trim the '&Keep...' query suffix from the print link.
            url = url[0:url.find('&Keep')]

        return url

    #-------------------------------------------------------------------------------------------------

    def parse_index(self):

        pages = [
            ('Your America', 'http://www.rd.com/your-america-inspiring-people-and-stories', 'channelLeftContainer', {'class': 'moreLeft'}),
            # Skipped: mostly recipe articles.
            # ('Living Healthy', 'http://www.rd.com/living-healthy', 'channelLeftContainer', {'class': 'moreLeft'}),
            ('Advice and Know-How', 'http://www.rd.com/advice-and-know-how', 'channelLeftContainer', {'class': 'moreLeft'})
        ]
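        # Each pages entry is (section title, channel URL, divider id, attrs
        # dict matching the article teasers); page_parse() currently ignores
        # the divider id.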

        feeds = []

        for page in pages:
            section, url, divider, attrList = page
            newArticles = self.page_parse(url, divider, attrList)
            feeds.append((section, newArticles))

        # After the site pages have been processed, parse several RSS feeds
        # for additional sections.
        newfeeds = self.parse_rss()

        # parse_rss() returns a list of calibre Feed objects. Convert each
        # feed/article combination into the (title, list of article dicts)
        # form that parse_index() must return.
        for feed in newfeeds:
            newArticles = []
            for article in feed.articles:
                newArt = {
                    'title': article.title,
                    'url': article.url,
                    'date': article.date,
                    'description': article.text_summary
                }
                newArticles.append(newArt)

            # 'New in RD' and 'Blogs' should be the first two feeds.
            if feed.title == 'New in RD':
                feeds.insert(0, (feed.title, newArticles))
            elif feed.title == 'Blogs':
                feeds.insert(1, (feed.title, newArticles))
            else:
                feeds.append((feed.title, newArticles))

        return feeds

    #-------------------------------------------------------------------------------------------------

    def page_parse(self, mainurl, divider, attrList):

        # Build article dicts from the teaser blocks on a channel page; the
        # teaser image's alt text serves as the article title.
        articles = []
        mainsoup = self.index_to_soup(mainurl)
        for item in mainsoup.findAll(attrs=attrList):
            newArticle = {
                'title': item('img')[0]['alt'],
                'url': 'http://www.rd.com' + item('a')[0]['href'],
                'date': '',
                'description': ''
            }
            articles.append(newArticle)

        return articles

    #-------------------------------------------------------------------------------------------------

    def parse_rss(self):

        # Do the "official" parse_feeds first to fetch the RSS feeds listed
        # in the 'feeds' attribute.
        feeds = BasicNewsRecipe.parse_feeds(self)

        # Loop through the articles in all feeds, collecting every article
        # with 'recipe' in its title and removing it from its original feed.
        recipeArticles = []
        for curfeed in feeds:
            delList = []
            for curarticle in curfeed.articles:
                if curarticle.title.upper().find('RECIPE') >= 0:
                    recipeArticles.append(curarticle)
                    delList.append(curarticle)
            if len(delList) > 0:
                for d in delList:
                    index = curfeed.articles.index(d)
                    curfeed.articles[index:index + 1] = []
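        # (The slice assignment above removes the matched article in place;
        # it is equivalent to 'del curfeed.articles[index]'.)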

        # If any recipes were found, collect them into a new "virtual" Feed
        # and append it to the "official" list of feeds.
        if len(recipeArticles) > 0:
            pfeed = Feed()
            # Fill in the Feed attributes by hand, since this feed was not
            # built by parse_feeds().
            pfeed.title = 'Recipes'
            pfeed.description = 'Recipe Feed (Virtual)'
            pfeed.image_url = None
            pfeed.oldest_article = 30
            pfeed.id_counter = len(recipeArticles)
            pfeed.articles = recipeArticles[:]
            feeds.append(pfeed)

        return feeds