home *** CD-ROM | disk | FTP | other *** search
- from calibre.web.feeds.recipes import BasicNewsRecipe
- from calibre.ebooks.BeautifulSoup import BeautifulSoup
- #from random import randint
- from urllib import quote
-
class SportsIllustratedColumnistsRecipe(BasicNewsRecipe):
    """calibre recipe for the Sports Illustrated columnist RSS feeds.

    Every article URL is redirected to the clickability print page, and
    article pages are reduced to their headline plus story paragraphs.
    """

    title = u'Sports Illustrated Columnists'
    __author__ = u'kwetal'
    __license__ = u'GPL v3'
    language = 'en'
    version = 2

    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    # RSS sources found at http://sportsillustrated.cnn.com/services/rss/
    feeds = [
        (u'Jon Heyman', u'http://rss.cnn.com/rss/si_jon_heyman.rss'),
        (u'Austin Murphy', u'http://rss.cnn.com/rss/si_austin_murphy.rss'),
        (u'Lars Anderson', u'http://rss.cnn.com/rss/si_lars_anderson.rss'),
        (u'Melissa Segura', u'http://rss.cnn.com/rss/si_melissa_segura.rss'),
        (u'Peter King', u'http://rss.cnn.com/rss/si_peter_king.rss'),
        (u'Scott Wraight', u'http://rss.cnn.com/rss/si_scott_wraight.rss'),
    ]

    def print_version(self, url):
        """Return the print-friendly URL for the article at ``url``.

        The article URL is percent-quoted and appended as the ``url``
        query parameter of the clickability print service.
        """
        # Only these parameters are needed to reach the print version.
        # The site's own Javascript (function PT(), defined in 48.js) sends
        # a few more, all of which can be left out:
        #   title  : can be some random string
        #   random : some random number; the number of digits may matter
        #   expire : unknown what value is expected
        endpoint = 'http://si.printthis.clickability.com/pt/printThis?clickMap=printThis'
        query_tail = '&fb=Y&partnerID=2356&url='
        return endpoint + query_tail + quote(url)

    def preprocess_html(self, soup):
        """Reduce an article page to its headline and story paragraphs.

        A page is treated as an article when it contains a ``div`` with
        class ``cnnstoryheadline``; in that case a freshly built document
        holding the headline ``h1`` (if any) and every ``p`` found inside
        ``td.cnnstorycontentarea`` cells is returned. Any other page is
        treated as a table of contents and returned as-is.
        """
        headline_box = soup.find('div', attrs={'class': 'cnnstoryheadline'})
        if not headline_box:
            # It's a TOC, just return the whole lot
            return soup

        # It's an article, make a valid content container
        article = BeautifulSoup('<html><head></head><body></body></html>')
        container = article.find('body')

        heading = headline_box.find('h1')
        if heading:
            container.append(heading)

        story_cells = soup.findAll('td', attrs={'class': 'cnnstorycontentarea'})
        for cell in story_cells:
            for paragraph in cell.findAll('p'):
                container.append(paragraph)

        return article
-
-
-