home *** CD-ROM | disk | FTP | other *** search
- from calibre.web.feeds.news import BasicNewsRecipe
- import re
-
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    """Calibre news recipe for Popular Science, built from popsci.com full-text feeds.

    The class only sets recipe options and overrides ``preprocess_html`` to
    strip "Gallery:" items out of the downloaded markup.
    """

    title = 'Popular Science'
    language = 'en'
    __author__ = 'TonytheBookworm'
    description = 'Popular Science'
    publisher = 'Popular Science'
    category = 'gadgets,science'
    # Maximum article age in days (one week); lower it for more current articles.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = True

    masthead_url = 'http://www.raytheon.com/newsroom/rtnwcm/groups/Public/documents/masthead/rtn08_popscidec_masthead.jpg'

    # (section title, feed URL) pairs, one per generated section.
    feeds = [
        ('Gadgets', 'http://www.popsci.com/full-feed/gadgets'),
        ('Cars', 'http://www.popsci.com/full-feed/cars'),
        ('Science', 'http://www.popsci.com/full-feed/science'),
        ('Technology', 'http://www.popsci.com/full-feed/technology'),
        ('DIY', 'http://www.popsci.com/full-feed/diy'),
    ]

    def preprocess_html(self, soup):
        """Get rid of "Gallery:" items before the page is converted.

        Every ``head`` or ``h2`` tag whose markup contains the text
        ``Gallery:`` has its *parent* element removed from the tree, so the
        whole enclosing container disappears, not just the tag itself.

        :param soup: parsed BeautifulSoup tree of the article page.
        :return: the same ``soup`` object, modified in place.
        """
        # findAll returns a list that is simply empty when nothing matches,
        # so no None check is needed. It is a snapshot, which makes it safe
        # to extract nodes from the tree while looping over it.
        for tag in soup.findAll(['head', 'h2']):
            if re.search(r'Gallery:', str(tag)):
                tag.parent.extract()
        return soup
- #-----------------------------------------------------------------
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-