home *** CD-ROM | disk | FTP | other *** search
- from calibre.web.feeds.news import BasicNewsRecipe
- import re
-
class TheSkepticalInquirer(BasicNewsRecipe):
    """Calibre recipe that builds an e-book from the Skeptical Inquirer site.

    The article list is scraped from the csicop.org front page by
    ``parse_index`` rather than read from an RSS feed.
    """

    title = u'The Skeptical Inquirer'
    description = 'Investigation of fringe science and paranormal claims.'
    language = 'en'
    __author__ = 'Starson17'
    oldest_article = 31
    cover_url = 'http://www.skeptricks.com/images/Skeptical_Inquirer_Magazine.jpg'
    remove_empty_feeds = True
    remove_javascript = True
    max_articles_per_feed = 50
    no_stylesheets = True

    # Keep only the article body and the author biography containers.
    keep_only_tags = [dict(name='div', attrs={'id': ['content', 'bio']})]

    # Drop the social-media sharing widget from the kept content.
    remove_tags = [
        dict(name='div', attrs={'id': ['socialMedia']}),
    ]

    # Strip the placeholder text the site shows in place of obfuscated
    # e-mail addresses when JavaScript is unavailable.
    preprocess_regexps = [
        (re.compile(r'\.\(JavaScript must be enabled to view this email address\)',
                    re.DOTALL | re.IGNORECASE),
         lambda match: ''),
    ]

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''

    def parse_index(self):
        """Return the list of ``(feed title, articles)`` tuples to download.

        Each index page listed below is scraped with ``make_links``; pages
        that yield no articles are left out of the result.
        """
        feeds = []
        for title, url in [("The Skeptical Inquirer", "http://www.csicop.org")]:
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def make_links(self, url):
        """Scrape the index page at *url* and return its article entries.

        Returns a list of dicts with the keys ``title``, ``url``,
        ``description`` and ``date`` (the last two are always empty strings).
        Elements that carry no usable link are skipped instead of aborting
        the whole index.
        """
        # Imported locally so the recipe works on both Python 3 and the
        # Python 2 based calibre releases.
        try:
            from urllib.parse import urljoin
        except ImportError:
            from urlparse import urljoin

        soup = self.index_to_soup(url)
        current_articles = []
        for item in soup.findAll(attrs={'class': ['article-single bigger']}):
            link = item.a
            if link is None or not link.get('href'):
                # No anchor or no target: nothing to download for this item.
                continue
            current_articles.append({
                # tag_to_string collects the text of nested tags too, whereas
                # ``.string`` is None as soon as the anchor contains markup.
                'title': self.tag_to_string(link).strip(),
                # urljoin resolves root-relative, relative and absolute hrefs
                # correctly; plain concatenation only handled the first kind.
                'url': urljoin(url, link['href']),
                'description': '',
                'date': '',
            })
        return current_articles
-
-