home *** CD-ROM | disk | FTP | other *** search
- #!/usr/bin/env python
-
- # ebook-convert.exe c:\billorielly.recipe c:\test -vv
-
- from calibre.web.feeds.recipes import BasicNewsRecipe
-
class BillOReilly(BasicNewsRecipe):
    """Recipe that builds its feeds by scraping index pages of billoreilly.com.

    No RSS feed is used: ``parse_index`` scrapes the TV show archive page and
    the newspaper columns page, and rewrites every site-relative article link
    into an absolute URL carrying the print-view query suffix.
    """

    cover_url = 'http://images.billoreilly.com/images/headers/borbanner.jpg'
    title = u"Bill O'Reilly"
    __author__ = 'Rob Lammert - rob.lammert[at]gmail.com'
    description = u"Articles from Bill O'Reilly's website and his Fox New's website"
    language = 'en'
    oldest_article = 7.0
    max_articles_per_feed = 100
    recursions = 0
    encoding = 'utf8'
    no_stylesheets = True
    remove_javascript = True
    #use_embedded_content = False

    # RSS feed definitions, currently disabled; parse_index() below supplies
    # the article lists instead.
    # feeds = [
    #     ('Talking Points Memo', u'http://www.foxnews.com/xmlfeed/column/0,5184,19,00.rss'),
    #     ('No Spin News', u'http://www.billoreilly.com/blog?rss=true&size=50&useBlurbs=true&categoryID=7')
    # ]

    # Prefix used to turn site-relative links into absolute URLs.
    _BO_SITE = 'http://www.billoreilly.com'
    # Upper bound on the number of articles collected per index page.
    _BO_ARTICLE_LIMIT = 6

    def parse_index(self):
        """Return the feeds as a list of ``(feed title, articles)`` tuples.

        A section whose scrape produced no articles is left out of the result.
        """
        feeds = []

        articles_shows = self.bo_parse_shows(
            'http://www.billoreilly.com/show?action=tvShowArchive')
        articles_columns = self.bo_parse_columns(
            'http://www.billoreilly.com/columns')

        if articles_shows:
            feeds.append(("O'Reilly Factor", articles_shows))

        if articles_columns:
            feeds.append(("Newspaper Column", articles_columns))

        return feeds

    def bo_parse_shows(self, url):
        """Collect show articles from the index page at *url*.

        Links are the elements whose ``class`` attribute is ``showLinks``.
        Returns a list of article dicts (``title``, ``url``, ``description``,
        ``date``); the list is empty when the page has no such element.
        """
        return self._bo_collect_articles(
            url, 'class', 'showLinks',
            '&dest=/pg/jsp/community/tvshowprint.jsp')

    def bo_parse_columns(self, url):
        """Collect column articles from the index page at *url*.

        Links are the elements whose ``id`` attribute is ``bold`` and whose
        ``class`` is ``defaultLinks``. Returns a list of article dicts
        (``title``, ``url``, ``description``, ``date``); the list is empty
        when the page has no such element.
        """
        # The suffix previously ended in a stray double quote, which became
        # part of every generated URL.
        return self._bo_collect_articles(
            url, 'id', 'bold', '&printerFriendly=true',
            required_class='defaultLinks')

    def _bo_collect_articles(self, index_url, attr_name, attr_value,
                             url_suffix, required_class=None):
        """Scrape article links from *index_url* (shared by both scrapers).

        Elements are selected by ``attr_name == attr_value``. When
        *required_class* is given, an element must also carry that CSS class.
        *url_suffix* is appended to site-relative links only; links that do
        not start with ``/`` are kept unchanged.
        """
        soup = self.index_to_soup(index_url)
        first = soup.find(attrs={attr_name: attr_value})
        if first is None:
            # Nothing on the page matches (e.g. the layout changed): report
            # it and yield no articles instead of failing on None.
            self.log('\t\tNo matching links found on', index_url)
            return []

        articles = []
        # NOTE(review): findAllNext() yields only the elements *after* the
        # first match, so the first matching element itself is never
        # considered - confirm this is intended.
        for lnk in first.findAllNext(attrs={attr_name: [attr_value]}):
            if len(articles) >= self._BO_ARTICLE_LIMIT:
                # Limit reached; no later element can add anything.
                break

            if required_class is not None:
                css = lnk.get('class', False)
                # Depending on the parser version, ``class`` is either a
                # plain string or a list of class names; accept both.
                if isinstance(css, (list, tuple)):
                    if required_class not in css:
                        continue
                elif css != required_class:
                    continue

            title = self.tag_to_string(lnk)
            href = lnk.get('href', False)
            if not href or not title:
                continue

            if href.startswith('/'):
                href = self._BO_SITE + href + url_suffix

            self.log('\t\tFound article:', title)
            self.log('\t\t\t', href)
            articles.append(
                {'title': title, 'url': href, 'description': '', 'date': ''})

        return articles
-