home *** CD-ROM | disk | FTP | other *** search
- from calibre.web.feeds.news import BasicNewsRecipe
- from calibre.ebooks.BeautifulSoup import Tag
-
class VrijNederlandRecipe(BasicNewsRecipe):
    """calibre news recipe for the Dutch weekly magazine Vrij Nederland.

    Collects the section and columnist RSS feeds published on www.vn.nl,
    keeps only the 'cl-column column-one' div of every article page and
    drops the elements listed in ``remove_tags``.
    """

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'nl'
    locale = 'nl'
    version = 1

    title = u'Vrij Nederland'
    publisher = u'Weekbladpers Tijdschriften'
    category = u'News, Opinion'
    description = u'Weekly opinion magazine from the Netherlands'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False

    no_stylesheets = True
    remove_javascript = True
    # Does not seem to work
    #extra_css = '''li.calibre2 {padding-bottom: 40px}'''

    # Reuse the metadata declared above so the values cannot drift apart.
    conversion_options = {
        'publisher': publisher,
        'tags': category,
        'comments': description,
    }

    # (feed title, feed URL) pairs: the site sections first, then the
    # columnists.
    feeds = [
        (u'Politiek', u'http://www.vn.nl/politiek.rss'),
        (u'Buitenland', u'http://www.vn.nl/buitenland.rss'),
        (u'Economie', u'http://www.vn.nl/economie.rss'),
        (u'Justitie', u'http://www.vn.nl/justitie.rss'),
        (u'Samenleving', u'http://www.vn.nl/samenleving.rss'),
        (u'Crime', u'http://www.vn.nl/crime.rss'),
        (u'Media', u'http://www.vn.nl/media.rss'),
        (u'De Republiek der Letteren', u'http://www.vn.nl/republiek.rss'),
        (u'Max van Weezel', u'http://www.vn.nl/vanweezel.rss'),
        (u'Ko Colijn', u'http://www.vn.nl/colijn.rss'),
        (u'Kees Kraaijeveld', u'http://www.vn.nl/kraaijeveld.rss'),
        (u'Frank Kalshoven', u'http://www.vn.nl/kalshoven.rss'),
        (u'Stephan Sanders', u'http://www.vn.nl/sanders.rss'),
        (u'Micha Wertheim', u'http://www.vn.nl/wertheim.rss'),
        (u'Arnon Grunberg', u'http://www.vn.nl/grunberg.rss'),
        (u'Carel Peeters', u'http://www.vn.nl/carelpeeters.rss'),
    ]

    keep_only_tags = [
        dict(name='div', attrs={'class': 'cl-column column-one'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': 'wpg-element guest-book-overview'}),
        dict(name='div', attrs={'class': 'wpg-element forum-message-form'}),
        dict(name='div', attrs={'class': 'mediaterms'}),
        dict(name='div', attrs={'class': 'label-term'}),
        dict(name='div', attrs={'class': 'wpg-element Media-Collection-Element-Artikel-Lijst'}),
        dict(name='object'),
        dict(name='link'),
        dict(name='meta'),
    ]

    def preprocess_html(self, soup):
        """Tidy up a parsed article page and return it.

        Removes the 'link' span and every 'seperator' span from the page's
        'meta' div (when that div exists) and replaces the document head
        with an empty one.

        :param soup: parsed article page (BeautifulSoup tree).
        :return: the same ``soup`` object, modified in place.
        """
        # Just clean up the result a little
        meta = soup.find('div', attrs={'class': 'meta'})
        if meta is not None:
            link = meta.find('span', attrs={'class': 'link'})
            if link is not None:
                link.extract()
            # 'seperator' (sic) is the class name the lookup has always used.
            for separator in meta.findAll('span', attrs={'class': 'seperator'}):
                separator.extract()

        # Their header is full of 'if IE6/7/8' tags. Just get rid of it
        # altogether. A page without a <head> must not abort the download,
        # so only extract the head when there is one.
        their_head = soup.head
        if their_head is not None:
            their_head.extract()
        soup.insert(0, Tag(soup, 'head'))

        return soup
-
-
-
-