home *** CD-ROM | disk | FTP | other *** search
- from calibre.web.feeds.news import BasicNewsRecipe
-
class Cyberpresse(BasicNewsRecipe):
    """Calibre news recipe for Cyberpresse (Canadian news in French).

    The class is almost entirely declarative: metadata, download limits,
    tag filters, a stylesheet and the list of RSS feeds.  The only code is
    :meth:`postprocess_html`, which rewrites inline emphasis tags.
    """

    # --- Recipe metadata -------------------------------------------------
    title = u'Cyberpresse'
    __author__ = 'balok and Sujata Raman'
    description = 'Canadian news in French'
    language = 'fr'

    # --- Download / conversion settings ----------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    # Zero-margin flags handed to html2lrf.
    html2lrf_options = [
        '--left-margin=0',
        '--right-margin=0',
        '--top-margin=0',
        '--bottom-margin=0',
    ]
    encoding = 'utf-8'

    # --- Content selection -----------------------------------------------
    # Two alternative article containers are matched: one by class, one by id.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'article-page'}},
        {'name': 'div', 'attrs': {'id': 'articlePage'}},
    ]

    extra_css = '''
        .photodata{font-family:Arial,Helvetica,Verdana,sans-serif;color: #999999; font-size: 90%; }
        h1{font-family:Georgia,Times,serif ; font-size: large; }
        .amorce{font-family:Arial,Helvetica,Verdana,sans-serif; font-weight:bold;}
        .article-page{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;}
        #articlePage{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;}
        .auteur{font-family:Georgia,Times,sans-serif; font-size: 90%; color:#006699 ;}
        .bodyText{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;}
        .byLine{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: 90%;}
        .entry{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;}
        .minithumb-auteurs{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: 90%; }
        a{color:#003399; font-weight:bold; }
        '''

    # Elements dropped from the kept article markup, selected by CSS class.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['centerbar', 'colspan', 'share-module']}},
        {'name': 'p', 'attrs': {'class': ['zoom']}},
        {'name': 'ul', 'attrs': {'class': ['stories']}},
        {'name': 'h4', 'attrs': {'class': ['general-cat']}},
    ]

    # --- Feeds: (section title, RSS URL) ----------------------------------
    feeds = [
        (u'Manchettes', u'http://www.cyberpresse.ca/rss/225.xml'),
        (u'Capitale nationale', u'http://www.cyberpresse.ca/rss/501.xml'),
        (u'Opinions', u'http://www.cyberpresse.ca/rss/977.xml'),
        (u'Insolite', u'http://www.cyberpresse.ca/rss/279.xml'),
    ]

    def postprocess_html(self, soup, first):
        """Turn every ``<i>`` and ``<strong>`` element into a ``<div>``.

        The elements are renamed in place; their attributes and children
        are left as they are.  ``first`` is accepted as part of the hook
        signature but is not used here.

        Returns the same ``soup`` object that was passed in.
        """
        for element in soup.findAll(name=['i', 'strong']):
            element.name = 'div'
        return soup
-
-
-