home *** CD-ROM | disk | FTP | other *** search
- from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    """Calibre news recipe that builds an e-book from the Express.de RSS feeds.

    The class only configures ``BasicNewsRecipe`` through class attributes
    (feed list, article limits, markup to strip) and overrides
    ``preprocess_html`` to flatten simple hyperlinks into plain text.
    """

    title = u'Express.de'
    __author__ = 'schuster'
    oldest_article = 2
    max_articles_per_feed = 50
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    extra_css = '''
        h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}
        h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
        '''
    remove_javascript = True

    # The attribute was previously misspelled ``remove_tags_befor``; that name
    # is not part of the BasicNewsRecipe API, so the setting had no effect.
    remove_tags_before = [dict(name='div', attrs={'class': 'Datum'})]
    remove_tags_after = [dict(name='div', attrs={'class': 'MoreNews'})]

    remove_tags = [
        # Elements matched by id.
        dict(id='kalaydo'),
        dict(id='Header'),
        dict(id='Searchline'),
        dict(id='MainNav'),
        dict(id='Logo'),
        dict(id='MainLinkSpacer'),
        dict(id='MainLinks'),
        dict(id='ContainerPfad'),  # new
        # Element matched by its title attribute.
        dict(title='Diese Seite Bookmarken'),
        # Every <span> element is dropped.
        dict(name='span'),
        # <div> elements matched by class.
        dict(name='div', attrs={'class': 'spacer_leftneu'}),
        dict(name='div', attrs={'class': 'button kalaydologo'}),
        dict(name='div', attrs={'class': 'button stellenneu'}),
        dict(name='div', attrs={'class': 'button autoneu'}),
        dict(name='div', attrs={'class': 'button immobilienneu'}),
        dict(name='div', attrs={'class': 'button kleinanzeigen'}),
        dict(name='div', attrs={'class': 'button tiereneu'}),
        dict(name='div', attrs={'class': 'button ferienwohnungen'}),
        dict(name='div', attrs={'class': 'button inserierenneu'}),
        dict(name='div', attrs={'class': 'spacer_rightneu'}),
        dict(name='div', attrs={'class': 'spacer_rightcorner'}),
        dict(name='div', attrs={'class': 'HeaderMetaNav'}),
        dict(name='div', attrs={'class': 'HeaderSearchOption'}),
        dict(name='div', attrs={'class': 'HeaderSearch'}),
        dict(name='div', attrs={'class': 'sbutton'}),
        dict(name='div', attrs={'class': 'active'}),
        dict(name='div', attrs={'class': 'MoreNews'}),  # new
        dict(name='div', attrs={'class': 'ContentBoxSubline'}),  # new
    ]

    # (feed title, feed URL) pairs. Umlauts in the titles are written as
    # escapes (\xf6 = o-umlaut, \xfc = u-umlaut) so the file stays pure ASCII
    # and does not depend on a source-encoding declaration; the titles were
    # previously stored as mojibake.
    feeds = [
        (u'Top-Themen', u'http://www.express.de/home/-/2126/2126/-/view/asFeed/-/index.xml'),
        (u'Regional - K\xf6ln', u'http://www.express.de/regional/koeln/-/2856/2856/-/view/asFeed/-/index.xml'),
        (u'Regional - Bonn', u'http://www.express.de/regional/bonn/-/2860/2860/-/view/asFeed/-/index.xml'),
        (u'Regional - D\xfcsseldorf', u'http://www.express.de/regional/duesseldorf/-/2858/2858/-/view/asFeed/-/index.xml'),
        (u'Regional - Region', u'http://www.express.de/regional/-/2178/2178/-/view/asFeed/-/index.xml'),
        (u'Sport-News', u'http://www.express.de/sport/-/2176/2176/-/view/asFeed/-/index.xml'),
        (u'Fussball-News', u'http://www.express.de/sport/fussball/-/3186/3186/-/view/asFeed/-/index.xml'),
        (u'1.FC K\xf6ln News', u'http://www.express.de/sport/fussball/fc-koeln/-/3192/3192/-/view/asFeed/-/index.xml'),
        (u'Alemannia Aachen News', u'http://www.express.de/sport/fussball/alemannia/-/3290/3290/-/view/asFeed/-/index.xml'),
        (u'Borussia M~Gladbach', u'http://www.express.de/sport/fussball/gladbach/-/3286/3286/-/view/asFeed/-/index.xml'),
        (u'Fortuna D~Dorf', u'http://www.express.de/sport/fussball/fortuna/-/3292/3292/-/view/asFeed/-/index.xml'),
        (u'Basketball News', u'http://www.express.de/sport/basketball/-/3190/3190/-/view/asFeed/-/index.xml'),
        (u'Big Brother', u'http://www.express.de/news/promi-show/big-brother/-/2402/2402/-/view/asFeed/-/index.xml'),
    ]

    def preprocess_html(self, soup):
        """Replace simple hyperlinks with their bare text.

        Every ``<a>`` element whose ``.string`` is not ``None`` is swapped for
        that string, which removes the link but keeps its text. Anchors whose
        ``.string`` is ``None`` are left unchanged.

        :param soup: parsed article document; it is modified in place.
        :return: the same ``soup`` object.
        """
        for alink in soup.findAll('a'):
            link_text = alink.string
            if link_text is not None:
                alink.replaceWith(link_text)
        return soup
-
-