Chip 2011 November

home *** CD-ROM | disk | FTP | other *** search

/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / superesportes.recipe < prev next >

Wrap

Text File | 2011-09-09 | 3.5 KB | 80 lines

__license__ = 'GPL v3'
__copyright__ = '2010, Luciano Furtado <lrfurtado at yahoo.com.br>'
'''
www.superesportes.com.br
'''

from calibre.web.feeds.news import BasicNewsRecipe


class SuperEsportesRecipe(BasicNewsRecipe):
    """Calibre recipe that collects per-club news from superesportes.com.br.

    The article list is built by hand in ``parse_index``: one feed section
    per football club, each filled from the links found in that club's
    "lista_ultimas_noticias" list on its index page.
    """

    title = u'www.superesportes.com.br'
    description = u'Superesportes - Notícias do esporte no Brasil e no mundo'
    __author__ = 'Luciano Furtado'
    language = 'pt'
    category = 'esportes, Brasil'
    no_stylesheets = True
    oldest_article = 7
    use_embedded_content = False
    max_articles_per_feed = 10
    cover_url = 'http://imgs.mg.superesportes.com.br/superesportes_logo.png'
    extra_css = 'div.info_noticias h1 { font-size: 100% }'

    # Page elements listed for removal, matched by tag name and attributes.
    remove_tags = [
        dict(name='div', attrs={'class': 'topo'}),
        dict(name='div', attrs={'class': 'rodape'}),
        dict(name='div', attrs={'class': 'navegacao'}),
        dict(name='div', attrs={'class': 'lateral2'}),
        dict(name='div', attrs={'class': 'leia_mais'}),
        dict(name='div', attrs={'id': 'comentar'}),
        dict(name='div', attrs={'id': 'vrumelc_noticia'}),
        dict(name='div', attrs={'class': 'compartilhe'}),
        dict(name='div', attrs={'class': 'linha_noticias'}),
        dict(name='div', attrs={'class': 'botoes_noticias'}),
        dict(name='div', attrs={'class': 'barra_time bg_time'}),
    ]

    def parse_index(self):
        """Build the list of feeds, one per club index page.

        Returns:
            A list of ``(section_title, articles)`` tuples. ``articles`` is a
            list of dicts with the keys ``title``, ``url``, ``description``
            (a copy of the title) and ``date`` (always an empty string).
            Sections that yield no articles are left out.
        """
        sections = [
            (u'Atletico', 'http://www.df.superesportes.com.br/futebol/atletico-mg/capa_atletico_mg/index.shtml'),
            (u'Botafogo', 'http://www.df.superesportes.com.br/futebol/botafogo/capa_botafogo/index.shtml'),
            (u'Corinthians', 'http://www.df.superesportes.com.br/futebol/corinthians/capa_corinthians/index.shtml'),
            (u'Cruzeiro', 'http://www.df.superesportes.com.br/futebol/cruzeiro/capa_cruzeiro/index.shtml'),
            (u'Flamengo', 'http://www.df.superesportes.com.br/futebol/flamengo/capa_flamengo/index.shtml'),
            (u'Fluminense', 'http://www.df.superesportes.com.br/futebol/fluminense/capa_fluminense/index.shtml'),
            (u'Palmeiras', 'http://www.df.superesportes.com.br/futebol/palmeiras/capa_palmeiras/index.shtml'),
            (u'Santos', 'http://www.df.superesportes.com.br/futebol/santos/capa_santos/index.shtml'),
            (u'São Paulo', 'http://www.df.superesportes.com.br/futebol/sao-paulo/capa_sao_paulo/index.shtml'),
            (u'Vasco', 'http://www.df.superesportes.com.br/futebol/vasco/capa_vasco/index.shtml'),
        ]

        feeds = []
        for section, index_url in sections:
            soup = self.index_to_soup(index_url)
            latest_news = soup.find(name='ul', attrs={'class': 'lista_ultimas_noticias'})
            if latest_news is None:
                # The expected list is missing from this page; skip the
                # section instead of failing the whole download.
                self.log("\n\nNo news list found for section: " + section +
                         "\nUrl: " + index_url)
                continue

            current_articles = []
            for li_tag in latest_news.findAll(name='li'):
                a_tag = li_tag.find('a', href=True)
                if a_tag is None:
                    # List item without a link: nothing to download.
                    continue
                article_title = self.tag_to_string(a_tag)
                # The tag was matched with href=True, so the attribute exists.
                article_url = a_tag['href']
                self.log("\n\nFound title: " + article_title +
                         "\nUrl: " + article_url +
                         "\nSection: " + section)
                current_articles.append({
                    'title': article_title,
                    'url': article_url,
                    'description': article_title,
                    'date': '',
                })

            if current_articles:
                feeds.append((section, current_articles))
        return feeds