__license__ = 'GPL v3'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.nin.co.rs
'''

import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class Nin(BasicNewsRecipe):
    title = 'NIN online'
    __author__ = 'Darko Miletic'
    description = 'Nedeljne Informativne Novine'
    publisher = 'NIN d.o.o.'
    category = 'news, politics, Serbia'
    no_stylesheets = True
    delay = 1
    oldest_article = 15
    encoding = 'utf-8'
    needs_subscription = True
    remove_empty_feeds = True
    PREFIX = 'http://www.nin.co.rs'
    INDEX = PREFIX + '/?change_lang=ls'
    use_embedded_content = False
    language = 'sr'
    publication_type = 'magazine'
    extra_css = """
        @font-face {font-family: "sans1"; src: url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
        body{font-family: Verdana, Lucida, sans1, sans-serif}
        .article_description{font-family: Verdana, Lucida, sans1, sans-serif}
        .artTitle{font-size: x-large; font-weight: bold; color: #900}
        .izjava{font-size: x-large; font-weight: bold}
        .columnhead{font-size: small; font-weight: bold}
        img{margin-top: 0.5em; margin-bottom: 0.7em}
        b{margin-top: 1em}
    """

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
        'linearize_tables': True
    }

    # Replace Đ (U+0110) with Ð (U+00D0), presumably a workaround for reader
    # fonts that lack the former glyph.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    remove_attributes = ['height', 'width']

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open(self.INDEX)
            br.select_form(name='form1')
            br['login_name'] = self.username
            br['login_password'] = self.password
            br.submit()
        return br

    keep_only_tags = [dict(name='td', attrs={'width': '520'})]
    remove_tags_after = dict(name='html')

    def get_cover_url(self):
        cover_url = None
        soup = self.index_to_soup(self.INDEX)
        link_item = soup.find('img', attrs={'width': '100', 'border': '0'})
        if link_item:
            cover_url = self.PREFIX + link_item['src']
        return cover_url

    def parse_index(self):
        articles = []
        count = 0
        soup = self.index_to_soup(self.PREFIX)
        for item in soup.findAll('a', attrs={'class': 'lmeninavFont'}):
            count += 1
            if self.test and count > 2:
                return articles
            section = self.tag_to_string(item)
            feedlink = self.PREFIX + item['href']
            feedpage = self.index_to_soup(feedlink)
            self.report_progress(0, _('Fetching feed') + ' %s...' % (section))
            inarts = []
            for art in feedpage.findAll('span', attrs={'class': 'artTitle'}):
                alink = art.parent
                url = self.PREFIX + alink['href']
                title = self.tag_to_string(art)
                sparent = alink.parent
                alink.extract()
                description = self.tag_to_string(sparent)
                date = strftime(self.timefmt)
                inarts.append({
                    'title': title,
                    'date': date,
                    'url': url,
                    'description': description
                })
            articles.append((section, inarts))
        return articles
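
# To exercise this recipe from the command line rather than the calibre GUI,
# something like the following should work (a sketch: it assumes the file is
# saved as nin.recipe and that calibre's ebook-convert tool is on the PATH):
#
#   ebook-convert nin.recipe nin.epub --test --username USER --password PASS
#
# --test runs the recipe in test mode; the `self.test` check in parse_index()
# above uses it to stop after the first two sections.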