home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3938 < prev    next >
Encoding:
Text File  |  2010-05-16  |  3.7 KB  |  93 lines

  1.  
  2. __license__   = 'GPL v3'
  3. __copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
  4. '''
  5. www.nin.co.rs
  6. '''
  7.  
  8. import re
  9. from calibre import strftime
  10. from calibre.web.feeds.news import BasicNewsRecipe
  11.  
  12. class Nin(BasicNewsRecipe):
  13.     title                  = 'NIN online'
  14.     __author__             = 'Darko Miletic'
  15.     description            = 'Nedeljne Informativne Novine'
  16.     publisher              = 'NIN d.o.o.'
  17.     category               = 'news, politics, Serbia'
  18.     no_stylesheets         = True
  19.     delay                  = 1
  20.     oldest_article         = 15
  21.     encoding               = 'utf-8'
  22.     needs_subscription     = True
  23.     remove_empty_feeds     = True
  24.     PREFIX                 = 'http://www.nin.co.rs'
  25.     INDEX                  = PREFIX + '/?change_lang=ls'
  26.     use_embedded_content   = False
  27.     language               = 'sr'
  28.     publication_type       = 'magazine'
  29.     extra_css              = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Verdana, Lucida, sans1, sans-serif} .article_description{font-family: Verdana, Lucida, sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold; color: #900} .izjava{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold;} img{margin-top:0.5em; margin-bottom: 0.7em} b{margin-top: 1em} '
  30.  
  31.     conversion_options = {
  32.                           'comment'          : description
  33.                         , 'tags'             : category
  34.                         , 'publisher'        : publisher
  35.                         , 'language'         : language
  36.                         , 'linearize_tables' : True
  37.                         }
  38.  
  39.     preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
  40.     remove_attributes = ['height','width']
  41.  
  42.     def get_browser(self):
  43.         br = BasicNewsRecipe.get_browser()
  44.         if self.username is not None and self.password is not None:
  45.             br.open(self.INDEX)
  46.             br.select_form(name='form1')
  47.             br['login_name'    ] = self.username
  48.             br['login_password'] = self.password
  49.             br.submit()
  50.         return br
  51.  
  52.     keep_only_tags    =[dict(name='td', attrs={'width':'520'})]
  53.     remove_tags_after =dict(name='html')
  54.  
  55.     def get_cover_url(self):
  56.         cover_url = None
  57.         soup = self.index_to_soup(self.INDEX)
  58.         link_item = soup.find('img',attrs={'width':'100','border':'0'})
  59.         if link_item:
  60.            cover_url = self.PREFIX + link_item['src']
  61.         return cover_url
  62.  
  63.     def parse_index(self):
  64.         articles = []
  65.         count = 0
  66.         soup = self.index_to_soup(self.PREFIX)
  67.         for item in soup.findAll('a',attrs={'class':'lmeninavFont'}):
  68.             count = count +1
  69.             if self.test and count > 2:
  70.                return articles
  71.             section  = self.tag_to_string(item)
  72.             feedlink = self.PREFIX + item['href']
  73.             feedpage = self.index_to_soup(feedlink)
  74.             self.report_progress(0, _('Fetching feed')+' %s...'%(section))
  75.             inarts   = []
  76.             for art in feedpage.findAll('span',attrs={'class':'artTitle'}):
  77.                 alink = art.parent
  78.                 url   = self.PREFIX + alink['href']
  79.                 title = self.tag_to_string(art)
  80.                 sparent = alink.parent
  81.                 alink.extract()
  82.                 description = self.tag_to_string(sparent)
  83.                 date = strftime(self.timefmt)
  84.                 inarts.append({
  85.                                   'title'      :title
  86.                                  ,'date'       :date
  87.                                  ,'url'        :url
  88.                                  ,'description':description
  89.                                 })
  90.             articles.append((section,inarts))
  91.         return articles
  92.  
  93.