home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_4127 < prev    next >
Encoding:
Text File  |  2010-06-05  |  3.7 KB  |  81 lines

  1. __license__   = 'GPL v3'
  2. __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
  3. '''
  4. vreme.com
  5. '''
  6.  
  7. import re
  8. from calibre import strftime
  9. from calibre.web.feeds.news import BasicNewsRecipe
  10.  
  11. class Vreme(BasicNewsRecipe):
  12.     title                = 'Vreme'
  13.     __author__           = 'Darko Miletic'
  14.     description          = 'Politicki Nedeljnik Srbije'
  15.     publisher            = 'NP Vreme d.o.o.'
  16.     category             = 'news, politics, Serbia'
  17.     delay                = 1
  18.     no_stylesheets       = True
  19.     needs_subscription   = True
  20.     INDEX                = 'http://www.vreme.com'
  21.     LOGIN                = 'http://www.vreme.com/account/login.php?url=%2F'
  22.     use_embedded_content = False
  23.     encoding             = 'utf-8'
  24.     language             = 'sr'
  25.     publication_type     = 'magazine'    
  26.     masthead_url         = 'http://www.vreme.com/g/vreme-logo.gif'
  27.     extra_css            = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .heading1{font-family: sans1, sans-serif; font-size: x-large; font-weight: bold} .heading2{font-family: sans1, sans-serif; font-size: large; font-weight: bold} .toc-heading{font-family: sans1, sans-serif; font-size: small} .column-heading2{font-family: sans1, sans-serif; font-size: large} .column-heading1{font-family: sans1, sans-serif; font-size: x-large} .column-normal{font-family: sans1, sans-serif; font-size: medium} .large{font-family: sans1, sans-serif; font-size: large} '
  28.  
  29.     conversion_options = {
  30.                           'comment'          : description
  31.                         , 'tags'             : category
  32.                         , 'publisher'        : publisher
  33.                         , 'language'         : language
  34.                         , 'linearize_tables' : True
  35.                         }
  36.  
  37.  
  38.     preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
  39.     remove_tags_before = dict(attrs={'class':'toc-heading'})
  40.     remove_tags_after  = dict(attrs={'class':'footer'     })
  41.  
  42.     def get_browser(self):
  43.         br = BasicNewsRecipe.get_browser()
  44.         if self.username is not None and self.password is not None:
  45.             br.open(self.LOGIN)
  46.             br.select_form(name='f')
  47.             br['username'] = self.username
  48.             br['password'] = self.password
  49.             br.submit()
  50.         return br
  51.  
  52.     def parse_index(self):
  53.         articles = []
  54.         soup = self.index_to_soup(self.INDEX)
  55.         cover_item = soup.find('div',attrs={'id':'najava'})
  56.         if cover_item:
  57.            self.cover_url = self.INDEX + cover_item.img['src']      
  58.         for item in soup.findAll(['h3','h4']):
  59.             description = u''
  60.             title_prefix = u''
  61.             feed_link = item.find('a')
  62.             if feed_link and feed_link.has_key('href') and feed_link['href'].startswith('/cms/view.php'):
  63.                 url   = self.INDEX + feed_link['href']
  64.                 title = title_prefix + self.tag_to_string(feed_link)
  65.                 date  = strftime(self.timefmt)
  66.                 articles.append({
  67.                                   'title'      :title
  68.                                  ,'date'       :date
  69.                                  ,'url'        :url
  70.                                  ,'description':description
  71.                                 })
  72.         return [('Nedeljnik Vreme', articles)]
  73.  
  74.     remove_tags = [
  75.                     dict(name=['object','link'])
  76.                    ,dict(name='table',attrs={'xclass':'image'})
  77.                   ]
  78.  
  79.     def print_version(self, url):
  80.         return url + '&print=yes'
  81.