home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2011 January / maximum-cd-2011-01.iso / DiscContents / calibre-0.7.26.msi / file_3929 < prev    next >
Encoding:
Text File  |  2010-09-30  |  7.6 KB  |  127 lines

  1. __license__   = 'GPL v3'
  2. __copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
  3. '''
  4. danas.rs
  5. '''
  6.  
  7. import re
  8. from calibre.web.feeds.news import BasicNewsRecipe
  9.  
  10. class Danas(BasicNewsRecipe):
  11.     title                 = 'Danas'
  12.     __author__            = 'Darko Miletic'
  13.     description           = 'Dnevne novine sa vestima iz sveta, politike, ekonomije, kulture, sporta, Beograda, Novog Sada i cele Srbije.'
  14.     publisher             = 'Danas d.o.o.'
  15.     category              = 'news, politics, Serbia'
  16.     oldest_article        = 2
  17.     max_articles_per_feed = 100
  18.     no_stylesheets        = False
  19.     use_embedded_content  = False
  20.     encoding              = 'utf-8'
  21.     masthead_url          = 'http://www.danas.rs/images/basic/danas.gif'
  22.     language              = 'sr'
  23.     remove_javascript     = True
  24.     publication_type      = 'newspaper'
  25.     remove_empty_feeds    = True
  26.     extra_css             = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
  27.                                 @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
  28.                                 .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif}
  29.                                 .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif}
  30.                                 .antrfileText{border-left: 2px solid #999999; 
  31.                                               margin-left: 0.8em; 
  32.                                               padding-left: 1.2em; 
  33.                                               margin-bottom: 0; 
  34.                                               margin-top: 0} 
  35.                                 h2,.datum,.lokacija,.autor{font-size: small}
  36.                                 .antrfileNaslov{border-left: 2px solid #999999; 
  37.                                                 margin-left: 0.8em; 
  38.                                                 padding-left: 1.2em; 
  39.                                                 font-weight:bold; 
  40.                                                 margin-bottom: 0; 
  41.                                                 margin-top: 0} 
  42.                                 img{margin-bottom: 0.8em} 
  43.                             """
  44.  
  45.     conversion_options = {
  46.                           'comment'          : description
  47.                         , 'tags'             : category
  48.                         , 'publisher'        : publisher
  49.                         , 'language'         : language
  50.                         }
  51.  
  52.     preprocess_regexps = [ 
  53.                            (re.compile(u'\u0110'), lambda match: u'\u00D0')
  54.                           ,(re.compile(u'\u2018'), lambda match: '‘') # left single quotation mark                        
  55.                           ,(re.compile(u'\u2019'), lambda match: '’') # right single quotation mark
  56.                           ,(re.compile(u'\u201a'), lambda match: '‘') # single low-9 quotation mark                        
  57.                           ,(re.compile(u'\u201b'), lambda match: '’') # single high-reversed-9 quotation mark
  58.                           ,(re.compile(u'\u201c'), lambda match: '“') # left double quotation mark
  59.                           ,(re.compile(u'\u201d'), lambda match: '”') # right double quotation mark
  60.                           ,(re.compile(u'\u201e'), lambda match: '“') # double low-9 quotation mark                          
  61.                           ,(re.compile(u'\u201f'), lambda match: '”') # double high-reversed-9 quotation mark
  62.                          ]
  63.  
  64.     keep_only_tags     = [dict(name='div', attrs={'id':'left'})]
  65.     remove_tags = [
  66.                      dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
  67.                     ,dict(name='div', attrs={'id':'comments'})
  68.                     ,dict(name=['object','link','iframe','meta'])
  69.                   ]
  70.     remove_attributes = ['w:st','st']
  71.  
  72.     feeds          = [
  73.                         (u'Politika'             , u'http://www.danas.rs/rss/rss.asp?column_id=27')
  74.                        ,(u'Hronika'              , u'http://www.danas.rs/rss/rss.asp?column_id=2' )
  75.                        ,(u'Drustvo'              , u'http://www.danas.rs/rss/rss.asp?column_id=24')
  76.                        ,(u'Dijalog'              , u'http://www.danas.rs/rss/rss.asp?column_id=1' )
  77.                        ,(u'Ekonomija'            , u'http://www.danas.rs/rss/rss.asp?column_id=6' )
  78.                        ,(u'Svet'                 , u'http://www.danas.rs/rss/rss.asp?column_id=25')
  79.                        ,(u'Srbija'               , u'http://www.danas.rs/rss/rss.asp?column_id=28')
  80.                        ,(u'Kultura'              , u'http://www.danas.rs/rss/rss.asp?column_id=5' )
  81.                        ,(u'Sport'                , u'http://www.danas.rs/rss/rss.asp?column_id=13')
  82.                        ,(u'Scena'                , u'http://www.danas.rs/rss/rss.asp?column_id=42')
  83.                        ,(u'Feljton'              , u'http://www.danas.rs/rss/rss.asp?column_id=19')
  84.                        ,(u'Periskop'             , u'http://www.danas.rs/rss/rss.asp?column_id=4' )
  85.                        ,(u'Famozno'              , u'http://www.danas.rs/rss/rss.asp?column_id=47')
  86.                        ,(u'Sluzbena beleska'     , u'http://www.danas.rs/rss/rss.asp?column_id=48')
  87.                        ,(u'Suocavanja'           , u'http://www.danas.rs/rss/rss.asp?column_id=49')
  88.                        ,(u'Moj Izbor'            , u'http://www.danas.rs/rss/rss.asp?column_id=50')
  89.                        ,(u'Direktno'             , u'http://www.danas.rs/rss/rss.asp?column_id=51')
  90.                        ,(u'I tome slicno'        , u'http://www.danas.rs/rss/rss.asp?column_id=52')
  91.                        ,(u'No longer and not yet', u'http://www.danas.rs/rss/rss.asp?column_id=53')
  92.                        ,(u'Resetovanje'          , u'http://www.danas.rs/rss/rss.asp?column_id=54')
  93.                        ,(u'Iza scene'            , u'http://www.danas.rs/rss/rss.asp?column_id=60')
  94.                        ,(u'Drustvoslovlje'       , u'http://www.danas.rs/rss/rss.asp?column_id=55')
  95.                        ,(u'Zvaka u pepeljari'    , u'http://www.danas.rs/rss/rss.asp?column_id=56')
  96.                        ,(u'Vostani Serbie'       , u'http://www.danas.rs/rss/rss.asp?column_id=57')
  97.                        ,(u'Med&Jad-a'            , u'http://www.danas.rs/rss/rss.asp?column_id=58')
  98.                        ,(u'Svetlosti pozornice'  , u'http://www.danas.rs/rss/rss.asp?column_id=59')
  99.                        ,(u'Dva cvancika'         , u'http://www.danas.rs/rss/rss.asp?column_id=65')
  100.                        ,(u'Iz kornera'           , u'http://www.danas.rs/rss/rss.asp?column_id=64')
  101.                      ]
  102.  
  103.     def preprocess_html(self, soup):
  104.         for tagn in ['st1:place','st1:city','st1:country-region','st1:state']:
  105.             for item in soup.body.findAll(tagn):
  106.                 item.name='span'
  107.         for item in soup.findAll(style=True):
  108.             del item['style']
  109.         for item in soup.findAll('a'):
  110.             if item.has_key('name'):
  111.                item.extract()
  112.         for item in soup.findAll('img'):
  113.             if not item.has_key('alt'):
  114.                item['alt'] = 'image'    
  115.         return soup
  116.  
  117.     def print_version(self, url):
  118.         return url + '&action=print'
  119.  
  120.     def get_cover_url(self):
  121.         cover_url = None
  122.         soup = self.index_to_soup('http://www.danas.rs/')
  123.         for citem in soup.findAll('img'):
  124.             if citem['src'].endswith('naslovna.jpg'):
  125.                return 'http://www.danas.rs' + citem['src']
  126.         return cover_url
  127.