home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_3914 < prev    next >
Encoding:
Text File  |  2009-10-14  |  2.8 KB  |  74 lines

  1. #!/usr/bin/env  python
  2.  
  3. __license__   = 'GPL v3'
  4. __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
  5. '''
  6. moscowtimes.ru
  7. '''
  8.  
  9. from calibre.web.feeds.news import BasicNewsRecipe
  10.  
  11. class Moscowtimes(BasicNewsRecipe):
  12.     title                 = u'The Moscow Times'
  13.     __author__            = 'Darko Miletic and Sujata Raman'
  14.     description           = 'News from Russia'
  15.     language = 'en'
  16.     lang = 'en'
  17.     oldest_article        = 7
  18.     max_articles_per_feed = 100
  19.     no_stylesheets        = True
  20.     use_embedded_content  = False
  21.     #encoding = 'utf-8'
  22.     encoding =  'cp1252'
  23.     remove_javascript = True
  24.  
  25.     conversion_options = {
  26.           'comment'          : description
  27.         , 'language'         : lang
  28.     }
  29.  
  30.     extra_css      = '''
  31.                         h1{ color:#0066B3; font-family: Georgia,serif ; font-size: large}
  32.                         .article_date{ font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; color:#000000; font-size: x-small;}
  33.                         .autors{color:#999999 ; font-weight: bold ; font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: x-small; }
  34.                         .photoautors{ color:#999999 ; font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: x-small; }
  35.                         .text{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size:75%; }
  36.                         '''
  37.     feeds          = [
  38.                         (u'The Moscow Times Top Stories' , u'http://www.themoscowtimes.com/rss/top'),
  39.                         (u'The Moscow Times Current Issue' , u'http://www.themoscowtimes.com/rss/issue'),
  40.                         (u'The Moscow Times News' , u'http://www.themoscowtimes.com/rss/news'),
  41.                         (u'The Moscow Times Business' , u'http://www.themoscowtimes.com/rss/business'),
  42.                         (u'The Moscow Times Art and Ideas' , u'http://www.themoscowtimes.com/rss/art'),
  43.                         (u'The Moscow Times Opinion' , u'http://www.themoscowtimes.com/rss/opinion')
  44.                      ]
  45.  
  46.     keep_only_tags = [
  47.                         dict(name='div', attrs={'class':['newstextblock']})
  48.                     ]
  49.  
  50.     remove_tags    = [
  51.                         dict(name='div', attrs={'class':['photo_nav']})
  52.                     ]
  53.  
  54.     def preprocess_html(self, soup):
  55.         soup.html['xml:lang'] = self.lang
  56.         soup.html['lang']     = self.lang
  57.         mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
  58.         soup.head.insert(0,mtag)
  59.  
  60.         return self.adeify_images(soup)
  61.  
  62.  
  63.     def get_cover_url(self):
  64.  
  65.         href =  'http://www.themoscowtimes.com/pdf/'
  66.  
  67.         soup = self.index_to_soup(href)
  68.         div = soup.find('div',attrs={'class':'left'})
  69.         a = div.find('a')
  70.         print a
  71.         if a :
  72.            cover_url = a.img['src']
  73.         return cover_url
  74.