home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2011 January / maximum-cd-2011-01.iso / DiscContents / calibre-0.7.26.msi / file_4392 < prev    next >
Encoding:
Text File  |  2010-09-30  |  3.0 KB  |  53 lines

  1. import re
  2. from calibre.web.feeds.news import BasicNewsRecipe
  3.  
  4. class AdvancedUserRecipe1283848012(BasicNewsRecipe):
  5.     description   = 'TheMarker Financial News in Hebrew'
  6.     __author__            = 'TonyTheBookworm, Marbs'
  7.     cover_url      = 'http://static.ispot.co.il/wp-content/upload/2009/09/themarker.jpg'
  8.     title          = u'TheMarker'
  9.     language              = 'he'
  10.     simultaneous_downloads = 5
  11.     remove_javascript     = True
  12.     timefmt        = '[%a, %d %b, %Y]'
  13.     oldest_article = 1
  14.     remove_tags = [dict(name='tr', attrs={'bgcolor':['#738A94']})          ]
  15.     max_articles_per_feed = 10
  16.     extra_css='body{direction: rtl;} .article_description{direction: rtl; } a.article{direction: rtl; } .calibre_feed_description{direction: rtl; }'
  17.     feeds          = [(u'Head Lines', u'http://www.themarker.com/tmc/content/xml/rss/hpfeed.xml'),
  18.                       (u'TA Market', u'http://www.themarker.com/tmc/content/xml/rss/sections/marketfeed.xml'),
  19.                       (u'Real Estate', u'http://www.themarker.com/tmc/content/xml/rss/sections/realEstaterfeed.xml'),
  20.                       (u'Wall Street & Global', u'http://www.themarker.com/tmc/content/xml/rss/sections/wallsfeed.xml'),
  21.                       (u'Law', u'http://www.themarker.com/tmc/content/xml/rss/sections/lawfeed.xml'),
  22.                       (u'Media', u'http://www.themarker.com/tmc/content/xml/rss/sections/mediafeed.xml'),
  23.                       (u'Consumer', u'http://www.themarker.com/tmc/content/xml/rss/sections/consumerfeed.xml'),
  24.                       (u'Career', u'http://www.themarker.com/tmc/content/xml/rss/sections/careerfeed.xml'),
  25.                       (u'Car', u'http://www.themarker.com/tmc/content/xml/rss/sections/carfeed.xml'),
  26.                       (u'High Tech', u'http://www.themarker.com/tmc/content/xml/rss/sections/hightechfeed.xml'),
  27.                       (u'Investor Guide', u'http://www.themarker.com/tmc/content/xml/rss/sections/investorGuidefeed.xml')]
  28.  
  29.     def print_version(self, url):
  30.         split1 = url.split("=")
  31.         weblinks = url
  32.  
  33.         if weblinks is not None:
  34.             for link in weblinks:
  35.                 #---------------------------------------------------------
  36.                 #here we need some help with some regexpressions
  37.                 #we are trying to find it.themarker.com in a url
  38.                 #-----------------------------------------------------------
  39.                 re1='.*?'   # Non-greedy match on filler
  40.                 re2='(it\\.themarker\\.com)'    # Fully Qualified Domain Name 1
  41.                 rg = re.compile(re1+re2,re.IGNORECASE|re.DOTALL)
  42.                 m = rg.search(url)
  43.  
  44.  
  45.                 if m:
  46.                  split2 = url.split("article/")
  47.                  print_url = 'http://it.themarker.com/tmit/PrintArticle/' + split2[1]
  48.  
  49.                 else:
  50.                     print_url = 'http://www.themarker.com/ibo/misc/printFriendly.jhtml?ElementId=%2Fibo%2Frepositories%2Fstories%2Fm1_2000%2F' + split1[1]+'.xml'
  51.  
  52.         return print_url
  53.