home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_691 (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2010-08-06  |  5.8 KB  |  214 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.6)
  3.  
  4. import urlparse
  5. import urllib
  6. __all__ = [
  7.     'RobotFileParser']
  8.  
  9. class RobotFileParser:
  10.     
  11.     def __init__(self, url = ''):
  12.         self.entries = []
  13.         self.default_entry = None
  14.         self.disallow_all = False
  15.         self.allow_all = False
  16.         self.set_url(url)
  17.         self.last_checked = 0
  18.  
  19.     
  20.     def mtime(self):
  21.         return self.last_checked
  22.  
  23.     
  24.     def modified(self):
  25.         import time
  26.         self.last_checked = time.time()
  27.  
  28.     
  29.     def set_url(self, url):
  30.         self.url = url
  31.         (self.host, self.path) = urlparse.urlparse(url)[1:3]
  32.  
  33.     
  34.     def read(self):
  35.         opener = URLopener()
  36.         f = opener.open(self.url)
  37.         lines = [ line.strip() for line in f ]
  38.         f.close()
  39.         self.errcode = opener.errcode
  40.         if self.errcode in (401, 403):
  41.             self.disallow_all = True
  42.         elif self.errcode >= 400:
  43.             self.allow_all = True
  44.         elif self.errcode == 200 and lines:
  45.             self.parse(lines)
  46.         
  47.  
  48.     
  49.     def _add_entry(self, entry):
  50.         if '*' in entry.useragents:
  51.             self.default_entry = entry
  52.         else:
  53.             self.entries.append(entry)
  54.  
  55.     
  56.     def parse(self, lines):
  57.         state = 0
  58.         linenumber = 0
  59.         entry = Entry()
  60.         for line in lines:
  61.             linenumber += 1
  62.             if not line:
  63.                 if state == 1:
  64.                     entry = Entry()
  65.                     state = 0
  66.                 elif state == 2:
  67.                     self._add_entry(entry)
  68.                     entry = Entry()
  69.                     state = 0
  70.                 
  71.             
  72.             i = line.find('#')
  73.             if i >= 0:
  74.                 line = line[:i]
  75.             
  76.             line = line.strip()
  77.             if not line:
  78.                 continue
  79.             
  80.             line = line.split(':', 1)
  81.             if len(line) == 2:
  82.                 line[0] = line[0].strip().lower()
  83.                 line[1] = urllib.unquote(line[1].strip())
  84.                 if line[0] == 'user-agent':
  85.                     if state == 2:
  86.                         self._add_entry(entry)
  87.                         entry = Entry()
  88.                     
  89.                     entry.useragents.append(line[1])
  90.                     state = 1
  91.                 elif line[0] == 'disallow':
  92.                     if state != 0:
  93.                         entry.rulelines.append(RuleLine(line[1], False))
  94.                         state = 2
  95.                     
  96.                 elif line[0] == 'allow':
  97.                     if state != 0:
  98.                         entry.rulelines.append(RuleLine(line[1], True))
  99.                         state = 2
  100.                     
  101.                 
  102.             line[0] == 'user-agent'
  103.         
  104.         if state == 2:
  105.             self.entries.append(entry)
  106.         
  107.  
  108.     
  109.     def can_fetch(self, useragent, url):
  110.         if self.disallow_all:
  111.             return False
  112.         if self.allow_all:
  113.             return True
  114.         if not urllib.quote(urlparse.urlparse(urllib.unquote(url))[2]):
  115.             pass
  116.         url = '/'
  117.         for entry in self.entries:
  118.             if entry.applies_to(useragent):
  119.                 return entry.allowance(url)
  120.         
  121.         if self.default_entry:
  122.             return self.default_entry.allowance(url)
  123.         return True
  124.  
  125.     
  126.     def __str__(self):
  127.         return []([ str(entry) + '\n' for entry in self.entries ])
  128.  
  129.  
  130.  
  131. class RuleLine:
  132.     
  133.     def __init__(self, path, allowance):
  134.         if path == '' and not allowance:
  135.             allowance = True
  136.         
  137.         self.path = urllib.quote(path)
  138.         self.allowance = allowance
  139.  
  140.     
  141.     def applies_to(self, filename):
  142.         if not self.path == '*':
  143.             pass
  144.         return filename.startswith(self.path)
  145.  
  146.     
  147.     def __str__(self):
  148.         if not self.allowance or 'Allow':
  149.             pass
  150.         return 'Disallow' + ': ' + self.path
  151.  
  152.  
  153.  
  154. class Entry:
  155.     
  156.     def __init__(self):
  157.         self.useragents = []
  158.         self.rulelines = []
  159.  
  160.     
  161.     def __str__(self):
  162.         ret = []
  163.         for agent in self.useragents:
  164.             ret.extend([
  165.                 'User-agent: ',
  166.                 agent,
  167.                 '\n'])
  168.         
  169.         for line in self.rulelines:
  170.             ret.extend([
  171.                 str(line),
  172.                 '\n'])
  173.         
  174.         return ''.join(ret)
  175.  
  176.     
  177.     def applies_to(self, useragent):
  178.         useragent = useragent.split('/')[0].lower()
  179.         for agent in self.useragents:
  180.             if agent == '*':
  181.                 return True
  182.             agent = agent.lower()
  183.             if agent in useragent:
  184.                 return True
  185.         
  186.         return False
  187.  
  188.     
  189.     def allowance(self, filename):
  190.         for line in self.rulelines:
  191.             if line.applies_to(filename):
  192.                 return line.allowance
  193.         
  194.         return True
  195.  
  196.  
  197.  
  198. class URLopener(urllib.FancyURLopener):
  199.     
  200.     def __init__(self, *args):
  201.         urllib.FancyURLopener.__init__(self, *args)
  202.         self.errcode = 200
  203.  
  204.     
  205.     def prompt_user_passwd(self, host, realm):
  206.         return (None, None)
  207.  
  208.     
  209.     def http_error_default(self, url, fp, errcode, errmsg, headers):
  210.         self.errcode = errcode
  211.         return urllib.FancyURLopener.http_error_default(self, url, fp, errcode, errmsg, headers)
  212.  
  213.  
  214.