home *** CD-ROM | disk | FTP | other *** search
/ PC World 2002 May / PCWorld_2002-05_cd.bin / Software / TemaCD / activepython / ActivePython-2.1.1.msi / Python21_Lib_shlex.py < prev    next >
Encoding:
Python Source  |  2001-07-26  |  7.8 KB  |  211 lines

  1. """A lexical analyzer class for simple shell-like syntaxes."""
  2.  
  3. # Module and documentation by Eric S. Raymond, 21 Dec 1998
  4. # Input stacking and error message cleanup added by ESR, March 2000
  5. # push_source() and pop_source() made explicit by ESR, January 2001.
  6.  
  7. import os.path
  8. import sys
  9.  
  10. __all__ = ["shlex"]
  11.  
  12. class shlex:
  13.     "A lexical analyzer class for simple shell-like syntaxes."
  14.     def __init__(self, instream=None, infile=None):
  15.         if instream:
  16.             self.instream = instream
  17.             self.infile = infile
  18.         else:
  19.             self.instream = sys.stdin
  20.             self.infile = None
  21.         self.commenters = '#'
  22.         self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
  23.                           'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
  24.         self.whitespace = ' \t\r\n'
  25.         self.quotes = '\'"'
  26.         self.state = ' '
  27.         self.pushback = []
  28.         self.lineno = 1
  29.         self.debug = 0
  30.         self.token = ''
  31.         self.filestack = []
  32.         self.source = None
  33.         if self.debug:
  34.             print 'shlex: reading from %s, line %d' \
  35.                   % (self.instream, self.lineno)
  36.  
  37.     def push_token(self, tok):
  38.         "Push a token onto the stack popped by the get_token method"
  39.         if self.debug >= 1:
  40.             print "shlex: pushing token " + `tok`
  41.         self.pushback = [tok] + self.pushback
  42.  
  43.     def push_source(self, newstream, newfile=None):
  44.         "Push an input source onto the lexer's input source stack."
  45.         self.filestack.insert(0, (self.infile, self.instream, self.lineno))
  46.         self.infile = newfile
  47.         self.instream = newstream
  48.         self.lineno = 1
  49.         if self.debug:
  50.             if newfile:
  51.                 print 'shlex: pushing to file %s' % (self.infile,)
  52.             else:
  53.                 print 'shlex: pushing to stream %s' % (self.instream,)
  54.  
  55.     def pop_source(self):
  56.         "Pop the input source stack."
  57.         self.instream.close()
  58.         (self.infile, self.instream, self.lineno) = self.filestack[0]
  59.         self.filestack = self.filestack[1:]
  60.         if self.debug:
  61.             print 'shlex: popping to %s, line %d' \
  62.                   % (self.instream, self.lineno)
  63.         self.state = ' '
  64.  
  65.     def get_token(self):
  66.         "Get a token from the input stream (or from stack if it's nonempty)"
  67.         if self.pushback:
  68.             tok = self.pushback[0]
  69.             self.pushback = self.pushback[1:]
  70.             if self.debug >= 1:
  71.                 print "shlex: popping token " + `tok`
  72.             return tok
  73.         # No pushback.  Get a token.
  74.         raw = self.read_token()
  75.         # Handle inclusions
  76.         while raw == self.source:
  77.             spec = self.sourcehook(self.read_token())
  78.             if spec:
  79.                 (newfile, newstream) = spec
  80.                 self.push_source(newstream, newfile)
  81.             raw = self.get_token()
  82.         # Maybe we got EOF instead?
  83.         while raw == "":
  84.             if len(self.filestack) == 0:
  85.                 return ""
  86.             else:
  87.                 self.pop_source()
  88.                 raw = self.get_token()
  89.          # Neither inclusion nor EOF
  90.         if self.debug >= 1:
  91.             if raw:
  92.                 print "shlex: token=" + `raw`
  93.             else:
  94.                 print "shlex: token=EOF"
  95.         return raw
  96.  
  97.     def read_token(self):
  98.         "Read a token from the input stream (no pushback or inclusions)"
  99.         tok = ''
  100.         while 1:
  101.             nextchar = self.instream.read(1)
  102.             if nextchar == '\n':
  103.                 self.lineno = self.lineno + 1
  104.             if self.debug >= 3:
  105.                 print "shlex: in state", repr(self.state), \
  106.                       "I see character:", repr(nextchar)
  107.             if self.state is None:
  108.                 self.token = ''        # past end of file
  109.                 break
  110.             elif self.state == ' ':
  111.                 if not nextchar:
  112.                     self.state = None  # end of file
  113.                     break
  114.                 elif nextchar in self.whitespace:
  115.                     if self.debug >= 2:
  116.                         print "shlex: I see whitespace in whitespace state"
  117.                     if self.token:
  118.                         break   # emit current token
  119.                     else:
  120.                         continue
  121.                 elif nextchar in self.commenters:
  122.                     self.instream.readline()
  123.                     self.lineno = self.lineno + 1
  124.                 elif nextchar in self.wordchars:
  125.                     self.token = nextchar
  126.                     self.state = 'a'
  127.                 elif nextchar in self.quotes:
  128.                     self.token = nextchar
  129.                     self.state = nextchar
  130.                 else:
  131.                     self.token = nextchar
  132.                     if self.token:
  133.                         break   # emit current token
  134.                     else:
  135.                         continue
  136.             elif self.state in self.quotes:
  137.                 self.token = self.token + nextchar
  138.                 if nextchar == self.state:
  139.                     self.state = ' '
  140.                     break
  141.                 elif not nextchar:      # end of file
  142.                     if self.debug >= 2:
  143.                         print "shlex: I see EOF in quotes state"
  144.                     # XXX what error should be raised here?
  145.                     raise ValueError, "No closing quotation"
  146.             elif self.state == 'a':
  147.                 if not nextchar:
  148.                     self.state = None   # end of file
  149.                     break
  150.                 elif nextchar in self.whitespace:
  151.                     if self.debug >= 2:
  152.                         print "shlex: I see whitespace in word state"
  153.                     self.state = ' '
  154.                     if self.token:
  155.                         break   # emit current token
  156.                     else:
  157.                         continue
  158.                 elif nextchar in self.commenters:
  159.                     self.instream.readline()
  160.                     self.lineno = self.lineno + 1
  161.                 elif nextchar in self.wordchars or nextchar in self.quotes:
  162.                     self.token = self.token + nextchar
  163.                 else:
  164.                     self.pushback = [nextchar] + self.pushback
  165.                     if self.debug >= 2:
  166.                         print "shlex: I see punctuation in word state"
  167.                     self.state = ' '
  168.                     if self.token:
  169.                         break   # emit current token
  170.                     else:
  171.                         continue
  172.         result = self.token
  173.         self.token = ''
  174.         if self.debug > 1:
  175.             if result:
  176.                 print "shlex: raw token=" + `result`
  177.             else:
  178.                 print "shlex: raw token=EOF"
  179.         return result
  180.  
  181.     def sourcehook(self, newfile):
  182.         "Hook called on a filename to be sourced."
  183.         if newfile[0] == '"':
  184.             newfile = newfile[1:-1]
  185.         # This implements cpp-like semantics for relative-path inclusion.
  186.         if type(self.infile) == type("") and not os.path.isabs(newfile):
  187.             newfile = os.path.join(os.path.dirname(self.infile), newfile)
  188.         return (newfile, open(newfile, "r"))
  189.  
  190.     def error_leader(self, infile=None, lineno=None):
  191.         "Emit a C-compiler-like, Emacs-friendly error-message leader."
  192.         if not infile:
  193.             infile = self.infile
  194.         if not lineno:
  195.             lineno = self.lineno
  196.         return "\"%s\", line %d: " % (infile, lineno)
  197.  
  198.  
  199. if __name__ == '__main__':
  200.     if len(sys.argv) == 1:
  201.         lexer = shlex()
  202.     else:
  203.         file = sys.argv[1]
  204.         lexer = shlex(open(file), file)
  205.     while 1:
  206.         tt = lexer.get_token()
  207.         if tt:
  208.             print "Token: " + repr(tt)
  209.         else:
  210.             break
  211.