home *** CD-ROM | disk | FTP | other *** search
/ Freelog Special Freeware 31 / FreelogHS31.iso / Texte / scribus / scribus-1.3.3.9-win32-install.exe / lib / shlex.py < prev    next >
Text File  |  2004-06-01  |  11KB  |  293 lines

  1. # -*- coding: iso-8859-1 -*-
  2. """A lexical analyzer class for simple shell-like syntaxes."""
  3.  
  4. # Module and documentation by Eric S. Raymond, 21 Dec 1998
  5. # Input stacking and error message cleanup added by ESR, March 2000
  6. # push_source() and pop_source() made explicit by ESR, January 2001.
  7. # Posix compliance, split(), string arguments, and
  8. # iterator interface by Gustavo Niemeyer, April 2003.
  9.  
import os.path
import sys
from collections import deque

try:
    from cStringIO import StringIO
except ImportError:
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
  18.  
  19. __all__ = ["shlex", "split"]
  20.  
  21. class shlex:
  22.     "A lexical analyzer class for simple shell-like syntaxes."
  23.     def __init__(self, instream=None, infile=None, posix=False):
  24.         if isinstance(instream, basestring):
  25.             instream = StringIO(instream)
  26.         if instream is not None:
  27.             self.instream = instream
  28.             self.infile = infile
  29.         else:
  30.             self.instream = sys.stdin
  31.             self.infile = None
  32.         self.posix = posix
  33.         if posix:
  34.             self.eof = None
  35.         else:
  36.             self.eof = ''
  37.         self.commenters = '#'
  38.         self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
  39.                           'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
  40.         if self.posix:
  41.             self.wordchars += ('▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷°∙·√ⁿ²■ '
  42.                                '└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╪┘┌█▄▌▐')
  43.         self.whitespace = ' \t\r\n'
  44.         self.whitespace_split = False
  45.         self.quotes = '\'"'
  46.         self.escape = '\\'
  47.         self.escapedquotes = '"'
  48.         self.state = ' '
  49.         self.pushback = deque()
  50.         self.lineno = 1
  51.         self.debug = 0
  52.         self.token = ''
  53.         self.filestack = deque()
  54.         self.source = None
  55.         if self.debug:
  56.             print 'shlex: reading from %s, line %d' \
  57.                   % (self.instream, self.lineno)
  58.  
  59.     def push_token(self, tok):
  60.         "Push a token onto the stack popped by the get_token method"
  61.         if self.debug >= 1:
  62.             print "shlex: pushing token " + repr(tok)
  63.         self.pushback.appendleft(tok)
  64.  
  65.     def push_source(self, newstream, newfile=None):
  66.         "Push an input source onto the lexer's input source stack."
  67.         if isinstance(newstream, basestring):
  68.             newstream = StringIO(newstream)
  69.         self.filestack.appendleft((self.infile, self.instream, self.lineno))
  70.         self.infile = newfile
  71.         self.instream = newstream
  72.         self.lineno = 1
  73.         if self.debug:
  74.             if newfile is not None:
  75.                 print 'shlex: pushing to file %s' % (self.infile,)
  76.             else:
  77.                 print 'shlex: pushing to stream %s' % (self.instream,)
  78.  
  79.     def pop_source(self):
  80.         "Pop the input source stack."
  81.         self.instream.close()
  82.         (self.infile, self.instream, self.lineno) = self.filestack.popleft()
  83.         if self.debug:
  84.             print 'shlex: popping to %s, line %d' \
  85.                   % (self.instream, self.lineno)
  86.         self.state = ' '
  87.  
  88.     def get_token(self):
  89.         "Get a token from the input stream (or from stack if it's nonempty)"
  90.         if self.pushback:
  91.             tok = self.pushback.popleft()
  92.             if self.debug >= 1:
  93.                 print "shlex: popping token " + repr(tok)
  94.             return tok
  95.         # No pushback.  Get a token.
  96.         raw = self.read_token()
  97.         # Handle inclusions
  98.         if self.source is not None:
  99.             while raw == self.source:
  100.                 spec = self.sourcehook(self.read_token())
  101.                 if spec:
  102.                     (newfile, newstream) = spec
  103.                     self.push_source(newstream, newfile)
  104.                 raw = self.get_token()
  105.         # Maybe we got EOF instead?
  106.         while raw == self.eof:
  107.             if not self.filestack:
  108.                 return self.eof
  109.             else:
  110.                 self.pop_source()
  111.                 raw = self.get_token()
  112.         # Neither inclusion nor EOF
  113.         if self.debug >= 1:
  114.             if raw != self.eof:
  115.                 print "shlex: token=" + repr(raw)
  116.             else:
  117.                 print "shlex: token=EOF"
  118.         return raw
  119.  
  120.     def read_token(self):
  121.         quoted = False
  122.         escapedstate = ' '
  123.         while True:
  124.             nextchar = self.instream.read(1)
  125.             if nextchar == '\n':
  126.                 self.lineno = self.lineno + 1
  127.             if self.debug >= 3:
  128.                 print "shlex: in state", repr(self.state), \
  129.                       "I see character:", repr(nextchar)
  130.             if self.state is None:
  131.                 self.token = ''        # past end of file
  132.                 break
  133.             elif self.state == ' ':
  134.                 if not nextchar:
  135.                     self.state = None  # end of file
  136.                     break
  137.                 elif nextchar in self.whitespace:
  138.                     if self.debug >= 2:
  139.                         print "shlex: I see whitespace in whitespace state"
  140.                     if self.token or (self.posix and quoted):
  141.                         break   # emit current token
  142.                     else:
  143.                         continue
  144.                 elif nextchar in self.commenters:
  145.                     self.instream.readline()
  146.                     self.lineno = self.lineno + 1
  147.                 elif self.posix and nextchar in self.escape:
  148.                     escapedstate = 'a'
  149.                     self.state = nextchar
  150.                 elif nextchar in self.wordchars:
  151.                     self.token = nextchar
  152.                     self.state = 'a'
  153.                 elif nextchar in self.quotes:
  154.                     if not self.posix:
  155.                         self.token = nextchar
  156.                     self.state = nextchar
  157.                 elif self.whitespace_split:
  158.                     self.token = nextchar
  159.                     self.state = 'a'
  160.                 else:
  161.                     self.token = nextchar
  162.                     if self.token or (self.posix and quoted):
  163.                         break   # emit current token
  164.                     else:
  165.                         continue
  166.             elif self.state in self.quotes:
  167.                 quoted = True
  168.                 if not nextchar:      # end of file
  169.                     if self.debug >= 2:
  170.                         print "shlex: I see EOF in quotes state"
  171.                     # XXX what error should be raised here?
  172.                     raise ValueError, "No closing quotation"
  173.                 if nextchar == self.state:
  174.                     if not self.posix:
  175.                         self.token = self.token + nextchar
  176.                         self.state = ' '
  177.                         break
  178.                     else:
  179.                         self.state = 'a'
  180.                 elif self.posix and nextchar in self.escape and \
  181.                      self.state in self.escapedquotes:
  182.                     escapedstate = self.state
  183.                     self.state = nextchar
  184.                 else:
  185.                     self.token = self.token + nextchar
  186.             elif self.state in self.escape:
  187.                 if not nextchar:      # end of file
  188.                     if self.debug >= 2:
  189.                         print "shlex: I see EOF in escape state"
  190.                     # XXX what error should be raised here?
  191.                     raise ValueError, "No escaped character"
  192.                 # In posix shells, only the quote itself or the escape
  193.                 # character may be escaped within quotes.
  194.                 if escapedstate in self.quotes and \
  195.                    nextchar != self.state and nextchar != escapedstate:
  196.                     self.token = self.token + self.state
  197.                 self.token = self.token + nextchar
  198.                 self.state = escapedstate
  199.             elif self.state == 'a':
  200.                 if not nextchar:
  201.                     self.state = None   # end of file
  202.                     break
  203.                 elif nextchar in self.whitespace:
  204.                     if self.debug >= 2:
  205.                         print "shlex: I see whitespace in word state"
  206.                     self.state = ' '
  207.                     if self.token or (self.posix and quoted):
  208.                         break   # emit current token
  209.                     else:
  210.                         continue
  211.                 elif nextchar in self.commenters:
  212.                     self.instream.readline()
  213.                     self.lineno = self.lineno + 1
  214.                     if self.posix:
  215.                         self.state = ' '
  216.                         if self.token or (self.posix and quoted):
  217.                             break   # emit current token
  218.                         else:
  219.                             continue
  220.                 elif self.posix and nextchar in self.quotes:
  221.                     self.state = nextchar
  222.                 elif self.posix and nextchar in self.escape:
  223.                     escapedstate = 'a'
  224.                     self.state = nextchar
  225.                 elif nextchar in self.wordchars or nextchar in self.quotes \
  226.                     or self.whitespace_split:
  227.                     self.token = self.token + nextchar
  228.                 else:
  229.                     self.pushback.appendleft(nextchar)
  230.                     if self.debug >= 2:
  231.                         print "shlex: I see punctuation in word state"
  232.                     self.state = ' '
  233.                     if self.token:
  234.                         break   # emit current token
  235.                     else:
  236.                         continue
  237.         result = self.token
  238.         self.token = ''
  239.         if self.posix and not quoted and result == '':
  240.             result = None
  241.         if self.debug > 1:
  242.             if result:
  243.                 print "shlex: raw token=" + repr(result)
  244.             else:
  245.                 print "shlex: raw token=EOF"
  246.         return result
  247.  
  248.     def sourcehook(self, newfile):
  249.         "Hook called on a filename to be sourced."
  250.         if newfile[0] == '"':
  251.             newfile = newfile[1:-1]
  252.         # This implements cpp-like semantics for relative-path inclusion.
  253.         if isinstance(self.infile, basestring) and not os.path.isabs(newfile):
  254.             newfile = os.path.join(os.path.dirname(self.infile), newfile)
  255.         return (newfile, open(newfile, "r"))
  256.  
  257.     def error_leader(self, infile=None, lineno=None):
  258.         "Emit a C-compiler-like, Emacs-friendly error-message leader."
  259.         if infile is None:
  260.             infile = self.infile
  261.         if lineno is None:
  262.             lineno = self.lineno
  263.         return "\"%s\", line %d: " % (infile, lineno)
  264.  
  265.     def __iter__(self):
  266.         return self
  267.  
  268.     def next(self):
  269.         token = self.get_token()
  270.         if token == self.eof:
  271.             raise StopIteration
  272.         return token
  273.  
  274. def split(s, comments=False):
  275.     lex = shlex(s, posix=True)
  276.     lex.whitespace_split = True
  277.     if not comments:
  278.         lex.commenters = ''
  279.     return list(lex)
  280.  
  281. if __name__ == '__main__':
  282.     if len(sys.argv) == 1:
  283.         lexer = shlex()
  284.     else:
  285.         file = sys.argv[1]
  286.         lexer = shlex(open(file), file)
  287.     while 1:
  288.         tt = lexer.get_token()
  289.         if tt:
  290.             print "Token: " + repr(tt)
  291.         else:
  292.             break
  293.