home *** CD-ROM | disk | FTP | other *** search
/ Cricao de Sites - 650 Layouts Prontos / WebMasters.iso / Servidores / xampp-win32-1.6.7-installer.exe / php / PEAR / SQL / Lexer.php < prev    next >
Encoding:
PHP Script  |  2008-07-02  |  19.3 KB  |  568 lines

  1. <?php
  2. /* vim: set expandtab tabstop=4 shiftwidth=4: */
  3. // +----------------------------------------------------------------------+
  4. // | Copyright (c) 2002-2004 Brent Cook                                        |
  5. // +----------------------------------------------------------------------+
  6. // | This library is free software; you can redistribute it and/or        |
  7. // | modify it under the terms of the GNU Lesser General Public           |
  8. // | License as published by the Free Software Foundation; either         |
  9. // | version 2.1 of the License, or (at your option) any later version.   |
  10. // |                                                                      |
  11. // | This library is distributed in the hope that it will be useful,      |
  12. // | but WITHOUT ANY WARRANTY; without even the implied warranty of       |
  13. // | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    |
  14. // | Lesser General Public License for more details.                      |
  15. // |                                                                      |
  16. // | You should have received a copy of the GNU Lesser General Public     |
  17. // | License along with this library; if not, write to the Free Software  |
  18. // | Foundation, Inc., 59 Temple Place, Suite 330,Boston,MA 02111-1307 USA|
  19. // +----------------------------------------------------------------------+
  20. // | Authors: Brent Cook <busterbcook@yahoo.com>                          |
  21. // |          Jason Pell <jasonpell@hotmail.com>                          |
  22. // +----------------------------------------------------------------------+
  23. //
  24. // $Id: Lexer.php,v 1.20 2004/05/07 12:33:35 busterb Exp $
  25. //
  26.  
  27. include 'SQL/ctype.php';
  28.  
  29. // {{{ token definitions
  30. // variables: 'ident', 'sys_var'
  31. // values:    'real_val', 'text_val', 'int_val', null
  32. // }}}
  33.  
  34. /**
  35.  * A lexigraphical analyser inspired by the msql lexer
  36.  *
  37.  * @author  Brent Cook <busterbcook@yahoo.com>
  38.  * @version 0.5
  39.  * @access  public
  40.  * @package SQL_Parser
  41.  */
  42. class Lexer
  43. {
  44.     // array of valid tokens for the lexer to recognize
  45.     // format is 'token literal'=>TOKEN_VALUE
  46.     var $symbols = array();
  47.  
  48. // {{{ instance variables
  49.     var $tokPtr = 0;
  50.     var $tokStart = 0;
  51.     var $tokLen = 0;
  52.     var $tokText = '';
  53.     var $lineNo = 0;
  54.     var $lineBegin = 0;
  55.     var $string = '';
  56.     var $stringLen = 0;
  57.     
  58.     // Will not be altered by skip()
  59.     var $tokAbsStart = 0;
  60.     var $skipText = '';
  61.     
  62.     // Provide lookahead capability.
  63.     var $lookahead = 0;
  64.     // Specify how many tokens to save in tokenStack, so the
  65.     // token stream can be pushed back.
  66.     var $tokenStack = array();
  67.     var $stackPtr = 0;
  68. // }}}
  69.  
  70. // {{{ incidental functions
  71.     function Lexer($string = '', $lookahead=0)
  72.     {
  73.         $this->string = $string;
  74.         $this->stringLen = strlen($string);
  75.         $this->lookahead = $lookahead;
  76.     }
  77.     
  78.     function get() {
  79.         ++$this->tokPtr;
  80.         ++$this->tokLen;
  81.         return ($this->tokPtr <= $this->stringLen) ? $this->string{$this->tokPtr - 1} : null;
  82.     }
  83.  
  84.     function unget() {
  85.         --$this->tokPtr;
  86.         --$this->tokLen;
  87.     }
  88.  
  89.     function skip() {
  90.         ++$this->tokStart;
  91.         return ($this->tokPtr != $this->stringLen) ? $this->string{$this->tokPtr++} : '';
  92.     }
  93.  
  94.     function revert() {
  95.         $this->tokPtr = $this->tokStart;
  96.         $this->tokLen = 0;
  97.     }
  98.  
  99.     function isCompop($c) {
  100.         return (($c == '<') || ($c == '>') || ($c == '=') || ($c == '!'));
  101.     }
  102. // }}}
  103.  
  104. // {{{ pushBack()
  105. /*
  106.  * Push back a token, so the very next call to lex() will return that token.
  107.  * Calls to this function will be ignored if there is no lookahead specified
  108.  * to the constructor, or the pushBack() function has already been called the
  109.  * maximum number of token's that can be looked ahead.
  110.  */
  111. function pushBack()
  112. {
  113.     if($this->lookahead>0 && count($this->tokenStack)>0 && $this->stackPtr>0) {
  114.         $this->stackPtr--;
  115.     }
  116. }
  117. // }}}
  118.  
  119. // {{{ lex()
  120. function lex()
  121. {
  122.     if($this->lookahead>0) {
  123.         // The stackPtr, should always be the same as the count of
  124.         // elements in the tokenStack.  The stackPtr, can be thought
  125.         // of as pointing to the next token to be added.  If however
  126.         // a pushBack() call is made, the stackPtr, will be less than the
  127.         // count, to indicate that we should take that token from the
  128.         // stack, instead of calling nextToken for a new token.
  129.  
  130.         if ($this->stackPtr<count($this->tokenStack)) {
  131.  
  132.             $this->tokText = $this->tokenStack[$this->stackPtr]['tokText'];
  133.             $this->skipText = $this->tokenStack[$this->stackPtr]['skipText'];
  134.             $token = $this->tokenStack[$this->stackPtr]['token'];
  135.             
  136.             // We have read the token, so now iterate again.
  137.             $this->stackPtr++;
  138.             return $token;
  139.  
  140.         } else {
  141.  
  142.             // If $tokenStack is full (equal to lookahead), pop the oldest
  143.             // element off, to make room for the new one.
  144.  
  145.             if ($this->stackPtr == $this->lookahead) {
  146.                 // For some reason array_shift and
  147.                 // array_pop screw up the indexing, so we do it manually.
  148.                 for($i=0; $i<(count($this->tokenStack)-1); $i++) {
  149.                     $this->tokenStack[$i] = $this->tokenStack[$i+1];
  150.                 }
  151.                 
  152.                 // Indicate that we should put the element in
  153.                 // at the stackPtr position.
  154.                 $this->stackPtr--;
  155.             }
  156.             
  157.             $token = $this->nextToken();
  158.             $this->tokenStack[$this->stackPtr] =
  159.                 array('token'=>$token,
  160.                       'tokText'=>$this->tokText,
  161.                       'skipText'=>$this->skipText);
  162.             $this->stackPtr++;
  163.             return $token;
  164.         }
  165.     }
  166.     else
  167.     {
  168.         return $this->nextToken();
  169.     }
  170. }
  171. // }}}
  172.  
  173. // {{{ nextToken()
  174. function nextToken()
  175. {
  176.     if ($this->string == '') return;
  177.     $state = 0;
  178.     $this->tokAbsStart = $this->tokStart;
  179.     
  180.     while (true){
  181.         //echo "State: $state, Char: $c\n";
  182.         switch($state) {
  183.             // {{{ State 0 : Start of token
  184.             case 0:
  185.                 $this->tokPtr = $this->tokStart;
  186.                 $this->tokText = '';
  187.                 $this->tokLen = 0;
  188.                 $c = $this->get();
  189.  
  190.                 if (is_null($c)) { // End Of Input
  191.                     $state = 1000;
  192.                     break;
  193.                 }
  194.  
  195.                 while (($c == ' ') || ($c == "\t")
  196.                     || ($c == "\n") || ($c == "\r")) {
  197.                     if ($c == "\n" || $c == "\r") {
  198.                         // Handle MAC/Unix/Windows line endings.
  199.                         if($c == "\r") {
  200.                             $c = $this->skip();
  201.                                 
  202.                             // If not DOS newline
  203.                             if($c != "\n")
  204.                                 $this->unget();
  205.                         }
  206.                         ++$this->lineNo;
  207.                         $this->lineBegin = $this->tokPtr;
  208.                     }
  209.                     
  210.                        $c = $this->skip();
  211.                     $this->tokLen = 1;
  212.                 }
  213.                 
  214.                 // Escape quotes and backslashes
  215.                 if ($c == '\\') {
  216.                      $t = $this->get();
  217.                     if ($t == '\'' || $t == '\\' || $t == '"') {
  218.                         $this->tokText = $t;
  219.                         $this->tokStart = $this->tokPtr;
  220.                         return $this->tokText;
  221.                     } else {
  222.                         $this->unget();
  223.                         
  224.                         // Unknown token.  Revert to single char
  225.                         $state = 999;
  226.                         break;
  227.                     }
  228.                 }
  229.                 
  230.                 if (($c == '\'') || ($c == '"')) { // text string
  231.                     $quote = $c;
  232.                     $state = 12;
  233.                     break;
  234.                 }
  235.  
  236.                 if ($c == '_') { // system variable
  237.                     $state = 18;
  238.                     break;
  239.                 }
  240.  
  241.                 if (ctype_alpha(ord($c))) { // keyword or ident
  242.                     $state = 1;
  243.                     break;
  244.                 }
  245.  
  246.                 if (ctype_digit(ord($c))) { // real or int number
  247.                     $state = 5;
  248.                     break;
  249.                 }
  250.  
  251.                 if ($c == '.') {
  252.                     $t = $this->get();
  253.                     if ($t == '.') { // ellipsis
  254.                         if ($this->get() == '.') {
  255.                             $this->tokText = '...';
  256.                             $this->tokStart = $this->tokPtr;
  257.                             return $this->tokText;
  258.                         } else {
  259.                             $state = 999;
  260.                             break;
  261.                         }
  262.                     } else if (ctype_digit(ord($t))) { // real number
  263.                         $this->unget();
  264.                         $state = 7;
  265.                         break;
  266.                     } else { // period
  267.                         $this->unget();
  268.                     }
  269.                 }
  270.  
  271.                 if ($c == '#') { // Comments
  272.                     $state = 14;
  273.                     break;
  274.                 }
  275.                 if ($c == '-') {
  276.                     $t = $this->get();
  277.                     if ($t == '-') {
  278.                         $state = 14;
  279.                         break;
  280.                     } else { // negative number
  281.                         $this->unget();
  282.                         $state = 5;
  283.                         break;
  284.                     }
  285.                 }
  286.  
  287.                 if ($this->isCompop($c)) { // comparison operator
  288.                     $state = 10;
  289.                     break;
  290.                 }
  291.                 // Unknown token.  Revert to single char
  292.                 $state = 999;
  293.                 break;
  294.             // }}}
  295.  
  296.             // {{{ State 1 : Incomplete keyword or ident
  297.             case 1:
  298.                 $c = $this->get();
  299.                 if (ctype_alnum(ord($c)) || ($c == '_') || ($c == '.')) {
  300.                     $state = 1;
  301.                     break;
  302.                 }
  303.                 $state = 2;
  304.                 break;
  305.             // }}}
  306.  
  307.             /* {{{ State 2 : Complete keyword or ident */
  308.             case 2:
  309.                 $this->unget();
  310.                 $this->tokText = substr($this->string, $this->tokStart,
  311.                                         $this->tokLen);
  312.                 
  313.                 $testToken = strtolower($this->tokText);
  314.                 if (isset($this->symbols[$testToken])) {
  315.                 
  316.                     $this->skipText = substr($this->string, $this->tokAbsStart,
  317.                                             $this->tokStart-$this->tokAbsStart);
  318.                     $this->tokStart = $this->tokPtr;
  319.                     return $testToken;
  320.                 } else {
  321.                     $this->skipText = substr($this->string, $this->tokAbsStart,
  322.                                             $this->tokStart-$this->tokAbsStart);
  323.                     $this->tokStart = $this->tokPtr;
  324.                     return 'ident';
  325.                 }
  326.                 break;
  327.             // }}}
  328.  
  329.             // {{{ State 5: Incomplete real or int number
  330.             case 5:
  331.                 $c = $this->get();
  332.                 if (ctype_digit(ord($c))) {
  333.                     $state = 5;
  334.                     break;
  335.                 } else if ($c == '.') {
  336.                     $t = $this->get();
  337.                     if($t == '.') { // ellipsis
  338.                         $this->unget();
  339.                     } else { // real number
  340.                         $state = 7;
  341.                         break;
  342.                     }
  343.                 } else if(ctype_alpha(ord($c))) { // number must end with non-alpha character
  344.                     $state = 999;
  345.                     break;
  346.                 } else {
  347.                 // complete number
  348.                 $state = 6;
  349.                 break;
  350.                 }
  351.             // }}}
  352.  
  353.             // {{{ State 6: Complete integer number
  354.             case 6:
  355.                 $this->unget();
  356.                 $this->tokText = intval(substr($this->string, $this->tokStart,
  357.                                                $this->tokLen));
  358.                 $this->skipText = substr($this->string, $this->tokAbsStart,
  359.                                          $this->tokStart-$this->tokAbsStart);
  360.                 $this->tokStart = $this->tokPtr;
  361.                 return 'int_val';
  362.                 break;
  363.             // }}}
  364.  
  365.             // {{{ State 7: Incomplete real number
  366.             case 7:
  367.                 $c = $this->get();
  368.  
  369.                 /* Analogy Start */
  370.                 if ($c == 'e' || $c == 'E') {
  371.                         $state = 15;
  372.                         break;
  373.                 }
  374.                 /* Analogy End   */
  375.  
  376.                 if (ctype_digit(ord($c))) {
  377.                     $state = 7;
  378.                     break;
  379.                 }
  380.                 $state = 8;
  381.                 break;
  382.             // }}}
  383.  
  384.             // {{{ State 8: Complete real number */
  385.             case 8:
  386.                 $this->unget();
  387.                 $this->tokText = floatval(substr($this->string, $this->tokStart,
  388.                                         $this->tokLen));
  389.                 $this->skipText = substr($this->string, $this->tokAbsStart,
  390.                                          $this->tokStart-$this->tokAbsStart);
  391.                 $this->tokStart = $this->tokPtr;
  392.                 return 'real_val';
  393.             // }}}
  394.  
  395.             // {{{ State 10: Incomplete comparison operator
  396.             case 10:
  397.                 $c = $this->get();
  398.                 if ($this->isCompop($c))
  399.                 {
  400.                     $state = 10;
  401.                     break;
  402.                 }
  403.                 $state = 11;
  404.                 break;
  405.             // }}}
  406.  
  407.             // {{{ State 11: Complete comparison operator
  408.             case 11:
  409.                 $this->unget();
  410.                 $this->tokText = substr($this->string, $this->tokStart,
  411.                                         $this->tokLen);
  412.                 if($this->tokText) {
  413.                     $this->skipText = substr($this->string, $this->tokAbsStart,
  414.                                             $this->tokStart-$this->tokAbsStart);
  415.                     $this->tokStart = $this->tokPtr;
  416.                     return $this->tokText;
  417.                 }
  418.                 $state = 999;
  419.                 break;
  420.             // }}}
  421.  
  422.             // {{{ State 12: Incomplete text string
  423.             case 12:
  424.                 $bail = false;
  425.                 while (!$bail) {
  426.                     switch ($this->get()) {
  427.                         case '':
  428.                             $this->tokText = null;
  429.                             $bail = true;
  430.                             break;
  431.                         case "\\":
  432.                             if (!$this->get()) {
  433.                                 $this->tokText = null;
  434.                                 $bail = true;
  435.                             }
  436.                                 //$bail = true;
  437.                             break;
  438.                         case $quote:
  439.                             $this->tokText = stripslashes(substr($this->string,
  440.                                        ($this->tokStart+1), ($this->tokLen-2)));
  441.                             $bail = true;
  442.                             break;
  443.                     }
  444.                 }
  445.                 if (!is_null($this->tokText)) {
  446.                     $state = 13;
  447.                     break;
  448.                 }
  449.                 $state = 999;
  450.                 break;
  451.             // }}}
  452.  
  453.             // {{{ State 13: Complete text string
  454.             case 13:
  455.                 $this->skipText = substr($this->string, $this->tokAbsStart,
  456.                                          $this->tokStart-$this->tokAbsStart);
  457.                 $this->tokStart = $this->tokPtr;
  458.                 return 'text_val';
  459.                 break;
  460.             // }}}
  461.  
  462.             // {{{ State 14: Comment
  463.             case 14:
  464.                 $c = $this->skip();
  465.                 if ($c == "\n" || $c == "\r" || $c == "") {
  466.                     // Handle MAC/Unix/Windows line endings.
  467.                     if ($c == "\r") {
  468.                         $c = $this->skip();
  469.                         // If not DOS newline
  470.                         if ($c != "\n") {
  471.                             $this->unget();
  472.                         }
  473.                     }
  474.  
  475.                     if ($c != "") {
  476.                         ++$this->lineNo;
  477.                         $this->lineBegin = $this->tokPtr;
  478.                     }
  479.  
  480.                     // We need to skip all the text.
  481.                     $this->tokStart = $this->tokPtr;
  482.                     $state = 0;
  483.                 } else {
  484.                     $state = 14;
  485.                 }
  486.                 break;
  487.             // }}}
  488.  
  489.             // {{{ State 15: Exponent Sign in Scientific Notation
  490.             case 15:
  491.                     $c = $this->get();
  492.                     if($c == '-' || $c == '+') {
  493.                             $state = 16;
  494.                             break;
  495.                     }
  496.                     $state = 999;
  497.                     break;
  498.             // }}}
  499.  
  500.             // {{{ state 16: Exponent Value-first digit in Scientific Notation
  501.             case 16:
  502.                     $c = $this->get();
  503.                     if (ctype_digit(ord($c))) {
  504.                             $state = 17;
  505.                             break;
  506.                     }
  507.                     $state = 999;  // if no digit, then token is unknown
  508.                     break;
  509.             // }}}
  510.  
  511.             // {{{ State 17: Exponent Value in Scientific Notation
  512.             case 17:
  513.                     $c = $this->get();
  514.                     if (ctype_digit(ord($c))) {
  515.                             $state = 17;
  516.                             break;
  517.                     }
  518.                     $state = 8;  // At least 1 exponent digit was required
  519.                     break;
  520.             // }}}
  521.  
  522.             // {{{ State 18 : Incomplete System Variable
  523.             case 18:
  524.                 $c = $this->get();
  525.                 if (ctype_alnum(ord($c)) || $c == '_') {
  526.                     $state = 18;
  527.                     break;
  528.                 }
  529.                 $state = 19;
  530.                 break;
  531.             // }}}
  532.  
  533.             // {{{ State 19: Complete Sys Var
  534.             case 19:
  535.                 $this->unget();
  536.                 $this->tokText = substr($this->string, $this->tokStart,
  537.                                         $this->tokLen);
  538.                 $this->skipText = substr($this->string, $this->tokAbsStart,
  539.                                          $this->tokStart-$this->tokAbsStart);
  540.                 $this->tokStart = $this->tokPtr;
  541.                 return 'sys_var';
  542.             // }}}
  543.  
  544.             // {{{ State 999 : Unknown token.  Revert to single char
  545.             case 999:
  546.                 $this->revert();
  547.                 $this->tokText = $this->get();
  548.                 $this->skipText = substr($this->string, $this->tokAbsStart,
  549.                                          $this->tokStart-$this->tokAbsStart);
  550.                 $this->tokStart = $this->tokPtr;
  551.                 return $this->tokText;
  552.             // }}}
  553.  
  554.             // {{{ State 1000 : End Of Input
  555.             case 1000:
  556.                 $this->tokText = '*end of input*';
  557.                 $this->skipText = substr($this->string, $this->tokAbsStart,
  558.                                          $this->tokStart-$this->tokAbsStart);
  559.                 $this->tokStart = $this->tokPtr;
  560.                 return null;
  561.             // }}}
  562.         }
  563.     }
  564. }
  565. // }}}
  566. }
  567. ?>
  568.