home *** CD-ROM | disk | FTP | other *** search
/ Computer Active Guide 2009 July / CAG7.ISO / Internetas / SafariSetup.exe / AppleApplicationSupport.msi / WebKit.resources_inspector_SourceHTMLTokenizer.re2js < prev    next >
Encoding:
Text File  |  2010-06-03  |  13.0 KB  |  304 lines

  1. /*
  2.  * Copyright (C) 2009 Google Inc. All rights reserved.
  3.  *
  4.  * Redistribution and use in source and binary forms, with or without
  5.  * modification, are permitted provided that the following conditions are
  6.  * met:
  7.  *
  8.  *     * Redistributions of source code must retain the above copyright
  9.  * notice, this list of conditions and the following disclaimer.
  10.  *     * Redistributions in binary form must reproduce the above
  11.  * copyright notice, this list of conditions and the following disclaimer
  12.  * in the documentation and/or other materials provided with the
  13.  * distribution.
  14.  *     * Neither the name of Google Inc. nor the names of its
  15.  * contributors may be used to endorse or promote products derived from
  16.  * this software without specific prior written permission.
  17.  *
  18.  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19.  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20.  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21.  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22.  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23.  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24.  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25.  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26.  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27.  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28.  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29.  */
  30.  
  31. // Generate js file as follows:
  32. //
  33. // re2c -isc WebCore/inspector/front-end/SourceHTMLTokenizer.re2js \
  34. // | sed 's|^yy\([^:]*\)*\:|case \1:|' \
  35. // | sed 's|[*]cursor[+][+]|this._charAt(cursor++)|' \
  36. // | sed 's|[[*][+][+]cursor|this._charAt(++cursor)|' \
  37. // | sed 's|[*]cursor|this._charAt(cursor)|' \
  38. // | sed 's|yych = \*\([^;]*\)|yych = this._charAt\1|' \
  39. // | sed 's|goto case \([^;]*\)|{ gotoCase = \1; continue; }|' \
  40. // | sed 's|unsigned\ int|var|' \
  41. // | sed 's|var\ yych|case 1: var yych|'
  42.  
  /**
   * Tokenizer for HTML source text. Invokes the WebInspector.SourceTokenizer
   * constructor on the new instance and then installs the lexer and parser
   * state tables that the re2c-generated state machine in nextToken() uses.
   * @constructor
   */
  WebInspector.SourceHTMLTokenizer = function()
  {
      WebInspector.SourceTokenizer.call(this);

      // Lexer start conditions. The order is determined by the generated code.
      var lexConditions = {
          INITIAL: 0,
          COMMENT: 1,
          DOCTYPE: 2,
          TAG: 3,
          DSTRING: 4,
          SSTRING: 5
      };

      // Parser state, kept as bit flags so that several of them can be set at once.
      var parseConditions = {
          INITIAL: 0,
          ATTRIBUTE: 1,
          ATTRIBUTE_VALUE: 2,
          LINKIFY: 4,
          A_NODE: 8,
          SCRIPT: 16
      };

      this._lexConditions = lexConditions;

      // Labels for the per-condition entry points; the re2c template refers to
      // them through its 'condprefix = "case this.case_"' setting.
      this.case_INITIAL = 1000;
      this.case_COMMENT = 1001;
      this.case_DOCTYPE = 1002;
      this.case_TAG = 1003;
      this.case_DSTRING = 1004;
      this.case_SSTRING = 1005;

      this._parseConditions = parseConditions;

      // Tokenizing starts outside of any tag, comment or string.
      this.initialCondition = {
          lexCondition: this._lexConditions.INITIAL,
          parseCondition: this._parseConditions.INITIAL
      };
      this.condition = this.initialCondition;
  };
  75.  
  76. WebInspector.SourceHTMLTokenizer.prototype = {
  77.     set line(line) {
  78.         if (this._internalJavaScriptTokenizer) {
  79.             var match = /<\/script/i.exec(line);
  80.             if (match) {
  81.                 this._internalJavaScriptTokenizer.line = line.substring(0, match.index);
  82.             } else
  83.                 this._internalJavaScriptTokenizer.line = line;
  84.         }
  85.         this._line = line;
  86.     },
  87.  
  88.     _isExpectingAttribute: function()
  89.     {
  90.         return this._condition.parseCondition & this._parseConditions.ATTRIBUTE;
  91.     },
  92.  
  93.     _isExpectingAttributeValue: function()
  94.     {
  95.         return this._condition.parseCondition & this._parseConditions.ATTRIBUTE_VALUE;
  96.     },
  97.  
  98.     _setExpectingAttribute: function()
  99.     {
  100.         if (this._isExpectingAttributeValue())
  101.             this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE_VALUE;
  102.         this._condition.parseCondition |= this._parseConditions.ATTRIBUTE;
  103.     },
  104.  
  105.     _setExpectingAttributeValue: function()
  106.     {
  107.         if (this._isExpectingAttribute())
  108.             this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE;
  109.         this._condition.parseCondition |= this._parseConditions.ATTRIBUTE_VALUE;
  110.     },
  111.  
  112.     _stringToken: function(cursor, stringEnds)
  113.     {
  114.         if (!this._isExpectingAttributeValue()) {
  115.             this.tokenType = null;
  116.             return cursor;
  117.         }
  118.         this.tokenType = this._attrValueTokenType();
  119.         if (stringEnds)
  120.             this._setExpectingAttribute();
  121.         return cursor;
  122.     },
  123.  
  124.     _attrValueTokenType: function()
  125.     {
  126.         if (this._condition.parseCondition & this._parseConditions.LINKIFY) {
  127.             if (this._condition.parseCondition & this._parseConditions.A_NODE)
  128.                 return "html-external-link";
  129.             return "html-resource-link";
  130.         }
  131.         return "html-attribute-value";
  132.     },
  133.  
  134.     nextToken: function(cursor)
  135.     {
  136.         if (this._internalJavaScriptTokenizer) {
  137.             // Re-set line to force </script> detection first.
  138.             this.line = this._line;
  139.             if (cursor !== this._internalJavaScriptTokenizer._line.length) {
  140.                 // Tokenizer is stateless, so restore its condition before tokenizing and save it after.
  141.                 this._internalJavaScriptTokenizer.condition = this._condition.internalJavaScriptTokenizerCondition;
  142.                 var result = this._internalJavaScriptTokenizer.nextToken(cursor);
  143.                 this.tokenType = this._internalJavaScriptTokenizer.tokenType;
  144.                 this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.condition;
  145.                 return result;
  146.             } else if (cursor !== this._line.length)
  147.                 delete this._internalJavaScriptTokenizer;
  148.         }
  149.  
  150.         var cursorOnEnter = cursor;
  151.         var gotoCase = 1;
  152.         while (1) {
  153.             switch (gotoCase)
  154.             // Following comment is replaced with generated state machine.
  155.             /*!re2c
  156.                 re2c:define:YYCTYPE  = "var";
  157.                 re2c:define:YYCURSOR = cursor;
  158.                 re2c:define:YYGETCONDITION = "this.getLexCondition";
  159.                 re2c:define:YYSETCONDITION = "this.setLexCondition";
  160.                 re2c:condprefix = "case this.case_";
  161.                 re2c:condenumprefix = "this._lexConditions.";
  162.                 re2c:yyfill:enable = 0;
  163.                 re2c:labelprefix = "case ";
  164.                 re2c:indent:top = 2;
  165.                 re2c:indent:string = "    ";
  166.  
  167.                 CommentContent = ([^-\r\n] | ("--" [^>]))*;
  168.                 Comment = "<!--" CommentContent "-->";
  169.                 CommentStart = "<!--" CommentContent [\r\n];
  170.                 CommentEnd = CommentContent "-->";
  171.  
  172.                 DocTypeStart = "<!" [Dd] [Oo] [Cc] [Tt] [Yy] [Pp] [Ee];
  173.                 DocTypeContent = [^\r\n>]*;
  174.  
  175.                 ScriptStart = "<" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt];
  176.                 ScriptEnd = "</" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt];
  177.  
  178.                 LT = "<" | "</";
  179.                 GT = ">";
  180.                 EqualSign = "=";
  181.  
  182.                 DoubleStringContent = [^\r\n\"]*;
  183.                 SingleStringContent = [^\r\n\']*;
  184.                 StringLiteral = "\"" DoubleStringContent "\"" | "'" SingleStringContent "'";
  185.                 DoubleStringStart = "\"" DoubleStringContent [\r\n];
  186.                 DoubleStringEnd = DoubleStringContent "\"";
  187.                 SingleStringStart = "'" SingleStringContent [\r\n];
  188.                 SingleStringEnd = SingleStringContent "'";
  189.  
  190.                 Identifier = [^ \r\n"'<>\[\]=]+;
  191.  
  192.                 <INITIAL> Comment { this.tokenType = "html-comment"; return cursor; }
  193.                 <INITIAL> CommentStart => COMMENT { this.tokenType = "html-comment"; return cursor; }
  194.                 <COMMENT> CommentContent => COMMENT { this.tokenType = "html-comment"; return cursor; }
  195.                 <COMMENT> CommentEnd => INITIAL { this.tokenType = "html-comment"; return cursor; }
  196.  
  197.                 <INITIAL> DocTypeStart => DOCTYPE { this.tokenType = "html-doctype"; return cursor; }
  198.                 <DOCTYPE> DocTypeContent => DOCTYPE { this.tokenType = "html-doctype"; return cursor; }
  199.                 <DOCTYPE> GT => INITIAL { this.tokenType = "html-doctype"; return cursor; }
  200.  
  201.                 <INITIAL> ScriptStart => TAG
  202.                 {
  203.                     if (this._condition.parseCondition & this._parseConditions.SCRIPT) {
  204.                         // Do not tokenize script tag contents, keep lexer state although processing "<".
  205.                         this.setLexCondition(this._lexConditions.INITIAL);
  206.                         this.tokenType = null;
  207.                         return cursor;
  208.                     }
  209.                     this.tokenType = "html-tag";
  210.                     this._condition.parseCondition = this._parseConditions.SCRIPT;
  211.                     this._setExpectingAttribute();
  212.                     return cursor;
  213.                 }
  214.  
  215.                 <INITIAL> ScriptEnd => TAG
  216.                 {
  217.                     this.tokenType = "html-tag";
  218.                     this._condition.parseCondition = this._parseConditions.INITIAL;
  219.                     return cursor;
  220.                 }
  221.  
  222.                 <INITIAL> LT => TAG
  223.                 {
  224.                     if (this._condition.parseCondition & this._parseConditions.SCRIPT) {
  225.                         // Do not tokenize script tag contents, keep lexer state although processing "<".
  226.                         this.setLexCondition(this._lexConditions.INITIAL);
  227.                         this.tokenType = null;
  228.                         return cursor;
  229.                     }
  230.  
  231.                     this._condition.parseCondition = this._parseConditions.INITIAL;
  232.                     this.tokenType = "html-tag";
  233.                     return cursor;
  234.                 }
  235.   
  236.                 <TAG> GT => INITIAL
  237.                 {
  238.                     this.tokenType = "html-tag";
  239.                     if (this._condition.parseCondition & this._parseConditions.SCRIPT) {
  240.                         if (!this._internalJavaScriptTokenizer) {
  241.                             this._internalJavaScriptTokenizer = WebInspector.SourceTokenizer.Registry.getInstance().getTokenizer("text/javascript");
  242.                             this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.initialCondition;
  243.                         }
  244.                         // Do not tokenize script tag contents.
  245.                         return cursor;
  246.                     }
  247.  
  248.                     this._condition.parseCondition = this._parseConditions.INITIAL;
  249.                     return cursor;
  250.                 }
  251.  
  252.                 <TAG> StringLiteral { return this._stringToken(cursor, true); }
  253.                 <TAG> DoubleStringStart => DSTRING { return this._stringToken(cursor); }
  254.                 <DSTRING> DoubleStringContent => DSTRING { return this._stringToken(cursor); }
  255.                 <DSTRING> DoubleStringEnd => TAG { return this._stringToken(cursor, true); }
  256.                 <TAG> SingleStringStart => SSTRING { return this._stringToken(cursor); }
  257.                 <SSTRING> SingleStringContent => SSTRING { return this._stringToken(cursor); }
  258.                 <SSTRING> SingleStringEnd => TAG { return this._stringToken(cursor, true); }
  259.  
  260.                 <TAG> EqualSign => TAG
  261.                 {
  262.                     if (this._isExpectingAttribute())
  263.                         this._setExpectingAttributeValue();
  264.                     this.tokenType = null;
  265.                     return cursor;
  266.                 }
  267.  
  268.                 <TAG> Identifier
  269.                 {
  270.                     if (this._condition.parseCondition === this._parseConditions.SCRIPT) {
  271.                         // Fall through if expecting attributes.
  272.                         this.tokenType = null;
  273.                         return cursor;
  274.                     }
  275.  
  276.                     if (this._condition.parseCondition === this._parseConditions.INITIAL) {
  277.                         this.tokenType = "html-tag";
  278.                         this._setExpectingAttribute();
  279.                         var token = this._line.substring(cursorOnEnter, cursor);
  280.                         if (token === "a")
  281.                             this._condition.parseCondition |= this._parseConditions.A_NODE;
  282.                         else if (this._condition.parseCondition & this._parseConditions.A_NODE)
  283.                             this._condition.parseCondition ^= this._parseConditions.A_NODE;
  284.                     } else if (this._isExpectingAttribute()) {
  285.                         var token = this._line.substring(cursorOnEnter, cursor);
  286.                         if (token === "href" || token === "src")
  287.                             this._condition.parseCondition |= this._parseConditions.LINKIFY;
  288.                         else if (this._condition.parseCondition |= this._parseConditions.LINKIFY)
  289.                             this._condition.parseCondition ^= this._parseConditions.LINKIFY;
  290.                         this.tokenType = "html-attribute-name";
  291.                     } else if (this._isExpectingAttributeValue())
  292.                         this.tokenType = this._attrValueTokenType();
  293.                     else
  294.                         this.tokenType = null;
  295.                     return cursor;
  296.                 }
  297.                 <*> [^] { this.tokenType = null; return cursor; }
  298.             */
  299.         }
  300.     }
  301. }
  302.  
  // Chain the prototype to WebInspector.SourceTokenizer.prototype so that members
  // not defined on the HTML tokenizer itself are looked up there.
  WebInspector.SourceHTMLTokenizer.prototype.__proto__ = WebInspector.SourceTokenizer.prototype;
  304.