home *** CD-ROM | disk | FTP | other *** search
/ PC Online 1997 October / PCO1097.ISO / FilesBBS / WIN95 / IAVAZIP.EXE / DATA.Z / HtmlTokenizer.class (.txt) < prev    next >
Encoding:
Java Class File  |  1997-08-18  |  5.2 KB  |  506 lines

  1. package com.sfs.html;
  2.  
  import java.io.EOFException;
  import java.io.IOException;
  import java.io.InputStream;
  import java.util.Hashtable;
  5.  
  /**
   * Small pull-style tokenizer for HTML read from an {@link InputStream}.
   *
   * <p>Bytes are widened to chars one-to-one (ISO 8859-1). Everything read from
   * the stream is kept in {@code buf} until a token has been accepted; each
   * public operation starts again at the beginning of the buffered input, so a
   * failed attempt (wrong tag name, text where a tag was expected, ...) leaves
   * the input available for the next attempt.
   *
   * <p>Instances are not safe for use by several threads.
   */
  class HtmlTokenizer {
     /** Character code of the first entry of {@link #LATIN1_ENTITIES}. */
     private static final int FIRST_LATIN1_ENTITY = 192;

     /**
      * Entity names of the ISO 8859-1 characters 192..255, in character-code
      * order: the character for entry {@code i} is {@code 192 + i}.
      */
     private static final String[] LATIN1_ENTITIES = {
        "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil",
        "Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml",
        "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times",
        "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", "szlig",
        "agrave", "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil",
        "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml",
        "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", "divide",
        "oslash", "ugrave", "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml"
     };

     /** Source of the HTML bytes. */
     private final InputStream in;
     /** Scratch buffer for one {@code InputStream.read(byte[])} call. */
     private final byte[] inBuf = new byte[100];
     /** Characters read from the stream and not yet discarded by {@link #clear()}. */
     private char[] buf = new char[200];
     /** Position in {@code buf} of the next character to hand out. */
     private int index;
     /** Number of valid characters in {@code buf}. */
     private int length;

     /**
      * Creates a tokenizer that reads from the given stream.
      *
      * @param var1 the stream supplying the HTML bytes
      */
     protected HtmlTokenizer(InputStream var1) {
        this.in = var1;
     }

     /**
      * Tells whether all input has been consumed.
      *
      * @return {@code true} if no buffered character is left and the stream
      *         cannot supply another one
      */
     protected boolean eof() {
        reset();
        try {
           read();
           return false;
        } catch (IOException endOfInput) {
           return true;
        }
     }

     /**
      * Tries to consume an opening tag with the given name, e.g.
      * {@code <A HREF="x">}.
      *
      * <p>Attribute values may be double-quoted, single-quoted or unquoted; an
      * attribute without {@code =} is stored with the empty string as value.
      * Quoted values have whitespace normalized to spaces and character
      * entities decoded.
      *
      * @param var1 the expected tag name, compared ignoring case
      * @return the attributes keyed by upper-cased attribute name (String to
      *         String), or {@code null} if the input does not start with such
      *         a tag or ends inside it
      */
     protected Hashtable getOpenTag(String var1) {
        try {
           reset();
           readSpaces();
           if (read() != '<') {
              return null;
           }
           readSpaces();
           String name = readIdentifier();
           // Receiver is the parsed name so that a null argument simply fails to match.
           if (!name.equalsIgnoreCase(var1)) {
              return null;
           }
           readSpaces();

           Hashtable attributes = new Hashtable();
           while (read() != '>') {
              unread();
              clear();
              String key = readIdentifier();
              String value = "";
              readSpaces();
              if (read() == '=') {
                 readSpaces();
                 value = readAttributeValue();
                 readSpaces();
              } else {
                 // Not an '=': the character belongs to the next attribute or
                 // is the closing '>', so it must not be lost.
                 unread();
              }
              attributes.put(key.toUpperCase(), value);
           }
           clear();
           return attributes;
        } catch (IOException endOfInput) {
           return null;
        }
     }

     /**
      * Tries to consume a closing tag with the given name, e.g. {@code </A>}.
      *
      * @param var1 the expected tag name, compared ignoring case
      * @return {@code true} if the tag was found and consumed; {@code false}
      *         if the input does not start with such a tag or ends inside it
      */
     protected boolean getCloseTag(String var1) {
        try {
           reset();
           readSpaces();
           if (read() != '<') {
              return false;
           }
           readSpaces();
           if (read() != '/') {
              return false;
           }
           readSpaces();
           String name = readIdentifier();
           if (!name.equalsIgnoreCase(var1)) {
              return false;
           }
           readUntil('>');
           read();
           clear();
           return true;
        } catch (IOException endOfInput) {
           return false;
        }
     }

     /**
      * Consumes and discards the next token: a complete tag if the input
      * (after optional whitespace) starts with {@code <}, otherwise a run of
      * text. A tag that is still open when the input ends is discarded as well,
      * so repeated calls always make progress until {@link #eof()}.
      */
     protected void getTagOrText() {
        try {
           reset();
           readSpaces();
           if (read() == '<') {
              readUntil('>');
              // readUntil stops either on the '>' (still unread) or at the end
              // of the input; only in the first case is there a '>' to skip.
              if (index < length) {
                 index++;
              }
              clear();
           } else {
              getText();
           }
        } catch (IOException endOfInput) {
           getText();
        }
     }

     /**
      * Consumes the text up to, but not including, the next {@code <} (or up
      * to the end of the input).
      *
      * @return the text with whitespace characters replaced by spaces and
      *         character entities decoded, or {@code null} if the input starts
      *         with {@code <} or is exhausted
      */
     protected String getText() {
        try {
           reset();
           if (read() == '<') {
              return null;
           }
           unread();
           String text = readUntil('<');
           clear();
           return text;
        } catch (IOException endOfInput) {
           return null;
        }
     }

     /**
      * Consumes text verbatim (whitespace is kept) up to, but not including,
      * the first occurrence of {@code <} immediately followed by {@code var1}.
      * If the input ends before that terminator is seen, all remaining text is
      * returned.
      *
      * @param var1 the characters that follow {@code <} in the terminator,
      *        matched case-sensitively
      * @return the text with character entities decoded, or {@code null} if
      *         there is no text before the terminator or the end of input
      */
     protected String getPreformattedText(String var1) {
        reset();

        boolean terminated = false;
        try {
           while (!endOfPreformattedText(var1)) {
              read();
           }
           terminated = true;
        } catch (IOException endOfInput) {
           // Input ended without a terminator: return what has been read.
        }

        if (terminated) {
           // Step back over the terminator so it stays in the input.
           index -= 1 + var1.length();
        }
        if (index == 0) {
           return null;
        }
        String text = makeMassagedString(buf, 0, index);
        clear();
        return text;
     }

     /** Tells whether the characters just before {@code index} are {@code <} followed by {@code var1}. */
     private boolean endOfPreformattedText(String var1) {
        int tagLength = var1.length();
        if (index < 1 + tagLength) {
           return false;
        }
        if (buf[index - 1 - tagLength] != '<') {
           return false;
        }
        for (int i = 0; i < tagLength; i++) {
           if (var1.charAt(i) != buf[index - tagLength + i]) {
              return false;
           }
        }
        return true;
     }

     /** Tells whether the character may appear in a tag or attribute name. */
     private boolean identifierChar(char var1) {
        return var1 == '_' || var1 == '#' || var1 == '+' || var1 == '-'
              || Character.isLetterOrDigit(var1);
     }

     /** Restarts reading at the beginning of the buffered input. */
     private void reset() {
        index = 0;
     }

     /** Pushes the most recently read character back. */
     private void unread() {
        index--;
     }

     /** Discards the characters before {@code index} and moves the rest to the front of the buffer. */
     private void clear() {
        System.arraycopy(buf, index, buf, 0, length - index);
        length -= index;
        index = 0;
     }

     /**
      * Returns the next character, refilling the buffer from the stream when
      * all buffered characters have been handed out.
      *
      * @throws EOFException if the stream has no more data
      * @throws IOException if reading the stream fails
      */
     private char read() throws IOException {
        while (index == length) {
           int count = in.read(inBuf);
           if (count <= 0) {
              throw new EOFException("end of HTML input");
           }
           if (length + count > buf.length) {
              // One extra inBuf worth of room always fits the bytes just read.
              char[] grown = new char[buf.length + inBuf.length];
              System.arraycopy(buf, 0, grown, 0, length);
              buf = grown;
           }
           for (int i = 0; i < count; i++) {
              buf[length++] = (char) (inBuf[i] & 0xFF);
           }
        }
        return buf[index++];
     }

     /** Skips whitespace; the first non-whitespace character is left unread. */
     private void readSpaces() throws IOException {
        while (Character.isWhitespace(read())) {
           // skip
        }
        unread();
     }

     /**
      * Reads a name: a double-quoted string, a run of identifier characters,
      * or a single punctuation character. After a punctuation character any
      * further non-identifier characters are skipped, but never past a
      * {@code >}, so the end of the enclosing tag stays visible to the caller.
      */
     private String readIdentifier() throws IOException {
        int start = index;
        char first = read();
        if (first == '"') {
           String quoted = readUntil(first);
           read(); // closing quote
           return quoted;
        }
        if (identifierChar(first)) {
           while (identifierChar(read())) {
              // consume the rest of the identifier
           }
           unread();
           return new String(buf, start, index - start);
        }
        char next;
        do {
           next = read();
        } while (next != '>' && !identifierChar(next));
        unread();
        return new String(buf, start, 1);
     }

     /**
      * Reads an attribute value: a double- or single-quoted string (decoded
      * like text), or an unquoted run of characters ending at whitespace or
      * {@code >} (returned as written). An immediately following {@code >}
      * yields the empty string.
      */
     private String readAttributeValue() throws IOException {
        int start = index;
        char first = read();
        if (first == '"' || first == '\'') {
           String quoted = readUntil(first);
           read(); // closing quote
           return quoted;
        }
        char next = first;
        while (next != '>' && !Character.isWhitespace(next)) {
           next = read();
        }
        unread();
        return new String(buf, start, index - start);
     }

     /**
      * Reads up to, but not including, the given character; reaching the end
      * of the input ends the run as well. Whitespace characters of the run are
      * replaced by spaces in the buffer.
      *
      * @return the run with character entities decoded
      */
     private String readUntil(char var1) {
        int start = index;
        try {
           while (read() != var1) {
              // keep reading
           }
           unread();
        } catch (IOException endOfInput) {
           // Deliberate: the run simply ends where the input ends.
        }

        for (int i = start; i < index; i++) {
           if (Character.isWhitespace(buf[i])) {
              buf[i] = ' ';
           }
        }
        return makeMassagedString(buf, start, index - start);
     }

     /**
      * Copies {@code var3} characters of {@code var1} starting at {@code var2}
      * into a string, replacing every recognized character entity reference
      * ({@code &name;} or decimal {@code &#nnn;}) by its character. Anything
      * that is not a recognized reference is copied unchanged.
      */
     private String makeMassagedString(char[] var1, int var2, int var3) {
        char[] out = new char[var3];
        int outLength = 0;
        int i = 0;
        while (i < var3) {
           char c = var1[var2 + i];
           if (c == '&') {
              // A reference is '&', a run of letters/digits/'#', then ';'.
              int semi = i + 1;
              while (semi < var3
                    && (Character.isLetterOrDigit(var1[var2 + semi]) || var1[var2 + semi] == '#')) {
                 semi++;
              }
              if (semi < var3 && var1[var2 + semi] == ';') {
                 int code = entityChar(new String(var1, var2 + i + 1, semi - i - 1));
                 if (code >= 0) {
                    out[outLength++] = (char) code;
                    i = semi + 1;
                    continue;
                 }
              }
           }
           out[outLength++] = c;
           i++;
        }
        return new String(out, 0, outLength);
     }

     /**
      * Looks up the character for an entity reference.
      *
      * @param name the text between {@code &} and {@code ;}
      * @return the character code, or -1 if the reference is not recognized
      */
     private static int entityChar(String name) {
        if (name.length() == 0) {
           return -1;
        }
        if (name.charAt(0) == '#') {
           return numericEntity(name);
        }
        if (name.equals("lt")) {
           return '<';
        }
        if (name.equals("gt")) {
           return '>';
        }
        if (name.equals("amp")) {
           return '&';
        }
        if (name.equals("quot")) {
           return '"';
        }
        if (name.equals("nbsp")) {
           return 160;
        }
        if (name.equals("copy")) {
           return 169;
        }
        if (name.equals("reg")) {
           return 174;
        }
        if (name.equals("Aelig")) {
           // Non-standard spelling of AElig that earlier versions accepted.
           return 198;
        }
        for (int i = 0; i < LATIN1_ENTITIES.length; i++) {
           if (LATIN1_ENTITIES[i].equals(name)) {
              return FIRST_LATIN1_ENTITY + i;
           }
        }
        return -1;
     }

     /**
      * Parses a decimal character reference such as {@code #169}.
      *
      * @return the character code, or -1 if there are no digits, a non-digit
      *         occurs, or the value does not fit in a {@code char}
      */
     private static int numericEntity(String name) {
        if (name.length() < 2) {
           return -1;
        }
        int code = 0;
        for (int i = 1; i < name.length(); i++) {
           char digit = name.charAt(i);
           if (digit < '0' || digit > '9') {
              return -1;
           }
           code = 10 * code + (digit - '0');
           if (code > Character.MAX_VALUE) {
              return -1;
           }
        }
        return code;
     }
  }
  506.