home *** CD-ROM | disk | FTP | other *** search
- import java.io.InputStream;
- import java.net.URL;
- import java.util.EmptyStackException;
- import java.util.Hashtable;
- import java.util.Stack;
-
- public class JDPHtmlParser {
- final boolean debug;
- static Hashtable ampChars = new Hashtable();
- byte[] output;
- byte[] input;
- int inputLength;
- int inputSeek;
- int lineCount = 1;
- JDPHtmlDoc html;
- private String delim = "> =";
- Stack tagStack = new Stack();
- boolean preFormatted;
- private static JDPHtmlTag FORMtag;
- private static JDPHtmlTag INPUTtag;
- private static JDPHtmlTag HRtag;
-
- final int nextChar() {
- if (this.inputSeek >= this.inputLength) {
- return -1;
- } else {
- byte var1 = this.input[this.inputSeek++];
- switch (var1) {
- case 10:
- ++this.lineCount;
- case 11:
- case 12:
- default:
- break;
- case 13:
- if (this.inputSeek >= this.inputLength) {
- ++this.lineCount;
- return 10;
- }
-
- var1 = this.input[this.inputSeek++];
- if (var1 != 10) {
- this.inputSeek += -1;
- var1 = 10;
- }
-
- if (var1 == 10) {
- ++this.lineCount;
- }
- }
-
- return var1;
- }
- }
-
- void parseAttributes(JDPHtmlTagRef var1) {
- while(true) {
- this.skipWhiteSpace();
- int var2 = this.inputSeek;
-
- int var3;
- while((var3 = this.nextChar()) != -1 && var3 != 62 && var3 != 32 && var3 != 61 && var3 != 10 && var3 != 9) {
- }
-
- this.pushBack();
- String var4;
- if (var2 == this.inputSeek) {
- var4 = null;
- } else {
- var4 = this.makeLowerCaseString(this.input, var2, this.inputSeek - var2);
- }
-
- this.skipWhiteSpace();
- if (var4 == null) {
- return;
- }
-
- String var5;
- if (this.peekChar() == 61) {
- this.nextChar();
- this.skipWhiteSpace();
- var3 = this.nextChar();
- if (var3 != 39 && var3 != 34) {
- this.pushBack();
- var2 = this.inputSeek;
- boolean var11 = false;
-
- while((var3 = this.nextChar()) != -1 && var3 != 32 && var3 != 9 && var3 != 10 && var3 != 62) {
- }
- } else {
- int var6 = var3;
- var2 = this.inputSeek;
- boolean var7 = false;
-
- while((var3 = this.nextChar()) != -1 && var3 != var6 && var3 != 62) {
- }
- }
-
- if (var3 == -1) {
- this.warning("unexpected EOF");
- return;
- }
-
- var5 = new String(this.input, 0, var2, this.inputSeek - var2 - 1);
- if (var3 == 62) {
- this.pushBack();
- }
- } else {
- var5 = new String("true");
- }
-
- if (var1 != null) {
- var1.addAttribute(var4, var5);
- }
- }
- }
-
- void parse() {
- int var1 = 0;
- JDPHtmlTag var3 = null;
- boolean var4 = false;
- boolean var5 = false;
-
- int var2;
- label141:
- while((var2 = this.nextChar()) != -1) {
- switch (var2) {
- case 9:
- case 10:
- if (!this.preFormatted) {
- var2 = 32;
- }
- case 32:
- if (!this.preFormatted && (!var5 || var4)) {
- continue;
- }
-
- var4 = true;
- break;
- case 38:
- if (this.peekChar() == 35) {
- var2 = this.parseCharacter();
- } else if ((var2 = this.parseEntity()) == -1) {
- continue;
- }
-
- if (var2 == 145) {
- var2 = 39;
- }
-
- if (var2 == 146) {
- var2 = 39;
- }
-
- if (var2 == 133) {
- this.input[var1++] = 46;
- this.input[var1++] = 46;
- var2 = 46;
- }
-
- var4 = false;
- break;
- case 60:
- boolean var6;
- if (this.peekChar() == 47) {
- this.nextChar();
- var6 = true;
- } else {
- var6 = false;
- }
-
- String var7 = this.parseTagName();
- if (var7 != null) {
- JDPHtmlTag var9 = JDPHtmlTag.lookup(var7);
- JDPHtmlTagRef var8 = null;
- if (this.handleTag(var9, var6, var1)) {
- if (!var6) {
- var8 = this.html.startTag(var9, var1);
- } else {
- var8 = this.html.endTag(var9, var1);
- }
-
- var3 = var9;
- if (var9.breaks) {
- var5 = false;
- }
- }
-
- if (!var6) {
- this.parseAttributes(var8);
- } else {
- this.skipUntil(62);
- }
-
- if (this.nextChar() != 62) {
- this.warning("Malformed tag: " + var3);
- }
-
- if (var8 != null && var8.tag.id == 26) {
- String var10 = var8.getAttribute("prompt");
- int var11 = var1;
- JDPHtmlTagRef var12 = this.html.startTag(FORMtag, var1);
- String var13 = var8.getAttribute("action");
- if (var13 != null) {
- var12.addAttribute("action", var13);
- }
-
- this.html.startTag(HRtag, var1);
- if (var10 != null) {
- for(int var24 = 0; var24 < var10.length(); ++var24) {
- this.input[var1++] = (byte)var10.charAt(var24);
- }
- } else {
- var10 = "This is a searchable index. Enter search keywords: ";
- this.output = new byte[this.input.length + var10.length() + 1];
- this.inputLength += var10.length() + 1;
- System.arraycopy(this.input, 0, this.output, 0, var1);
-
- for(int var14 = 0; var14 < var10.length(); ++var14) {
- this.output[var1++] = (byte)var10.charAt(var14);
- }
-
- System.arraycopy(this.input, var11 - 1, this.output, var1, this.input.length - var11);
- this.inputSeek += var10.length() + 1;
- this.input = this.output;
- }
-
- var12 = this.html.startTag(INPUTtag, var1);
- var12.addAttribute("name", "isindex");
- this.html.endTag(HRtag, var1);
- this.html.endTag(FORMtag, var1);
- }
-
- if (!var6 && var9.hasEndTag && var9.breaks && this.peekChar() == 10) {
- this.nextChar();
- }
-
- if (var3 != null && var3.id == 43) {
- var1 = this.inputLength - this.inputSeek;
- System.arraycopy(this.input, this.inputSeek, this.input, 0, var1);
- break label141;
- }
-
- var4 = false;
- continue;
- }
-
- var2 = 60;
- break;
- default:
- var4 = false;
- }
-
- var3 = null;
- var5 = true;
- this.input[var1++] = (byte)var2;
- }
-
- if (this.tagStack.size() != 0) {
- String var16 = "Missing ";
- int var19 = this.tagStack.size();
-
- while(true) {
- --var19;
- if (var19 <= 1) {
- JDPHtmlTag var21 = (JDPHtmlTag)this.tagStack.pop();
- this.html.endTag(var21, var1);
- var16 = var16 + "</" + var21.name + ">";
- this.warning(var16 + " at end of document.\n");
- break;
- }
-
- JDPHtmlTag var20 = (JDPHtmlTag)this.tagStack.pop();
- this.html.endTag(var20, var1);
- var16 = var16 + "</" + var20.name + ">, ";
- }
- }
-
- this.tagStack = null;
- if (this.input.length != var1) {
- byte[] var18 = new byte[var1];
- System.arraycopy(this.input, 0, var18, 0, var1);
- this.input = var18;
- }
-
- this.html.setText(this.input);
- }
-
- String makeLowerCaseString(byte[] var1, int var2, int var3) {
- return (new String(var1, 0, var2, var3)).toLowerCase();
- }
-
- final boolean isDigit(int var1) {
- return var1 >= 48 && var1 <= 57;
- }
-
- final void warning(String var1) {
- }
-
- boolean handleTag(JDPHtmlTag var1, boolean var2, int var3) {
- if (var2) {
- try {
- JDPHtmlTag var4 = (JDPHtmlTag)this.tagStack.peek();
- if (var4 != var1) {
- if (this.tagStack.search(var1) == -1) {
- this.warning("Ignoring tag: </" + var1.name + ">");
- return false;
- }
-
- while(true) {
- JDPHtmlTag var5 = (JDPHtmlTag)this.tagStack.pop();
- if (var5 == var1) {
- break;
- }
-
- this.warning("Missing </" + var5.name + "> just noticed by </" + var1.name + ">");
- this.html.endTag(var5, var3);
- }
- } else {
- if (var1.id == 33) {
- this.preFormatted = false;
- }
-
- this.tagStack.pop();
- }
- } catch (EmptyStackException var6) {
- this.warning("Ignoring tag: </" + var1.name + ">");
- return false;
- }
- } else if (var1.hasEndTag) {
- if (var1.id == 33) {
- this.preFormatted = true;
- }
-
- this.tagStack.push(var1);
- }
-
- return true;
- }
-
- final int peekChar() {
- return this.inputSeek >= this.inputLength ? -1 : this.input[this.inputSeek];
- }
-
- final boolean isLetter(int var1) {
- return var1 >= 65 && var1 <= 90 || var1 >= 97 && var1 <= 122;
- }
-
/**
 * Reads the whole stream into memory and parses it into the given document.
 * Any exception thrown while parsing is caught and its stack trace printed,
 * so construction itself does not fail on malformed input.
 *
 * @param var1 stream supplying the document bytes; it is not closed here
 * @param var2 document that receives tags and text
 */
public JDPHtmlParser(InputStream var1, JDPHtmlDoc var2) {
    this.readInput(var1);
    this.html = var2;

    try {
        this.parse();
    } catch (Exception parseFailure) {
        this.warning("Caught exception while parsing\n");
        parseFailure.printStackTrace();
    }
}
-
- final void skipWhiteSpace() {
- int var1;
- while((var1 = this.nextChar()) == 32 || var1 == 10 || var1 == 9) {
- }
-
- this.pushBack();
- }
-
- final boolean isWhiteSpace(int var1) {
- return var1 == 32 || var1 == 9 || var1 == 10;
- }
-
- private void readInput(InputStream var1) {
- this.input = new byte[16384];
- this.output = this.input;
- this.inputLength = 0;
-
- try {
- int var2;
- while((var2 = var1.read(this.input, this.inputLength, this.input.length - this.inputLength)) >= 0) {
- this.inputLength += var2;
- if (this.inputLength == this.input.length) {
- byte[] var3 = new byte[this.inputLength * 2];
- System.arraycopy(this.input, 0, var3, 0, this.inputLength);
- this.input = var3;
- }
- }
-
- } catch (Exception var4) {
- }
- }
-
- void insistThat(boolean var1) {
- if (!var1) {
- }
-
- }
-
- public static void main(String[] var0) {
- try {
- URL var1 = new URL((URL)null, var0[0]);
- new JDPHtmlParser(var1.openStream(), new JDPHtmlDoc());
- } catch (Exception var3) {
- ((Throwable)var3).printStackTrace();
- }
- }
-
- void skipUntil(int var1) {
- int var2;
- while((var2 = this.nextChar()) != var1 && var2 != -1) {
- }
-
- this.pushBack();
- }
-
- static {
- ampChars.put("lt", new Character('<'));
- ampChars.put("gt", new Character('>'));
- ampChars.put("amp", new Character('&'));
- ampChars.put("quot", new Character('"'));
- ampChars.put("nbsp", new Character(' '));
- ampChars.put("shy", new Character('-'));
- FORMtag = JDPHtmlTag.lookup("form");
- INPUTtag = JDPHtmlTag.lookup("input");
- HRtag = JDPHtmlTag.lookup("hr");
- }
-
- final boolean isTagChar(int var1) {
- return this.isLetter(var1) || this.isDigit(var1) || var1 == 46 || var1 == 45;
- }
-
- int parseCharacter() {
- int var1 = 0;
- this.insistThat(this.nextChar() == 35);
-
- int var2;
- while(this.isDigit(var2 = this.nextChar())) {
- var1 = var1 * 10 + var2 - 48;
- }
-
- if (var2 != 59) {
- this.pushBack();
- }
-
- return var1;
- }
-
- final void pushBack() {
- if (this.input[this.inputSeek += -1] == 10) {
- this.lineCount += -1;
- }
-
- }
-
- int parseEntity() {
- int var3 = this.inputSeek;
- if (!this.isLetter(this.peekChar())) {
- return 38;
- } else {
- int var4;
- while((var4 = this.nextChar()) != -1 && this.isLetter(var4)) {
- }
-
- int var5 = var4;
- String var2 = new String(this.input, 0, var3, this.inputSeek - var3 - 1);
- Character var1;
- if ((var1 = (Character)ampChars.get(var2)) != null) {
- var4 = var1;
- } else {
- var2 = var2.toLowerCase();
- if ((var1 = (Character)ampChars.get(var2)) != null) {
- var4 = var1;
- } else {
- this.warning("Warning: failed to find: &" + var2);
- var4 = -1;
- }
- }
-
- if (var5 != 59) {
- this.pushBack();
- }
-
- return var4;
- }
- }
-
- String parseTagName() {
- int var2 = this.inputSeek;
- int var1;
- if ((var1 = this.nextChar()) == 33) {
- if (this.nextChar() == 45) {
- while(this.nextChar() != 45) {
- }
-
- while(this.nextChar() != 45) {
- }
-
- while(this.nextChar() != 45) {
- }
- } else {
- while(this.nextChar() != 62) {
- }
- }
-
- return "<comment>";
- } else if (!this.isTagChar(var1)) {
- this.pushBack();
- return null;
- } else {
- while((var1 = this.nextChar()) != -1 && this.isTagChar(var1)) {
- }
-
- this.pushBack();
- if (this.inputSeek - var2 == 0) {
- return null;
- } else {
- if (this.input[this.inputSeek] == 10) {
- this.inputSeek += -1;
- }
-
- return this.makeLowerCaseString(this.input, var2, this.inputSeek - var2);
- }
- }
- }
- }
-