The HTMLTokenizer class of the com.ms.util package parses an HTML version 3.2 document. The parser does not interpret any HTML tags, except for comments and the <PRE> tag.
public class HTMLTokenizer {
    // Fields
    public Hashtable attrs;
    public String tag;
    public String text;
    public static final int TT_BEGIN_TAG;
    public static final int TT_COMMENT;
    public static final int TT_END_TAG;
    public static final int TT_TEXT;
    public int type;

    // Constructors
    public HTMLTokenizer(InputStream isin);

    // Methods
    public boolean hasMoreTokens();
    public void mark(int readLimit) throws IOException;
    public int nextToken() throws ParseException, IOException;
    public void reset() throws IOException;
    public String toString();
}