home *** CD-ROM | disk | FTP | other *** search
Java Source | 1999-05-18 | 13.4 KB | 492 lines |
- /*
- * (C) Copyright IBM Corp. 1998 All rights reserved.
- *
- * US Government Users Restricted Rights Use, duplication or
- * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
- *
- * The program is provided "as is" without any warranty express or
- * implied, including the warranty of non-infringement and the implied
- * warranties of merchantibility and fitness for a particular purpose.
- * IBM will not be liable for any damages suffered by you as a result
- * of using the Program. In no event will IBM be liable for any
- * special, indirect or consequential damages or lost profits even if
- * IBM has been advised of the possibility of their occurrence. IBM
- * will not be liable for any third party claims against you.
- */
-
- package com.ibm.texml;
-
- import java.io.*;
- import java.util.Hashtable;
- import org.w3c.dom.*;
-
/**
 * Converts a DOM conforming to TeXML.dtd into actual TeX source.
 *
 * <p>Elements are dispatched by tag name through the {@link #handlers}
 * table; character data is escaped through the {@link #escapes} table
 * unless it occurs inside a verbatim ("verb") element.
 *
 * <p>An instance may be reused for several documents, but it keeps the
 * current verbatim state in instance fields while translating, so a
 * single instance must not translate two documents at the same time.
 *
 * <p>All diagnostics are written to <code>System.err</code> so that they
 * can never end up inside the generated TeX when the destination stream
 * is <code>System.out</code>.
 */
public class TeXML
{
    // Attribute names.
    public static final String ATTR_Begin = "begin";
    public static final String ATTR_Catcode = "cat";
    public static final String ATTR_Char = "ch";
    public static final String ATTR_Close = "close";
    public static final String ATTR_End = "end";
    public static final String ATTR_Linebreaks = "linebreaks";
    public static final String ATTR_Name = "name";
    public static final String ATTR_Open = "open";

    // Element names.
    public static final String EL_Command = "cmd";
    public static final String EL_Control = "ctrl";
    public static final String EL_Environment = "env";
    public static final String EL_Group = "group";
    public static final String EL_Opt = "opt";
    public static final String EL_Parm = "parm";
    public static final String EL_Special = "spec";
    public static final String EL_TeXML = "texml";
    public static final String EL_Verb = "verb";

    // Entity reference names translated by this class.
    public static final String ENTITY_AMP = "amp";
    public static final String ENTITY_LT = "lt";
    public static final String ENTITY_GT = "gt";

    // Attribute values.
    public static final String V_ActiveSpace = "tilde";
    public static final String V_Alignment = "align";
    public static final String V_BGroup = "bg";
    public static final String V_Comment = "comment";
    public static final String V_EGroup = "eg";
    public static final String V_Escape = "esc";
    public static final String V_MathShift = "mshift";
    public static final String V_None = "none";
    public static final String V_Parameter = "parm";
    public static final String V_Subscript = "sub";
    public static final String V_Superscript = "sup";

    /** Maps an element tag name (String) to its ElementHandler. */
    Hashtable handlers;
    /** Maps a Character to the TeX text (String) that replaces it. */
    Hashtable escapes;
    /** Maps a "cat" attribute value (String) to the raw TeX character (String). */
    Hashtable specials;

    /** True while the children of a verbatim element are being written. */
    boolean inVerbatim = false;
    /** False while verbatim text must have its line breaks replaced by spaces. */
    boolean verbatimBreaks = true;

    /**
     * Create the translator and fill the handler, escape and special tables.
     */
    public TeXML()
    {
        // The handler fields are assigned by their initializers, which run
        // before this constructor body, so they are non-null here.
        handlers = new Hashtable();
        handlers.put(EL_Command, ehCommand);
        handlers.put(EL_Control, ehControl);
        handlers.put(EL_Environment, ehEnvironment);
        handlers.put(EL_Group, ehParm);
        handlers.put(EL_Opt, ehOpt);
        handlers.put(EL_Parm, ehParm);
        handlers.put(EL_Special, ehSpecial);
        handlers.put(EL_Verb, ehVerb);

        // Keys stay boxed with the constructor to match the lookup in
        // outputCharacter and the pre-generics style of this file.
        escapes = new Hashtable();
        escapes.put(new Character('%'), "\\%{}");
        escapes.put(new Character('&'), "\\&{}");
        escapes.put(new Character('{'), "\\{");
        escapes.put(new Character('}'), "\\}");
        escapes.put(new Character('\\'), "$\\backslash$");
        escapes.put(new Character('$'), "\\${}");
        escapes.put(new Character('#'), "\\#{}");
        escapes.put(new Character('_'), "\\_{}");
        escapes.put(new Character('^'), "\\char`\\^{}");
        escapes.put(new Character('~'), "\\char`\\~{}");
        escapes.put(new Character('<'), "$<$");
        escapes.put(new Character('>'), "$>$");
        escapes.put(new Character('|'), "$|$");

        specials = new Hashtable();
        specials.put(V_ActiveSpace, "~");
        specials.put(V_Alignment, "&");
        specials.put(V_BGroup, "{");
        specials.put(V_Comment, "%");
        specials.put(V_EGroup, "}");
        specials.put(V_Escape, "\\");
        specials.put(V_MathShift, "$");
        specials.put(V_Parameter, "#");
        specials.put(V_Subscript, "_");
        specials.put(V_Superscript, "^");
    }

    /**
     * Writer wrapper that tracks line separators to make sure only one
     * is output in succession.
     *
     * <p>The wrapped PrintWriter is created with the single-argument
     * constructor, i.e. with whatever default encoding that constructor
     * selects. The writer is flushed explicitly by the translator; there
     * is intentionally no finalizer touching the caller's stream.
     */
    private static class OutputAutomata
    {
        /** True when the last character written through output(char) was a line break. */
        boolean sawLineBreak;
        PrintWriter pout;

        OutputAutomata(OutputStream ostream)
        {
            sawLineBreak = false;
            pout = new PrintWriter(ostream);
        }

        /**
         * Write one character, dropping a line break that directly
         * follows another line break.
         */
        void output(char ch)
        {
            if (ch != '\n')
            {
                pout.print(ch);
                sawLineBreak = false;
            }
            else if (!sawLineBreak)
            {
                pout.print(ch);
                sawLineBreak = true;
            }
            // else: second line break in a row, swallow it
        }

        /**
         * Write the string untouched and forget any pending line break.
         * Callers either certify that the string contains no line breaks
         * or (verbatim text) want it written exactly as it is.
         */
        void outputClean(String s)
        {
            pout.print(s);
            sawLineBreak = false;
        }

        /**
         * Output string one character at a time, filtering for line breaks.
         */
        void output(String s)
        {
            int length = s.length();
            for (int i = 0; i < length; ++i)
            {
                output(s.charAt(i));
            }
        }

        void flush()
        {
            pout.flush();
        }
    }

    /** Strategy invoked for one kind of TeXML element. */
    private interface ElementHandler
    {
        public void processElement(Element n, OutputAutomata pout);
    }

    /** Report a problem on the error stream, never on the TeX output. */
    private static void warn(String message)
    {
        System.err.println(message);
    }

    /**
     * Return the named attribute of the element, or the fallback when the
     * attribute is absent (null or empty string).
     */
    private static String attributeOr(Element el, String name, String fallback)
    {
        String value = el.getAttribute(name);
        if (value == null || value.length() == 0)
        {
            return fallback;
        }
        return value;
    }

    /**
     * Write the children of the element between an opening and a closing
     * delimiter. The "open" and "close" attributes of the element, when
     * present, override the supplied defaults.
     */
    private void delimitedElement(Element n, OutputAutomata pout,
                                  String open, String close)
    {
        String opening = attributeOr(n, ATTR_Open, open);
        String closing = attributeOr(n, ATTR_Close, close);
        pout.output(opening);
        processChildren(n, pout);
        pout.output(closing);
    }

    /** Option elements are written between square brackets by default. */
    private ElementHandler ehOpt = new ElementHandler()
    {
        public void processElement(Element n, OutputAutomata pout)
        {
            delimitedElement(n, pout, "[", "]");
        }
    };

    /** Parameter and group elements are written between braces by default. */
    private ElementHandler ehParm = new ElementHandler()
    {
        public void processElement(Element n, OutputAutomata pout)
        {
            delimitedElement(n, pout, "{", "}");
        }
    };

    /**
     * Return true if the list contains an element whose tag name matches
     * the given name, ignoring case.
     */
    private boolean hasChildElement(NodeList nl, String tagName)
    {
        int count = nl.getLength();
        for (int i = 0; i < count; ++i)
        {
            Node child = nl.item(i);
            if (child.getNodeType() == Node.ELEMENT_NODE &&
                ((Element)child).getTagName().equalsIgnoreCase(tagName))
            {
                return true;
            }
        }
        return false;
    }

    /**
     * Return true if there are option children.
     */
    private boolean hasOptions(NodeList nl)
    {
        return hasChildElement(nl, EL_Opt);
    }

    /**
     * Return true if there are parameter children.
     */
    private boolean hasParameters(NodeList nl)
    {
        return hasChildElement(nl, EL_Parm);
    }

    /**
     * Process command elements: a backslash followed by the "name"
     * attribute, then either the children (when at least one option or
     * parameter child exists) or a single terminating space.
     */
    private ElementHandler ehCommand = new ElementHandler()
    {
        public void processElement(Element n, OutputAutomata pout)
        {
            String name = n.getAttribute(ATTR_Name);
            pout.outputClean("\\"+name);
            NodeList nl = n.getChildNodes();
            if (hasParameters(nl) || hasOptions(nl))
            {
                processChildren(n, pout);
            }
            else
            {
                //ensure the command is terminated
                pout.output(' ');
            }
        }
    };

    /**
     * Process control elements: a backslash followed by the "ch" attribute.
     */
    private ElementHandler ehControl = new ElementHandler()
    {
        public void processElement(Element n, OutputAutomata pout)
        {
            String ch = n.getAttribute(ATTR_Char);
            pout.outputClean("\\"+ch);
        }
    };

    /**
     * Process environment elements: the children wrapped in
     * \begin{name} ... \end{name}. The "begin" and "end" attributes, when
     * present, replace the command names "begin" and "end".
     */
    private ElementHandler ehEnvironment = new ElementHandler()
    {
        public void processElement(Element n, OutputAutomata pout)
        {
            String name = n.getAttribute(ATTR_Name);
            String begin = attributeOr(n, ATTR_Begin, "begin");
            String end = attributeOr(n, ATTR_End, "end");
            pout.outputClean("\\"+begin+"{"+name+"}");
            processChildren(n, pout);
            pout.outputClean("\\"+end+"{"+name+"}");
        }
    };

    /**
     * No-op for option and parameter elements found out of the context of a command.
     * This handler is not registered in the handler table by the constructor.
     */
    private ElementHandler ehNop = new ElementHandler()
    {
        public void processElement(Element n, OutputAutomata pout)
        {
            warn("Element "+n.getTagName()+" occurred out of context.");
        }
    };

    /**
     * Process special elements: the raw TeX character registered for the
     * value of the "cat" attribute.
     */
    private ElementHandler ehSpecial = new ElementHandler()
    {
        public void processElement(Element n, OutputAutomata pout)
        {
            String category = n.getAttribute(ATTR_Catcode);
            String spec = (category == null) ? null : (String)specials.get(category);
            if (spec == null)
            {
                warn("Unknown special attribute, "+category);
            }
            else
            {
                pout.outputClean(spec);
            }
        }
    };

    /**
     * Process the verbatim element: its text is written without escaping,
     * and with line breaks replaced by spaces when linebreaks="none".
     */
    private ElementHandler ehVerb = new ElementHandler()
    {
        public void processElement(Element n, OutputAutomata pout)
        {
            // Remember the enclosing state: a nested verbatim element must
            // not end the outer one, and an exception thrown by a child
            // must not leave the translator in verbatim mode for later calls.
            boolean outerVerbatim = inVerbatim;
            boolean outerBreaks = verbatimBreaks;
            inVerbatim = true;
            if (V_None.equals(n.getAttribute(ATTR_Linebreaks)))
            {
                verbatimBreaks = false;
            }
            try
            {
                processChildren(n, pout);
            }
            finally
            {
                inVerbatim = outerVerbatim;
                verbatimBreaks = outerBreaks;
            }
        }
    };

    /**
     * Dispatch the element to the handler registered for its tag name.
     * The lookup is an exact (case-sensitive) match on the tag name.
     */
    private void processTeXMLElement(Element el, OutputAutomata pout)
    {
        ElementHandler eh = (ElementHandler)handlers.get(el.getTagName());
        if (eh == null)
        {
            warn("Unrecognized DOM element element name, "+el.getTagName());
            return;
        }
        eh.processElement(el, pout);
    }

    /**
     * Write one character of ordinary text, replaced by its escape
     * sequence when the escape table has one.
     */
    private void outputCharacter(char ch, OutputAutomata pout)
    {
        String escape = (String)escapes.get(new Character(ch));
        if (escape != null)
        {
            // write escaped character
            pout.outputClean(escape);
        }
        else
        {
            // write normal character
            pout.output(ch);
        }
    }

    /**
     * Copy text to pout.
     * Outside verbatim: escape specials and eliminate multiple newlines.
     * Inside verbatim: write the text as it is, except that line breaks
     * become spaces when verbatim line breaks are switched off.
     */
    private void processTeXMLText(CharacterData t, OutputAutomata pout)
    {
        String text = t.getData();
        if (inVerbatim)
        {
            if (!verbatimBreaks)
            {
                text = text.replace('\n',' ');
            }
            pout.outputClean(text);
            return;
        }
        int length = text.length();
        for (int ofs = 0; ofs < length; ++ofs)
        {
            outputCharacter(text.charAt(ofs), pout);
        }
    }

    /**
     * Write the character denoted by an amp, lt or gt entity reference.
     * The character follows the same rule as surrounding text: raw inside
     * verbatim, escaped otherwise. Any other entity reference is reported
     * and produces no output.
     */
    private void processEntityReference(String name, OutputAutomata pout)
    {
        char ch;
        if (name.equalsIgnoreCase(ENTITY_AMP))
        {
            ch = '&';
        }
        else if (name.equalsIgnoreCase(ENTITY_LT))
        {
            ch = '<';
        }
        else if (name.equalsIgnoreCase(ENTITY_GT))
        {
            ch = '>';
        }
        else
        {
            warn("Unhandled entity reference, "+name);
            return;
        }
        if (inVerbatim)
        {
            pout.outputClean(String.valueOf(ch));
        }
        else
        {
            outputCharacter(ch, pout);
        }
    }

    /**
     * Translate every child of the node in document order. Comments are
     * skipped; node types that are not understood are reported.
     */
    private void processChildren(Node n, OutputAutomata pout)
    {
        NodeList nl = n.getChildNodes();
        for (int i = 0; i < nl.getLength(); ++i)
        {
            Node child = nl.item(i);
            int type = child.getNodeType();
            switch (type)
            {
            case Node.ELEMENT_NODE:
                processTeXMLElement((Element)child, pout);
                break;
            case Node.TEXT_NODE:
            case Node.CDATA_SECTION_NODE:
                processTeXMLText((CharacterData)child, pout);
                break;
            case Node.ENTITY_REFERENCE_NODE:
                processEntityReference(child.getNodeName(), pout);
                break;
            case Node.COMMENT_NODE:
                // comments carry no TeX content
                break;
            default:
                warn("Failed to handle node type, "+type+
                     ", name "+child.getNodeName()+
                     ", value "+child.getNodeValue());
                break;
            }
        }
    }

    /**
     * Translate "TeXML" node and children to TeX on the output stream.
     * This method may be called multiple times, with multiple DOM's,
     * on a single instance. The stream is flushed, but not closed, before
     * this method returns, even when translation fails with an exception.
     *
     * @param texML the root "TeXML" node of the TeXML DOM; when it is null
     *              or has a different tag name a diagnostic is reported
     *              and nothing is translated.
     * @param ostream the destination of the TeX output.
     */
    public void processTeXML(Element texML, OutputStream ostream)
    {
        OutputAutomata pout = new OutputAutomata(ostream);
        try
        {
            if (texML == null)
            {
                warn("Missing DOM root element.");
                warn(EL_TeXML+" expected.");
            }
            else if (texML.getTagName().equalsIgnoreCase(EL_TeXML))
            {
                processChildren(texML, pout);
            }
            else
            {
                warn("Unrecognized DOM root element name, "+texML.getTagName());
                warn(EL_TeXML+" expected.");
            }
        }
        finally
        {
            pout.flush();
        }
    }

    /**
     * Translate TeXML document to tex.
     * This method may be called multiple times, with multiple DOM's,
     * on a single instance. The name of the document type, when the
     * document has one, is reported on the error stream.
     *
     * @param texML the document node of the TeXML DOM.
     * @param ostream the destination of the TeX output.
     */
    public void processTeXML(Document texML, OutputStream ostream)
    {
        DocumentType dtd = texML.getDoctype();
        if (dtd != null)
        {
            warn("DTD is "+dtd.getName());
        }
        Element texRoot = texML.getDocumentElement();
        processTeXML(texRoot, ostream);
    }
}
-