home *** CD-ROM | disk | FTP | other *** search
Wrap
Text File | 1997-10-04 | 80.8 KB | 1,282 lines
<!DOCTYPE ARTICLE PUBLIC "+//Silmaril//DTD DocBook V3.0-Based Extension With HTML Forms//EN" "http://www.ucc.ie/dtds/dbhtform.dtd" [ <!ENTITY TeX CDATA "TeX"> <!ENTITY amp CDATA "&"> <!ENTITY apos CDATA "'"> <!ENTITY dollar CDATA "$"> <!ENTITY gt CDATA ">"> <!ENTITY hellip CDATA " . . . "> <!ENTITY lt CDATA "<"> <!ENTITY para CDATA "¶"> <!ENTITY pi CDATA "╥"> <!ENTITY plusmn CDATA "±"> <!ENTITY sect CDATA "§"> <!ENTITY trade CDATA "TM"> <!ENTITY xmlfiles PUBLIC "+//Silmaril//NONSGML The XML Files//EN" "head-xmlfiles.gif" NDATA GIF> <!ENTITY xmllogo PUBLIC "+//Silmaril//NONSGML XML Logo//EN" "xml.gif" NDATA GIF > ]> <?ATTLINK ULINK URL URI> <ARTICLE><ARTHEADER><TITLE>Frequently Asked Questions about the Extensible Markup Language</TITLE> <TITLEABBREV>The XML FAQ <INLINEGRAPHIC FILEREF="xml.gif" FORMAT="GIF" XREFLABEL="[logo]"></INLINEGRAPHIC>&xmllogo;</TITLEABBREV> <ISSUENUM>1.1 (1 October 1997)</ISSUENUM> <BIBLIOSET><BIBLIOMISC>Maintained on behalf of the World Wide Web Consortium's XML Special Interest Group by</BIBLIOMISC> <AUTHORGROUP><EDITOR ID="faq-editor"><FIRSTNAME>Peter</FIRSTNAME> <SURNAME>Flynn</SURNAME> <AFFILIATION><ORGNAME>Silmaril Consultants</ORGNAME></AFFILIATION></EDITOR></AUTHORGROUP> <BIBLIOMISC>with the collaboration of</BIBLIOMISC> <AUTHORGROUP><OTHERCREDIT><FIRSTNAME>Terry</FIRSTNAME> <SURNAME>Allen</SURNAME> <AFFILIATION><ORGNAME></ORGNAME></AFFILIATION></OTHERCREDIT> <OTHERCREDIT><FIRSTNAME>Tom</FIRSTNAME> <SURNAME>Borgman</SURNAME> <AFFILIATION><ORGNAME>Harlequin Ltd</ORGNAME></AFFILIATION></OTHERCREDIT> <OTHERCREDIT><FIRSTNAME>Tim</FIRSTNAME> <SURNAME>Bray</SURNAME> <AFFILIATION><ORGNAME>Textuality, Inc</ORGNAME></AFFILIATION></OTHERCREDIT> <OTHERCREDIT><FIRSTNAME>Robin</FIRSTNAME> <SURNAME>Cover</SURNAME> <AFFILIATION><ORGNAME>Summer Institute of Linguistics</ORGNAME></AFFILIATION></OTHERCREDIT> <OTHERCREDIT><FIRSTNAME>Christopher</FIRSTNAME> <SURNAME>Maden</SURNAME> <AFFILIATION><ORGNAME>O'Reilly &amp; 
Associates</ORGNAME></AFFILIATION></OTHERCREDIT> <OTHERCREDIT><FIRSTNAME>Eve</FIRSTNAME> <SURNAME>Maler</SURNAME> <AFFILIATION><ORGNAME>Arbortext, Inc</ORGNAME></AFFILIATION></OTHERCREDIT> <OTHERCREDIT><FIRSTNAME>Peter</FIRSTNAME> <SURNAME>Murray-Rust</SURNAME> <AFFILIATION><ORGNAME>Nottingham University</ORGNAME></AFFILIATION></OTHERCREDIT> <OTHERCREDIT><FIRSTNAME>Liam</FIRSTNAME> <SURNAME>Quin</SURNAME> <AFFILIATION><ORGNAME></ORGNAME></AFFILIATION></OTHERCREDIT> <OTHERCREDIT><FIRSTNAME>Michael</FIRSTNAME> <SURNAME>Sperberg-McQueen</SURNAME> <AFFILIATION><ORGNAME>University of Illinois at Chicago</ORGNAME></AFFILIATION></OTHERCREDIT> <OTHERCREDIT><FIRSTNAME>Joel</FIRSTNAME> <SURNAME>Weber</SURNAME> <AFFILIATION><ORGNAME>MIT</ORGNAME></AFFILIATION></OTHERCREDIT> <OTHERCREDIT><SURNAME>Murata</SURNAME> <FIRSTNAME>Makoto</FIRSTNAME><AFFILIATION> <ORGNAME>Fuji Xerox Japan</ORGNAME></AFFILIATION></OTHERCREDIT></AUTHORGROUP> <BIBLIOMISC>and many other members of the </BIBLIOMISC> <AUTHORGROUP><OTHERCREDIT><CONTRIB>XML Special Interest Group of the W3C</CONTRIB> <CONTRIB>as well as FAQ readers around the world. Please use the form at the end for any corrections or additions.</CONTRIB></OTHERCREDIT></AUTHORGROUP> <REVHISTORY ID="faq-revhist"><REVISION><REVNUMBER>0.1</REVNUMBER> <DATE>31 January 1997</DATE> <AUTHORINITIALS>PF</AUTHORINITIALS> <REVREMARK>First draft. Sample questions devised by participants.</REVREMARK></REVISION> <REVISION><REVNUMBER>0.2</REVNUMBER> <DATE>3 February 1997</DATE> <AUTHORINITIALS>PF</AUTHORINITIALS> <REVREMARK>Revised draft. Additional questions and answers.</REVREMARK></REVISION> <REVISION><REVNUMBER>0.3</REVNUMBER> <DATE>17 February 1997</DATE> <AUTHORINITIALS>PF</AUTHORINITIALS> <REVREMARK>Extensive revision following comments from the group. 
Changes to markup and organization.</REVREMARK></REVISION> <REVISION><REVNUMBER>0.4</REVNUMBER> <DATE>23 February 1997</DATE> <AUTHORINITIALS>PF</AUTHORINITIALS> <REVREMARK>Minor editorial changes</REVREMARK></REVISION> <REVISION><REVNUMBER>0.5</REVNUMBER> <DATE>1 April 1997</DATE> <AUTHORINITIALS>PF</AUTHORINITIALS> <REVREMARK>Added Multidoc Pro as SGML browser; question on XML math; fixed ambiguity in explanation of NETs; added JUMBO; ERB changes of March 26; more details of linking and tools; adding element declaration minimization to the forbidden list.</REVREMARK></REVISION> <REVISION><REVNUMBER>1.0</REVNUMBER> <DATE>1 May 1997</DATE> <AUTHORINITIALS>PF</AUTHORINITIALS> <REVREMARK>Added reference to ToC and printed URLs; added disclaimer at A6; combined old A11 with A5 to explain SGML/XML/HTML; clarified explanation of XML not replacing HTML at C1; added new course and conference at (new) A11; clarified B1, C4, C8; added FPI server at C12; removed examples in C13.</REVREMARK></REVISION> <REVISION REVISIONFLAG="ADDED"><REVNUMBER>1.1</REVNUMBER> <DATE>1 October 1997</DATE> <AUTHORINITIALS>PF</AUTHORINITIALS> <REVREMARK>No more minimization parameters in element declarations; Parsers must now pass all white-space to the application; Everything is now case-sensitive, including all markup; A new proposal for stylesheets: XSL, which combines DSSSL and CSS in an XML format; Java[Script] and metadata and their use in XML; Updated list of software; First XML book is published; New public mailing list XML-L</REVREMARK></REVISION></REVHISTORY> <RELEASEINFO>Paragraphs which have been added since the last version are shown prefixed with a pilcrow (¶). Paragraphs which have been changed since the last version are shown prefixed with a section sign (§). 
Paragraphs marked for future deletion but retained at the moment for information are prefixed with a plus/minus sign (±).</RELEASEINFO></BIBLIOSET></ARTHEADER> <ABSTRACT><TITLE>Summary</TITLE> <PARA REVISIONFLAG="CHANGED">This document contains the most frequently-asked questions (with answers) about XML, the Extensible Markup Language. It is intended as a first resource for users, developers, and the interested reader, and should not be regarded as a part of the <ULINK URL="http://www.w3.org/pub/WWW/TR/">XML Draft Specification</ULINK>.</PARA> </ABSTRACT> <NOTE><TITLE>Organization</TITLE> <PARA>The FAQ is divided into four parts: <ITEMIZEDLIST><LISTITEM><SIMPARA><LINK LINKEND="faq-general">General</LINK>,</SIMPARA></LISTITEM> <LISTITEM><SIMPARA><LINK LINKEND="faq-user">User</LINK>,</SIMPARA></LISTITEM> <LISTITEM><SIMPARA><LINK LINKEND="faq-author">Author</LINK>, and</SIMPARA> </LISTITEM> <LISTITEM><SIMPARA><LINK LINKEND="faq-developer">Developer</LINK></SIMPARA></LISTITEM></ITEMIZEDLIST>. The questions are numbered independently within each section. As the numbering may therefore change with each version, comments and suggestions should refer to the version number (see <LINK LINKEND="faq-revhist">Revision History</LINK> above) as well as the Part and Question Number.</PARA> <PARA>There is <LINK LINKEND="faq-form">a form</LINK> at the end of this document which you can use to submit bug reports, suggestions for improvement, and other comments relating to <EMPHASIS>this FAQ only</EMPHASIS>. 
Comments about the <ULINK URL="http://www.w3.org/pub/WWW/TR/">XML Draft Specification</ULINK> itself should be sent to the <ULINK URL="http://www.w3.org/">W3C</ULINK>.</PARA></NOTE> <NOTE><TITLE>Availability</TITLE> <PARA>The <LINK LINKEND="faq-sgml">SGML</LINK> file for use with any conforming SGML system is available at <FILENAME><ULINK URL="http://www.ucc.ie/xml/faq.sgml">http://www.ucc.ie/xml/faq.sgml</ULINK></FILENAME> (this can also be used online with SGML browsers like <PRODUCTNAME><ULINK URL="http://www.sq.com/products/panorama/panor-fe.htm">Panorama</ULINK></PRODUCTNAME> or <PRODUCTNAME><ULINK URL="http://www.citec.fi/mdp/index.html">Multidoc Pro</ULINK></PRODUCTNAME>; you can also <ULINK URL="http://www.ucc.ie/xml/xmlview.exe">download the DTD and stylesheet installation self-extractor</ULINK> for faster local access with these browsers, or the <ULINK URL="http://www.ucc.ie/xml/catalog">DTD set as ASCII files</ULINK>).</PARA> <PARA>The same text is available in an <ULINK URL="http://www.ucc.ie/xml/faq.html">HTML version</ULINK> for use with an HTML browser (<EMPHASIS>eg</EMPHASIS> <PRODUCTNAME><ULINK URL="http://www.netscape.com/">Netscape Navigator</ULINK></PRODUCTNAME>, <PRODUCTNAME><ULINK URL="http://www.microsoft.com/">Microsoft Internet Explorer</ULINK></PRODUCTNAME>, <PRODUCTNAME><ULINK URL="http://www.spry.com/">Spry Mosaic</ULINK></PRODUCTNAME>, <PRODUCTNAME><ULINK URL="http://www.ncsa.edu">NCSA Mosaic</ULINK></PRODUCTNAME>, <PRODUCTNAME><ULINK URL="http://kufacts.cc.ukans.edu/">Lynx</ULINK></PRODUCTNAME>, <PRODUCTNAME><ULINK URL="http://opera.nta.no">Opera</ULINK></PRODUCTNAME>, <PRODUCTNAME><ULINK URL="http://www.cs.indiana.edu/???">GNUscape Navigator</ULINK></PRODUCTNAME> <EMPHASIS>etc</EMPHASIS>) at <FILENAME><ULINK URL="http://www.ucc.ie/xml/">http://www.ucc.ie/xml/</ULINK></FILENAME>.</PARA> <PARA REVISIONFLAG="ADDED">An XML version will be produced once the specification has been agreed and when DTDs and browsers are available to handle 
it. </PARA> <PARA>A plaintext (ASCII) version is available <ULINK URL="http://www.ucc.ie/xml/faq.txt">from the Web</ULINK> and (eventually) by <ULINK URL="http://www.ucc.ie/doc/other/howtoftp.html" REMAP="ULINK">anonymous FTP</ULINK> to one of several <ULINK URL="ftp://rtfm.mit.edu/">FAQ repositories</ULINK>. The versions above are also available by <ULINK URL="mailto:webmail@www.ucc.ie">electronic mail</ULINK> to the <PRODUCTNAME><ULINK URL="http://www.ucc.ie/webmail/">WebMail</ULINK></PRODUCTNAME> server (for users with email-only access). </PARA> <PARA REVISIONFLAG="CHANGED">For printed copies there are PostScript<SUPERSCRIPT>™</SUPERSCRIPT> versions for <ULINK URL="http://www.ucc.ie/xml/faqa4.ps">A4</ULINK> and <ULINK URL="http://www.ucc.ie/xml/faqlet.ps">Letter</ULINK> sizes of paper.</PARA> <PARA>The document is also available in oil-based toner on flattened dead trees by sending $10 (or equivalent) to the <ULINK URL="mailto:silmaril@m-net.arbornet.org">editor</ULINK> (email first to check currency and postal address).</PARA> <PARA REVISIONFLAG="CHANGED">Thanks to Murata Makoto for making this document available in Japanese: see <FILENAME><ULINK URL="http://www.iijnet.or.jp/FXIS/XSoft/sgml/xml/XMLFAQ.htm">http://www.iijnet.or.jp/FXIS/XSoft/sgml/xml/XMLFAQ.htm</ULINK></FILENAME></PARA> <PARA>You can download <ULINK URL="xml.gif">the XML logo</ULINK> and an icon for your files in <ULINK URL="xml.ico">ICO</ULINK> (Microsoft Windows) or <ULINK URL="xml.xpm">XBM</ULINK> (X Window system) format (<ULINK URL="mailto:pflynn@imbolc.ucc.ie">volunteer wanted</ULINK> to do a Mac icon).</PARA></NOTE> <SECT1 ID="faq-general"><TITLE>General questions</TITLE> <SECT2 ID="faq-acro"><TITLE>What is XML?</TITLE> <PARA>XML is the <WORDASWORD>Extensible Markup Language</WORDASWORD> (extensible because it is not a fixed format like <LINK LINKEND="faq-html">HTML</LINK>). 
It is designed to enable the use of <LINK LINKEND="faq-sgml">SGML</LINK> on the World-Wide Web.</PARA> <PARA REVISIONFLAG="CHANGED">It's actually slightly misnamed: XML itself is not a single markup language: it's a metalanguage to let you design your own markup language. A regular markup language defines a way to describe information in a certain class of documents (<FOREIGNPHRASE>eg</FOREIGNPHRASE> HTML). XML lets you define your own customized markup languages for many classes of document. It can do this because it's done in SGML, the international standard metalanguage for markup languages.</PARA></SECT2> <SECT2 ID="faq-def"><TITLE>What is XML for?</TITLE> <PARA REVISIONFLAG="CHANGED">XML is designed <QUOTE>to make it easy and straightforward to use <LINK LINKEND="faq-sgml">SGML</LINK> on the Web: easy to define document types, easy to author and manage SGML-defined documents, and easy to transmit and share them across the Web.</QUOTE></PARA> <PARA>It defines <QUOTE>an extremely simple dialect of SGML which is completely described in the <LINK LINKEND="faq-spec">Draft XML Specification</LINK>. The goal is to enable generic SGML to be served, received, and processed on the Web in the way that is now possible with <LINK LINKEND="faq-html">HTML</LINK>.</QUOTE></PARA> <PARA><QUOTE>For this reason, XML has been designed for ease of implementation, and for interoperability with both SGML and HTML</QUOTE> [quotes from <LINK LINKEND="faq-spec">the XML spec</LINK>].</PARA></SECT2> <SECT2 ID="faq-sgml"><TITLE>What is SGML?</TITLE> <PARA REVISIONFLAG="CHANGED">SGML is the <ULINK URL="http://www.sil.org/sgml/sgml.html">Standard Generalized Markup Language</ULINK> (<ULINK URL="http://www.iso.ch/">ISO 8879</ULINK>), the international standard for defining descriptions of the structure and content of different types of electronic document. 
There is an SGML FAQ at <ULINK URL="http://www.infosys.utas.edu.au/info/sgmlfaq.txt"><SYSTEMITEM>http://www.infosys.utas.edu.au/info/sgmlfaq.txt</SYSTEMITEM></ULINK>.</PARA></SECT2> <SECT2 ID="faq-html"><TITLE>What is HTML?</TITLE> <PARA REVISIONFLAG="CHANGED">HTML is the <ULINK URL="http://www.w3.org/">HyperText Markup Language</ULINK> (<ULINK URL="http://ds.internic.net/rfc/rfc1866.txt">RFC 1866</ULINK>), a specific application of <LINK LINKEND="faq-sgml">SGML</LINK> used in the <ULINK URL="http://www.w3.org">World-Wide Web</ULINK>.</PARA></SECT2> <SECT2 ID="faq-same"><TITLE>Aren't XML, SGML, and HTML all the same thing?</TITLE> <PARA REVISIONFLAG="CHANGED">Not quite. <LINK LINKEND="faq-sgml">SGML</LINK> is the <WORDASWORD>mother tongue</WORDASWORD>, used for describing thousands of different document types in many fields of human activity, from transcriptions of ancient Sumerian scrolls to the technical documentation for stealth bombers, and from patients' clinical records to musical notation.</PARA> <PARA REVISIONFLAG="CHANGED"><LINK LINKEND="faq-html">HTML</LINK> is just one of these document types, the one most frequently used in the <ULINK URL="http://www.w3.org/">Web</ULINK>. It defines a single, fixed type of document with markup that lets you describe a common class of simple office-style report, with headings, paragraphs, lists, illustrations, <FOREIGNPHRASE>etc</FOREIGNPHRASE>, and some provision for hypertext and multimedia.</PARA> <PARA>XML is an abbreviated version of SGML, to make it easier for you to define your own document types, and to make it easier for programmers to write programs to handle them. It omits the more complex and less-used parts of SGML in return for the benefits of being easier to write applications, easier to understand, and more suited to delivery and interoperability over the Web. 
But it is still SGML, and XML files may still be parsed and validated the same as any other SGML file (see the question on <LINK LINKEND="faq-xmlsoft">XML software</LINK>).</PARA> <PARA>Programmers may find it useful to think of XML as being SGML-- rather than HTML++.</PARA></SECT2> <SECT2 ID="faq-owns"><TITLE>Who is responsible for XML?</TITLE> <PARA REVISIONFLAG="CHANGED">XML is a project of the <ULINK URL="http://www.w3.org/pub/WWW/MarkUp/SGML/Activity">World-Wide Web Consortium (W3C)</ULINK>, and the development of the specification is being supervised by their XML Working Group. A Special Interest Group of co-opted contributors and experts from various fields contributes comments and reviews by email.</PARA> <PARA>XML is a public format: it is not a proprietary development of any company.</PARA></SECT2> <SECT2 ID="faq-import"><TITLE>Why is XML such an important development?</TITLE> <PARA REVISIONFLAG="CHANGED">It removes two constraints which are holding back Web developments:</PARA> <ORDEREDLIST REVISIONFLAG="ADDED"><LISTITEM><PARA>dependence on a single, inflexible document type (<LINK LINKEND="faq-html">HTML</LINK>);</PARA></LISTITEM> <LISTITEM><PARA>the complexity of full <LINK LINKEND="faq-sgml">SGML</LINK>, whose syntax allows many powerful but hard-to-program options.</PARA></LISTITEM></ORDEREDLIST> <PARA>XML simplifies the levels of optionality in SGML, and allows the development of user-defined document types on the Web.</PARA> </SECT2> <SECT2 ID="faq-howto"><TITLE>How does XML make SGML simpler and still let you define your own document types?</TITLE> <PARA REVISIONFLAG="CHANGED">To make SGML simpler, XML redefines some of <LINK LINKEND="faq-sgml">SGML</LINK>'s internal values and parameters, and removes a large number of the more complex and sometimes less-used features which made it harder to write processing programs (see Appendix A of <LINK LINKEND="faq-spec">the XML specification</LINK>).</PARA> <PARA REVISIONFLAG="CHANGED">But it retains all of 
SGML's structural abilities which let you define your own document type. It also introduces a new class of document which does not require you to use a predefined document type. See the questions about <LINK LINKEND="faq-validwf"><QUOTE>valid</QUOTE> and <QUOTE>well-formed</QUOTE> documents</LINK>, and <LINK LINKEND="faq-selfdef">how to define your own document types</LINK> in the <LINK LINKEND="faq-developer">Developers' Section</LINK>.</PARA></SECT2> <SECT2 ID="faq-extend"><TITLE>Why not just carry on extending HTML?</TITLE> <PARA><LINK LINKEND="faq-html">HTML</LINK> is already overburdened with dozens of interesting but often incompatible inventions from different manufacturers, because it provides only one way of describing your information.</PARA> <PARA>XML will allow groups of people or organizations to create their own customized markup languages for exchanging information in their domain (music, chemistry, electronics, hill-walking, finance, surfing, linguistics, knitting, history, engineering, rabbit-keeping <FOREIGNPHRASE>etc</FOREIGNPHRASE>).</PARA> <PARA>HTML is at the limit of its usefulness as a way of describing information, and while it will continue to play an important role for the content it currently represents, many new applications require a more robust and flexible infrastructure. </PARA></SECT2> <SECT2 ID="faq-word"><TITLE>Why do we need all this SGML stuff? Why not just use <PRODUCTNAME>Word</PRODUCTNAME> or <PRODUCTNAME>Notes</PRODUCTNAME>?</TITLE> <PARA>Information on a network which connects many different types of computer has to be usable on all of them. Public information cannot afford to be restricted to one make or model or manufacturer, or to cede control of its data format to private hands. 
It is also helpful for such information to be in a form that can be reused in many different ways, as this can minimize wasted time and effort.</PARA> <PARA REVISIONFLAG="CHANGED"><LINK LINKEND="faq-sgml">SGML</LINK> is the international standard which is used for defining this kind of application, but those who need an alternative based on different software are entirely free to implement similar services using such a system, especially if they are for private use. </PARA> </SECT2> <SECT2 ID="faq-more"><TITLE>Where do I find more information about XML?</TITLE> <PARA>Online, there's the <LINK LINKEND="faq-spec">XML Draft Specification</LINK> and ancillary documentation available from the W3C; an <ULINK URL="http://www.sil.org/sgml/xml.html">XML section</ULINK> with an extensive list of online reference material in Robin Cover's <ULINK URL="http://www.sil.org/sgml/sgml.html">SGML pages</ULINK>; and a <ULINK URL="http://www.textuality.com/xml/">summary</ULINK> and <ULINK URL="http://www.textuality.com/xml/faq.html">condensed FAQ</ULINK> from Tim Bray.</PARA> <PARA REVISIONFLAG="ADDED">The items listed below are the ones the maintainer has been able to discover: please mail <ULINK URL="mailto:pflynn@imbolc.ucc.ie">me</ULINK> if you come across others. Old items are retained here for reference at the moment- they will eventually expire.</PARA> <ITEMIZEDLIST><LISTITEM><PARA REVISIONFLAG="ADDED">Eve Maler is giving a one-day tutorial (working title <CITETITLE>XML for the SGML-Knowledgeable</CITETITLE>) at the <ULINK URL="http://www.gca.org/">GCA</ULINK> offices on 14 November 1997. This will also be given at SGML/XML '97 (see below).</PARA></LISTITEM> <LISTITEM><PARA REVISIONFLAG="CHANGED">Technology Appraisals Ltd are holding a seminar in London, England, on <CITETITLE>XML ready for prime time?</CITETITLE> on 24-25 November 1997. 
Details from <ULINK URL="mailto:techapp@cix.compulink.co.uk">Susan Dennington</ULINK> at TAL.</PARA></LISTITEM> <LISTITEM><PARA>Peter Murray-Rust is preparing an <ULINK URL="http://www.vsms.nottingham.ac.uk/vsms/java/advert/advert.txt">XML/Java Virtual Course</ULINK> entitled <CITETITLE><ULINK URL="http://www.vsms.nottingham.ac.uk/vsms/java">Scientific Information Components using Java and XML</ULINK></CITETITLE>. Details are at <FILENAME><ULINK URL="http://www.vsms.nottingham.ac.uk/vsms/java/advert/advert.txt">http://www.vsms.nottingham.ac.uk/vsms/java/advert/advert.txt</ULINK></FILENAME>. The XML will be very low-level (<FOREIGNPHRASE>ie</FOREIGNPHRASE> <LINK LINKEND="faq-wf">well-formed</LINK> only, balanced tags, and quoted attributes; no DTDs, entities, marked sections, catalogs, links, etc.) It concentrates on building element trees (including those from legacy files).</PARA></LISTITEM> <LISTITEM><PARA REVISIONFLAG="CHANGED">The annual SGML Conference run by the Graphic Communications Association has been renamed the SGML/XML Conference. 
SGML/XML '97 will be held in Washington DC, 8-11 December 1997 (further details on <ULINK URL="http://www.gca.org/">the GCA's Web site</ULINK>).</PARA></LISTITEM></ITEMIZEDLIST> <PARA REVISIONFLAG="CHANGED">There is a list of articles on XML which have appeared in the computing press: details are being kept in Robin Cover's <ULINK URL="http://www.sil.org/sgml/sgml.html">SGML pages</ULINK>.</PARA> <PARA REVISIONFLAG="CHANGED">The first XML books are starting to appear: </PARA> <BIBLIOGRAPHY><BIBLIOENTRY><AUTHOR><SURNAME>Light</SURNAME><FIRSTNAME>Richard</FIRSTNAME></AUTHOR> <TITLE><ULINK URL="http://www.mcp.com/info/1-57521/1-57521-334-6/">Presenting XML</ULINK></TITLE><PUBLISHERNAME>Sams.Net</PUBLISHERNAME><ISBN>1-57521-334-6</ISBN> <PAGENUMS>414</PAGENUMS><ADDRESS><OTHERADDR>http://www.mcp.com/info/1-57521/1-57521-334-6/</OTHERADDR></ADDRESS> <RELEASEINFO>With contributions from Simon North and Charles Allen, and a foreword from Tim Bray</RELEASEINFO></BIBLIOENTRY></BIBLIOGRAPHY></SECT2> <SECT2 ID="faq-mailinglist"><TITLE>Where can I discuss implementation and development of XML?</TITLE> <PARA>There is a mailing list called <FILENAME>xml-dev</FILENAME> for those committed to developing components for XML. You can subscribe by sending a 1-line mail message to <FILENAME><ULINK URL="mailto:majordomo@ic.ac.uk">majordomo@ic.ac.uk</ULINK></FILENAME> saying:<COMMAND>subscribe xml-dev <REPLACEABLE>yourname@yoursite</REPLACEABLE></COMMAND> The list is hypermailed for online reference at <FILENAME><ULINK URL="http://www.lists.ic.ac.uk/hypermail/xml-dev/">http://www.lists.ic.ac.uk/hypermail/xml-dev/</ULINK></FILENAME>.</PARA> <PARA>Note that this list is for those people actively involved in developing resources for XML. 
It is <EMPHASIS>not</EMPHASIS> for general information about XML (see this FAQ and <LINK LINKEND="faq-more">other sources</LINK>) or for general discussion about SGML implementation and resources (see <FILENAME><ULINK URL="news:comp.text.sgml">comp.text.sgml</ULINK></FILENAME>).</PARA> <PARA>There is a general-purpose mailing list <FILENAME>XML-L</FILENAME> for public discussions: to subscribe, send a 1-line mail message to <FILENAME><ULINK URL="mailto:listserv@listserv.hea.ie">LISTSERV@listserv.hea.ie</ULINK></FILENAME> saying<COMMAND>subscribe XML-L <REPLACEABLE>forename</REPLACEABLE> <REPLACEABLE>surname</REPLACEABLE></COMMAND>(substituting your own forename and surname). To unsubscribe, send a 1-line message to the same address saying<COMMAND>unsubscribe XML-L</COMMAND>Please Read The Fine Documentation which you will be sent when you join either mailing list, as it contains important information, particularly about what to do when your email address changes.</PARA></SECT2></SECT1> <SECT1 ID="faq-user"><TITLE>Users of SGML (including browsers of HTML)</TITLE> <SECT2 ID="faq-usexml"><TITLE>Do I have to do anything to use XML?</TITLE> <PARA REVISIONFLAG="CHANGED">Not yet. XML is still being developed, but there are already <LINK LINKEND="faq-browser">some pilot browsers</LINK>, so you can experiment with them. When the specification is complete, more software should start to appear, and you may be able to download browsers and use them to browse the Web much as you do with current applications.</PARA> <PARA REVISIONFLAG="CHANGED">You can use the pilot browsers to look at some of the emerging XML material, such as <ULINK URL="http://sunsite.unc.edu/pub/sun-info/standards/dsssl/egs/21_shaks/">Jon Bosak's Shakespeare plays</ULINK> and the molecular experiments of the <ULINK URL="http://www.venus.co.uk/omf/cml/">Chemical Markup Language (CML)</ULINK>. 
There are some more example sources listed at <ULINK URL="http://www.sil.org/sgml/xml.html#examples"><SYSTEMITEM>http://www.sil.org/sgml/xml.html#examples</SYSTEMITEM></ULINK>.</PARA> <PARA>If you want to start preparations for writing your own XML, see <LINK LINKEND="faq-author">the questions in the Authors' Section</LINK>.</PARA></SECT2> <SECT2 ID="faq-xmloffer"><TITLE>Why should I use XML instead of HTML?</TITLE> <ITEMIZEDLIST REVISIONFLAG="CHANGED"><LISTITEM><PARA>Authors and providers can <LINK LINKEND="faq-doctype">design their own document types</LINK> using XML, instead of being stuck with HTML. Document types can be explicitly tailored to an audience, so the cumbersome fudging that has to take place with <LINK LINKEND="faq-html">HTML</LINK> to achieve special effects should become a thing of the past: authors and designers will be free to invent their own markup elements;</PARA></LISTITEM> <LISTITEM><PARA>Information content can be richer and easier to use, because the <LINK LINKEND="faq-hypertext">hypertext linking abilities of XML</LINK> are much greater than those of HTML.</PARA> </LISTITEM> <LISTITEM><PARA>XML can provide more and better facilities for browser presentation and performance;</PARA></LISTITEM> <LISTITEM><PARA>It removes many of the underlying complexities of SGML in favor of a more flexible model, so writing programs to handle XML will be much easier than doing the same for full SGML.</PARA> </LISTITEM> <LISTITEM><PARA>Information will be more accessible and reusable, because the more flexible markup of XML can be used by any XML software instead of being restricted to specific manufacturers as has become the case with HTML.</PARA> </LISTITEM> <LISTITEM><PARA><LINK LINKEND="faq-valid">Valid XML files</LINK> are kosher SGML, so they can be used outside the Web as well, in an SGML environment (once the spec is stable and SGML software adopts it). 
</PARA></LISTITEM></ITEMIZEDLIST></SECT2> <SECT2 ID="faq-browser"><TITLE>Where can I get an XML browser?</TITLE> <PARA>There are already some browsers emerging (see below), but the <LINK LINKEND="faq-spec">XML specification</LINK> is still under development. As with <LINK LINKEND="faq-html">HTML</LINK>, there won't be just one browser, but many. However, because the potential number of different XML applications is not limited, no single browser should be expected to handle 100% of everything.</PARA> <PARA>The generic parts of XML (<FOREIGNPHRASE>eg</FOREIGNPHRASE> parsing, tree management, searching, formatting, <FOREIGNPHRASE>etc</FOREIGNPHRASE>) are being combined into general-purpose browser libraries or toolkits to make it easier for <LINK LINKEND="faq-developer">developers</LINK> to take a consistent line when writing XML applications. Such applications could then be customized by adding semantics for specific markets, or using languages like <PRODUCTNAME REMAP="ULINK" XREFLABEL="http://www.sun.com/">Java</PRODUCTNAME> to develop plugins for generic browsers and have the specialist modules delivered transparently over the Web.</PARA> <PARA>Netscape and Microsoft are both now developing XML facilities: some development work at Microsoft can be seen at <FILENAME><ULINK URL="http://www.microsoft.com/msdn/sdk/inetsdk/help/inet5017.htm">http://www.microsoft.com/msdn/sdk/inetsdk/help/inet5017.htm</ULINK>.</FILENAME></PARA> <ITEMIZEDLIST><LISTITEM><PARA><PRODUCTNAME>JUMBO</PRODUCTNAME> is a prototype GUI browser/editor/search/rendering tool for the output of XML parsers, developed as part of the project to produce <LINK LINKEND="faq-usexml">CML</LINK>. It displays the abstract document tree which can be queried and edited in limited fashion. <PRODUCTNAME>Java</PRODUCTNAME> classes can be dynamically loaded for the current DTD and allow complex transformation and rendering. 
The emphasis is on the import of legacy files into structured documents, and the management of non-textual data, including common data structures (trees, tables, lists, <FOREIGNPHRASE>etc</FOREIGNPHRASE>). Currently <PRODUCTNAME>JUMBO</PRODUCTNAME> parses a subset of XML files (<FOREIGNPHRASE>ie</FOREIGNPHRASE> only elements and their attributes) and will be grafted onto other parsers as soon as possible. The software and a wide range of XML demo files, including Jon Bosak's PLAY, can be downloaded for any Java-enabled browser from <FILENAME><ULINK URL="http://www.venus.co.uk/omf/cml/">http://www.venus.co.uk/omf/cml/</ULINK></FILENAME></PARA></LISTITEM> <LISTITEM><PARA>The <PRODUCTNAME>DynaWeb</PRODUCTNAME> server from Inso Corporation (formerly EBT) can serve other forms of SGML translated on-the-fly to XML (demonstrated at the GCA's XML Conference in San Diego, March 1997). Sun Microsystems are currently serving XML using this software on an experimental basis (message to the <LINK LINKEND="faq-mailinglist"><FILENAME>xml-dev</FILENAME> mailing list</LINK> dated Mon, 17 Mar 1997 16:49:42 -0800 from Jon Bosak).</PARA></LISTITEM> <LISTITEM><PARA>Microsoft are defining their proposed Channel Definition Format (CDF) as an application of XML, but no DTD is available yet. Their Open Software Description (OSD; endorsed by CyberMedia, InstallShield, LANovation, Lotus, and Netscape) is also proposed as an XML application. 
For further information on these formats, contact <ULINK URL="http://www.microsoft.com/">Microsoft</ULINK></PARA></LISTITEM></ITEMIZEDLIST> <PARA>See also the notes on software for <LINK LINKEND="faq-xmlsoft">authors</LINK> and <LINK LINKEND="faq-api">developers</LINK>, and the more detailed list at <FILENAME><ULINK URL="http://www.sil.org/sgml/xml.html">http://www.sil.org/sgml/xml.html</ULINK></FILENAME>.</PARA></SECT2> <SECT2 ID="faq-switch"><TITLE>Do I have to switch from SGML or HTML to XML?</TITLE> <PARA>No, existing <LINK LINKEND="faq-sgml">SGML</LINK> and <LINK LINKEND="faq-html">HTML</LINK> applications software will continue to work with existing files. But as with any enhanced facility, if you want to view or download and use XML files, you will need to add XML-aware software when it becomes available.</PARA></SECT2></SECT1> <SECT1 ID="faq-author"><TITLE>Authors of SGML (including writers of HTML)</TITLE> <NOTE><PARA>Authors should also read the <LINK LINKEND="faq-developer">Developers' Section</LINK>, which contains further information about the internals of XML files.</PARA></NOTE> <SECT2 ID="faq-replace"><TITLE>Does XML replace HTML?</TITLE> <PARA REVISIONFLAG="CHANGED">No, XML itself does not replace <LINK LINKEND="faq-html">HTML</LINK>: instead, it provides an alternative by allowing you to define your own set of markup elements. HTML is expected to remain in common use for some time to come, and DTDs will be available in XML versions as well as the original SGML versions. XML is designed to make the writing of DTDs much simpler than with full <LINK LINKEND="faq-sgml">SGML</LINK>.</PARA> <PARA REVISIONFLAG="CHANGED">Work is going on to produce XML versions of HTML and other popular DTDs, but this may not take off until the specification for XML 1.0 is complete (targeted November 1997). 
Watch <SYSTEMITEM>comp.text.sgml</SYSTEMITEM> and <SYSTEMITEM>XML-L</SYSTEMITEM> for announcements.</PARA></SECT2> <SECT2 ID="faq-xmldoc"><TITLE>What does an XML document look like inside?</TITLE> <PARA>The basic structure is very similar to most other applications of SGML, including HTML. XML documents can be very simple, with no document type declaration, and straightforward nested markup of your own design:</PARA> <PROGRAMLISTING><LINK LINKEND="faq-rmdpi"><?XML version="1.0" RMD="NONE"?></LINK> <conversation> <greeting><PROPERTY>Hello, world!</PROPERTY></greeting> <response><PROPERTY>Stop the planet, I want to get off!</PROPERTY></response> </conversation></PROGRAMLISTING> <PARA>Or they can be more complicated, with a DTD specified, and maybe an internal subset, and a more complex structure:</PARA> <PROGRAMLISTING><?XML version="1.0" <LINK LINKEND="faq-rmd">RMD</LINK>="ALL" encoding="UTF-8"?> <!<LINK LINKEND="faq-doctype">DOCTYPE</LINK> titlepage SYSTEM "typo.dtd" [<!ENTITY % active.links "INCLUDE">]> <titlepage> <white-space type="vertical" amount="36"/> <title font="Baskerville" size="24/30" alignment="centered"><PROPERTY>Hello, world!</PROPERTY></title> <white-space type="vertical" amount="12"/> <!-- In some copies the following decoration is hand-colored, presumably by the author --> <image location="http://www.foo.bar/fleuron.eps" type="URL" alignment="centered"/> <white-space type="vertical" amount="24"/> <author font="Baskerville" size="18/22" style="italic"><PROPERTY MOREINFO="REFENTRY">Munde Salutem</PROPERTY></author> </titlepage></PROGRAMLISTING> <PARA>Or they can be anywhere between: a lot will depend on how you want to define your document type (or whose you use) and what it will be used for. 
See the question on <LINK LINKEND="faq-validwf">valid and well-formed files</LINK>.</PARA></SECT2> <SECT2 ID="faq-space"><TITLE>How does XML handle white-space in my documents?</TITLE> <WARNING><PARA>This section contains a major change from the previous version.</PARA></WARNING> <PARA REVISIONFLAG="CHANGED">The SGML rules regarding white-space have been changed for XML, so <EMPHASIS>all</EMPHASIS> white-space, including linebreaks, TAB characters, and regular spaces, is passed by the parser <EMPHASIS>unchanged</EMPHASIS> to the application (browser, formatter, viewer, <FOREIGNPHRASE>etc</FOREIGNPHRASE>). This means: </PARA> <ITEMIZEDLIST REVISIONFLAG="ADDED"><LISTITEM><PARA><WORDASWORD>insignificant</WORDASWORD> white-space between structural elements (those which can contain only other elements, not text data,) <EMPHASIS>will now</EMPHASIS> get passed to the application (under <WORDASWORD>full</WORDASWORD> SGML this white-space is suppressed);</PARA></LISTITEM> <LISTITEM><PARA><WORDASWORD>significant</WORDASWORD> white-space within elements which can contain text and markup mixed together (<FOREIGNPHRASE>ie</FOREIGNPHRASE> paragraphs) will also get passed to the application as before.</PARA></LISTITEM></ITEMIZEDLIST> <PROGRAMLISTING><chapter> <section> <title> My title for Section 1. </title> <para> ... </para> </section> </chapter></PROGRAMLISTING> <PARA REVISIONFLAG="CHANGED">The parser must, however, still inform the application what white-space occurred in element content, if known. (Users of <QUOTE>full</QUOTE> SGML may recognize that this information was not in the <ULINK URL="http://www.sil.org/sgml/WG8-n931a.html">ESIS</ULINK>, but it <EMPHASIS>is</EMPHASIS> in the <ULINK URL="http://www.sil.org/sgml/topics.html#groves">grove</ULINK>.) In the above example, the application will receive all the pretty-printing linebreaks, TABs, and spaces between the elements as well as those embedded in the section title. 
It is the function of the application (browser, formatter, viewer, <FOREIGNPHRASE>etc</FOREIGNPHRASE>) to decide which type of white-space to discard and which to retain.</PARA></SECT2> <SECT2 ID="faq-case"><TITLE>Which parts of an XML document are case-sensitive?</TITLE> <WARNING><PARA>This section contains a major change from the previous version.</PARA></WARNING> <PARA REVISIONFLAG="CHANGED">All of an XML file is case-sensitive, both markup <EMPHASIS>and</EMPHASIS> text. This is significantly different from HTML and many other SGML document types.</PARA> <ITEMIZEDLIST REVISIONFLAG="CHANGED"><LISTITEM><PARA>Element names (used in start-tags and end-tags) are case-sensitive: you must stick with whatever combination of upper- or lower-case was used to define them in the <LINK LINKEND="faq-dtd">DTD</LINK> (for valid files);</PARA></LISTITEM> <LISTITEM><PARA>For well-formed files with no DTD, the <EMPHASIS>first occurrence</EMPHASIS> of an element name defines the casing. So you can't say <SGMLTAG CLASS="STARTTAG">BODY</SGMLTAG>…<SGMLTAG CLASS="ENDTAG">body</SGMLTAG>: upper- and lower-case must match; thus <SGMLTAG CLASS="ENDTAG" LANG="xml" REMAP="empty">IMG</SGMLTAG> and <SGMLTAG CLASS="ENDTAG" LANG="xml" REMAP="empty">img</SGMLTAG> are <EMPHASIS>two different elements</EMPHASIS>;</PARA></LISTITEM> <LISTITEM><PARA>Attribute names are also case-sensitive, on a per-element basis: for example <SGMLTAG CLASS="ENDTAG" LANG="xml" REMAP="empty">PIC width="7in"</SGMLTAG> and <SGMLTAG CLASS="ENDTAG" LANG="xml" REMAP="empty">PIC WIDTH="6in"</SGMLTAG> in the same file exhibit two <EMPHASIS>separate</EMPHASIS> attributes, because the different casings of <SGMLTAG CLASS="ATTRIBUTE">width</SGMLTAG> and <SGMLTAG CLASS="ATTRIBUTE">WIDTH</SGMLTAG> distinguish them;</PARA> </LISTITEM> <LISTITEM><PARA>Attribute values are also case-sensitive. 
Character data values (<FOREIGNPHRASE>eg</FOREIGNPHRASE> <SGMLTAG CLASS="ATTRIBUTE">HRef="MyFile.SGML"</SGMLTAG>) are exactly as before, but ID and IDREF attributes are case-sensitive and no longer get folded to uppercase for comparisons;</PARA></LISTITEM> <LISTITEM><PARA>All entity names (<SGMLTAG CLASS="GENENTITY">Aacute</SGMLTAG>), and your data content (your text), are case-sensitive, exactly as before.</PARA></LISTITEM></ITEMIZEDLIST></SECT2> <SECT2 ID="faq-exist"><TITLE>How can I make my existing HTML files work in XML?</TITLE> <PARA REVISIONFLAG="CHANGED">All XML documents must be <LINK LINKEND="faq-wf">well-formed</LINK> (see below), but a DTD is optional. HTML files currently have to be DTDless in XML, because there is no XML version of the HTML DTD yet. Many HTML authoring tools already produce almost (but not quite) <EMPHASIS>well-formed</EMPHASIS> XML.</PARA> <PARA REVISIONFLAG="CHANGED">If you have created your <LINK LINKEND="faq-html">HTML</LINK> files conforming to one of the several HTML <LINK LINKEND="faq-dtd">Document Type Definitions (DTDs)</LINK>, and they validate OK, then they can be converted as follows:</PARA> <ITEMIZEDLIST REVISIONFLAG="ADDED"><LISTITEM><PARA>replace the <SGMLTAG>DOCTYPE</SGMLTAG> declaration and any internal subset (basically everything within the first set of angled brackets <SGMLTAG><!DOCTYPE HTML...></SGMLTAG>) with the XML Declaration <SYSTEMITEM><?XML version="1.0" RMD="NONE"?></SYSTEMITEM></PARA></LISTITEM> <LISTITEM><PARA>change any <SGMLTAG>EMPTY</SGMLTAG> elements (<FOREIGNPHRASE>eg</FOREIGNPHRASE> <SGMLTAG CLASS="ELEMENT">ISINDEX</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">BASE</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">META</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">LINK</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">NEXTID</SGMLTAG> and <SGMLTAG CLASS="ELEMENT">RANGE</SGMLTAG> in the header, and <SGMLTAG CLASS="ELEMENT">IMG</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">BR</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">HR</SGMLTAG>, <SGMLTAG 
CLASS="ELEMENT">FRAME</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">WBR</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">BASEFONT</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">SPACER</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">AUDIOSCOPE</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">AREA</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">PARAM</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">KEYGEN</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">COL</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">LIMITTEXT</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">SPOT</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">TAB</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">OVER</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">RIGHT</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">LEFT</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">CHOOSE</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">ATOP</SGMLTAG>, and <SGMLTAG CLASS="ELEMENT">OF</SGMLTAG> in the body) so that they end with <QUOTE><SYSTEMITEM>/></SYSTEMITEM></QUOTE>, for example <SGMLTAG CLASS="ENDTAG" LANG="xml" REMAP="empty">IMG SRC="mypic.gif" alt="Picture"</SGMLTAG></PARA></LISTITEM> <LISTITEM><PARA>ensure there are correctly-matched explicit end-tags for all non-empty elements; <FOREIGNPHRASE>eg</FOREIGNPHRASE> every <SGMLTAG CLASS="STARTTAG">P</SGMLTAG> must have a <SGMLTAG CLASS="ENDTAG">P</SGMLTAG>, <FOREIGNPHRASE>etc</FOREIGNPHRASE>: this can be automated by a normalizer program like <PRODUCTNAME>sgmlnorm</PRODUCTNAME> (part of <ULINK URL="http://www.jclark.com/sp/"><PRODUCTNAME>SP</PRODUCTNAME></ULINK>) or a function in an editor like <PRODUCTNAME>Emacs</PRODUCTNAME>/<PRODUCTNAME>psgml</PRODUCTNAME>'s <SYSTEMITEM>sgml-normalize</SYSTEMITEM>;</PARA></LISTITEM> <LISTITEM><PARA>escape all markup characters (<SYSTEMITEM><</SYSTEMITEM> and <SYSTEMITEM>&</SYSTEMITEM>) as <SGMLTAG CLASS="GENENTITY">lt</SGMLTAG> and <SGMLTAG CLASS="GENENTITY">amp</SGMLTAG></PARA></LISTITEM> <LISTITEM><PARA>ensure all attribute values are in quotes;</PARA></LISTITEM> <LISTITEM><PARA>ensure all occurrences of all element names in start-tags <EMPHASIS>and</EMPHASIS> end-tags match with respect to upper- and lower-case and that they
are consistent throughout the file;</PARA></LISTITEM> <LISTITEM><PARA>ensure all attribute names are similarly in a consistent case throughout the file.</PARA></LISTITEM></ITEMIZEDLIST> <PARA REVISIONFLAG="ADDED">Be aware that many HTML browsers may not accept XML-style <SGMLTAG>EMPTY</SGMLTAG> elements with the trailing slash, so the above changes are not backwards-compatible. An alternative might be to add a dummy end-tag to all <SGMLTAG>EMPTY</SGMLTAG> elements, so <SGMLTAG CLASS="STARTTAG">IMG</SGMLTAG> becomes <SGMLTAG CLASS="STARTTAG">IMG</SGMLTAG><SGMLTAG CLASS="ENDTAG">IMG</SGMLTAG>.</PARA> <PARA REVISIONFLAG="CHANGED">If you have a lot of valid HTML files, you could write a script in an SGML conversion system to do this (such as <PRODUCTNAME><ULINK URL="http://www.omnimark.com">Omnimark</ULINK></PRODUCTNAME>, <PRODUCTNAME><ULINK URL="http://www.balise.fr">Balise</ULINK></PRODUCTNAME>, <PRODUCTNAME><ULINK URL="http://www.dircon.co.uk/sgmlc">SGMLC</ULINK></PRODUCTNAME>, or a system using one of the SGML <PRODUCTNAME>Perl</PRODUCTNAME> libraries), or you could even use edit macros if you know what you're doing.</PARA> <PARA REVISIONFLAG="CHANGED">If your HTML files are invalid then they will almost certainly have to be converted manually, although if the deformities are regular and carefully constructed, the files may actually be almost well-formed, and you could write a program or script to do as described above. To test for invalidity and non-conformance, check the following:</PARA> <ITEMIZEDLIST><LISTITEM><PARA REVISIONFLAG="CHANGED">do the files contain markup syntax errors? 
For example, are there any backslashes instead of forward slashes on end-tags; or elements which nest incorrectly (<SGMLTAG CLASS="STARTTAG">SAMP</SGMLTAG>an element which starts <SGMLTAG CLASS="STARTTAG">EM</SGMLTAG>inside one element<SGMLTAG CLASS="ENDTAG">SAMP</SGMLTAG> but ends outside it<SGMLTAG CLASS="ENDTAG">EM</SGMLTAG>);</PARA> </LISTITEM> <LISTITEM><PARA REVISIONFLAG="CHANGED">do the files contain markup which conflicts with the HTML DTDs, such as headings inside list items, or list items outside list environments; </PARA></LISTITEM> <LISTITEM><PARA REVISIONFLAG="CHANGED">do the files use elements which are not in any DTD? Although this is easy to transform to a DTDless well-formed file (because you don't have to define elements in advance) most proprietary [browser-specific] extensions have never been formally defined, so it is often difficult to work out where they can meaningfully be used.</PARA></LISTITEM></ITEMIZEDLIST> <PARA REVISIONFLAG="ADDED">Markup which is valid but which is meaningless or void may need to be edited out before conversion (such as repeated empty paragraphs or linebreaks, empty tables, invisible <WORDASWORD>spacing</WORDASWORD> GIFs <FOREIGNPHRASE>etc</FOREIGNPHRASE>: XML uses stylesheets, so you won't need any of these contrivances) </PARA> <PARA REVISIONFLAG="ADDED">See the <LINK LINKEND="faq-wf">rules for <WORDASWORD>well-formed</WORDASWORD> XML files</LINK> for details of what you need to check in XML when converting.</PARA> <PARA REVISIONFLAG="ADDED">Note there are XML versions of the HTML DTD in preparation:</PARA> <ITEMIZEDLIST><LISTITEM><PARA><ULINK URL="mailto:btrafford@worldnet.att.net">Ben Trafford</ULINK> has developed an XML version of HTML 4.2</PARA></LISTITEM> <LISTITEM><PARA>[details of others sought: please contact the <ULINK URL="mailto:pflynn@m-net.arbornet.org">editor</ULINK>]</PARA></LISTITEM></ITEMIZEDLIST></SECT2> <SECT2 ID="faq-subset"><TITLE>If XML is just a subset of SGML, can I use XML files directly 
with SGML tools?</TITLE> <PARA REVISIONFLAG="CHANGED">Yes, provided: <ITEMIZEDLIST> <LISTITEM><PARA>the document has a valid <LINK LINKEND="faq-dtd">Document Type Definition (DTD)</LINK>, <FOREIGNPHRASE>ie</FOREIGNPHRASE> the files are <LINK LINKEND="faq-valid">valid</LINK>, not just <LINK LINKEND="faq-wf">well-formed</LINK>; and</PARA></LISTITEM> <LISTITEM><PARA>you use software which knows about the features needed to support XML, such as the special form for <SGMLTAG>EMPTY</SGMLTAG> elements; some aspects of the SGML Declaration such as <SGMLTAG>NAMECASE GENERAL NO</SGMLTAG>; multiple attribute declarations.</PARA></LISTITEM></ITEMIZEDLIST></PARA> <PARA REVISIONFLAG="CHANGED">At the moment there are few tools which handle XML files unchanged because of the format of these <SGMLTAG>EMPTY</SGMLTAG> elements, but this is changing. The <PRODUCTNAME>nsgmls</PRODUCTNAME> parser has an experimental XML conformance switch, and the first XML-specific editors and parsers are appearing (see the question on <LINK LINKEND="faq-xmlsoft">software</LINK>).</PARA> <PARA REVISIONFLAG="ADDED">The rules of ISO 8879 are up for minor amendments, some of which are to facilitate changes needed for Web-enablement.</PARA></SECT2> <SECT2 ID="faq-learn"><TITLE>I'm used to authoring and serving HTML. Can I learn XML easily?</TITLE> <PARA>Yes, very easily, but at the moment there is still a need for tutorials, simple tools, and more examples of XML documents. 
<LINK LINKEND="faq-wf">Well-formed</LINK> XML documents may look similar to <LINK LINKEND="faq-html">HTML</LINK> except for some small but very important points of syntax.</PARA> <PARA>As every user community can have their own document type defined, it should be much easier to learn, because element names can be picked for relevance.</PARA></SECT2> <SECT2 ID="faq-charents"><TITLE>Will XML be able to use non-Latin characters?</TITLE> <PARA REVISIONFLAG="CHANGED">Yes, the <LINK LINKEND="faq-spec">XML Draft Specification</LINK> explicitly says XML uses <ULINK URL="http://www.iso.ch/">ISO 10646</ULINK>, the international standard 31-bit character repertoire which covers most human (and some non-human) written languages. This is currently congruent with Unicode.</PARA> <PARA REVISIONFLAG="CHANGED"><QUOTE>…all XML processors must accept the UTF-8 and UCS-2 encodings of ISO 10646…</QUOTE>. UCS-2 is the 16-bit version of Unicode. UTF-8 is an encoding of Unicode into 8-bit characters: the first 128 are the same as ASCII, the rest are used to encode the rest of Unicode into sequences of between 2 and 6 bytes. UTF-8 in its single-octet form is therefore the same as ISO 646 IRV (ASCII), so you can continue to use ASCII for English or other unaccented languages using the Latin alphabet. 
Note that UTF-8 is incompatible with ISO 8859-1 (ISO Latin-1) after code point 127 decimal (the end of ASCII).</PARA> <PARA REVISIONFLAG="CHANGED"><QUOTE>…the mechanisms for signalling which of the two are in use, and for bringing other encodings into play, are […] in the discussion of character encodings.</QUOTE> The <LINK LINKEND="faq-spec">XML Draft Specification</LINK> explains how to specify in your XML file which coded character set you are using.</PARA> <PARA REVISIONFLAG="CHANGED">Use of UCS-4 can only legally be specified in SGML or XML when the pending <WORDASWORD>WebSGML</WORDASWORD> Technical Corrigendum to ISO 8879 comes into force to enable numbers longer than eight digits to be used in the SGML Declaration.</PARA> <PARA REVISIONFLAG="CHANGED"><QUOTE>Regardless of the specific encoding used, any character in the ISO 10646 character set may be referred to by the decimal or hexadecimal equivalent of its bit string</QUOTE>: so no matter which character set you personally use, you can still refer to specific individual characters from elsewhere in the encoded repertoire by using <SGMLTAG CLASS="NUMCHARREF"><REPLACEABLE>dddd</REPLACEABLE></SGMLTAG> (decimal character code) or <SGMLTAG CLASS="NUMCHARREF">x<REPLACEABLE>hhhh</REPLACEABLE></SGMLTAG> (hexadecimal character code).</PARA> <PARA REVISIONFLAG="ADDED">The terminology can get confusing, as can the numbers: see the <ULINK URL="http://cns-web.bu.edu/pub/djohnson/web_files/i18n/ISO-10646.html">ISO 10646 Concept Dictionary</ULINK>.</PARA></SECT2> <SECT2 ID="faq-doctype"><TITLE>What's a Document Type Definition (DTD) and where do I get one?</TITLE> <PARA REVISIONFLAG="CHANGED">A DTD is usually a file (or several files to be used together) which contains a formal definition of a particular type of document. This sets out what names can be used for elements, where they may occur, and how they all fit together.
For example, if you want a document type to describe <SGMLTAG CLASS="ELEMENT">LIST</SGMLTAG>s which contain <SGMLTAG CLASS="ELEMENT">ITEM</SGMLTAG>s, part of your DTD would contain something like<PROGRAMLISTING><!ELEMENT item (#pcdata)> <!ELEMENT list (item)+></PROGRAMLISTING>This defines items containing text, and lists containing items. It's a formal language which lets processors automatically parse a document and identify where every element comes and how they relate to each other, so that stylesheets, navigators, browsers, search engines, databases, printing routines, and other applications can be used.</PARA> <PARA>[Note that in XML, there are no minimization parameters (<QUOTE><SYSTEMITEM>-</SYSTEMITEM></QUOTE> and <QUOTE><SYSTEMITEM>O</SYSTEMITEM></QUOTE> characters in element definitions between element name and content model), because all elements except empty ones must have both start-tag and end-tag present at all times.]</PARA> <PARA ID="faq-selfdef">There are thousands of SGML DTDs already in existence in all kinds of areas (see the <ULINK URL="http://www.sil.org/sgml/sgml.html">SGML Web pages</ULINK> for examples). Many of them can be downloaded and used freely; or you can write your own. As with any language, you need to learn it to do this: but XML is much simpler than full SGML: see the <LINK LINKEND="faq-restrict">list of restrictions</LINK> which shows what has been cut out. Existing SGML DTDs need to be converted to XML for use with XML systems: expect to see announcements soon of popular DTDs becoming available in XML format.</PARA></SECT2> <SECT2 ID="faq-hypertext"><TITLE>How will XML affect my document links?</TITLE> <PARA REVISIONFLAG="CHANGED"><LINK LINKEND="tei-link">The linking abilities of XML systems</LINK> are much more powerful than those of HTML, so you'll be able to do much more with them. 
Existing <MARKUP>HREF</MARKUP>-style links will remain usable, but new linking technology is based on the lessons learned in the development of other standards involving hypertext, such as <ULINK URL="http://www.sil.org/sgml/acadapps.html#tei">TEI</ULINK> and <ULINK URL="http://www.sil.org/sgml/">HyTime</ULINK>, which let you manage bidirectional and multi-way links, as well as links to a span of text (within your own or other documents) rather than to a single point. This is already implemented for SGML in browsers like <PRODUCTNAME>Panorama</PRODUCTNAME> and <PRODUCTNAME>MultiDoc Pro</PRODUCTNAME>.</PARA> <PARA REVISIONFLAG="CHANGED">The <ULINK URL="http://www.w3.org/TR/WD-xml-link">XML Linking Specification (XLL)</ULINK> document contains a detailed specification. An XML link can be either a URL or a TEI-style Extended Pointer (<WORDASWORD><LINK LINKEND="tei-link">Xptr</LINK></WORDASWORD>), or both. A URL on its own is assumed to be a resource (as with HTML); if an Xptr follows it, it is assumed to be a sub-resource of that URL; an Xptr on its own is assumed to apply to the current document.</PARA> <PARA REVISIONFLAG="CHANGED">An Xptr is always preceded by one of <FILENAME>#</FILENAME>, <FILENAME>?</FILENAME>, or <FILENAME>|</FILENAME>. The <FILENAME>#</FILENAME> and <FILENAME>?</FILENAME> mean the same as in HTML applications; the <FILENAME>|</FILENAME> means the sub-resource can be found by applying the Xptr to the resource, but the method of doing this is left to the application.</PARA> <PARA REVISIONFLAG="CHANGED">The <ULINK URL="http://etext.virginia.edu/bin/tei-tocs?div=DIV2&id=SAXR">TEI Extended Pointer Notation</ULINK> (EPN) is much more powerful than the <WORDASWORD>fragment address</WORDASWORD> on the end of some URLs.
<LINK LINKEND="faq-hypertext" ID="tei-link">For example, the word <WORDASWORD>Xptr</WORDASWORD> two paragraphs back could be referred to as <SYSTEMITEM>http://www.ucc.ie/xml/faq.sgml#ID(faq-hypertext)CHILD(3,*)(4,*)</SYSTEMITEM>, meaning the fourth child object within the third child object after the element whose ID is <FILENAME>faq-hypertext</FILENAME>.</LINK> Count the objects from the start of this question in the <ULINK URL="faq.sgml">SGML version</ULINK> (which has the ID <QUOTE><SYSTEMITEM>faq-hypertext</SYSTEMITEM></QUOTE>):</PARA> <ORDEREDLIST REVISIONFLAG="ADDED"><LISTITEM><PARA>the title of the question;</PARA> <PROGRAMLISTING><SECT2 ID="faq-hypertext"> <TITLE>How will XML affect my document links?</TITLE> </PROGRAMLISTING></LISTITEM> <LISTITEM><PARA>the first paragraph;</PARA> <PROGRAMLISTING><PARA><LINK LINKEND="tei-link">The linking abilities of XML systems</LINK> are much more powerful than those... </PROGRAMLISTING></LISTITEM> <LISTITEM><PARA>the second paragraph:</PARA> <ORDEREDLIST><LISTITEM><PARA>the character data from the start of the paragraph to the first item of markup:</PARA> <PROGRAMLISTING><PARA>The</PROGRAMLISTING></LISTITEM> <LISTITEM><PARA>the markup item:</PARA> <PROGRAMLISTING><ULINK URL="http://www.w3.org/TR/WD-xml-link">XML Linking Specification (XLL)</ULINK></PROGRAMLISTING></LISTITEM> <LISTITEM><PARA>the next stretch of character data:</PARA> <PROGRAMLISTING>document contains a detailed specification.
An XML link can be either a URL or a TEI-style Extended Pointer (</PROGRAMLISTING></LISTITEM> <LISTITEM><PARA>the next markup item:</PARA> <PROGRAMLISTING><LINK LINKEND="tei-link">Xptr</LINK></PROGRAMLISTING></LISTITEM></ORDEREDLIST></LISTITEM></ORDEREDLIST> <PARA REVISIONFLAG="CHANGED">If you view this file with <PRODUCTNAME>Panorama</PRODUCTNAME> or <PRODUCTNAME>MultiDoc Pro</PRODUCTNAME> you can click on the highlighted cross-reference button at the start of the example sentence, and it will display the locations in Extended Pointer Notation of all the links to it, including the word <QUOTE>Xptr</QUOTE> mentioned. (Doing this in an HTML browser is not meaningful, as they do not support bidirectional linking or EPN.)</PARA></SECT2> <SECT2 ID="faq-math"><TITLE>Can I do mathematics using XML?</TITLE> <PARA>Yes, if the <LINK LINKEND="faq-doctype">document type</LINK> you use provides for math. The long-expired HTML3 could be used, or <ULINK URL="http://www.arbornet.org/~silmaril/dtds/html/htmlpro.html">HTML Pro</ULINK>, or <ULINK URL="http://www.sil.org/sgml/gen-apps.html#iso12083DTDs">ISO 12083 Math</ULINK>, or the proposals of the <ULINK URL="http://www.can.nl/~abbott/OpenMath/">OpenMath</ULINK> or <ULINK URL="http://www.w3.org/TR/WD-math-970515">HTML-Math</ULINK> projects, or one of your own making. 
Browsers which display simple math embedded in SGML already exist (<FOREIGNPHRASE>eg</FOREIGNPHRASE> <PRODUCTNAME>Panorama</PRODUCTNAME>, <PRODUCTNAME>MultiDoc Pro</PRODUCTNAME>), and the mathematics-using communities may develop their own XML software.</PARA> <PARA REVISIONFLAG="CHANGED">The sophistication could vary from math expressions like <INFORMALEQUATION><MATH POSITION="INLINE"><I>x</I><SUB CLASS="char"><I>i</I></SUB></MATH></INFORMALEQUATION> through simple inline equations such as <INFORMALEQUATION><MATH POSITION="INLINE"><I>E</I> = <I>mc</I><SUP>2</SUP></MATH></INFORMALEQUATION> to display equations like<EQUATION> <MATH POSITION="DISPLAY"><BOX><ABOVE><FONT SIZE="+2" FACE="Symbol">S</FONT><SUP><I>n</I></SUP><SUB><I>i</I>=1</SUB> (<I>x</I><SUB><I>i</I></SUB> - <FONT FACE="Symbol">p</FONT>)<SUP>2</SUP></ABOVE><OVER><BELOW><I>n</I></BELOW></BOX></MATH></EQUATION>(If you are using an HTML browser to read this, the above equations may not be rendered correctly unless you have a math plugin for <PRODUCTNAME>Netscape</PRODUCTNAME> like IBM's <PRODUCTNAME><ULINK URL="http://www.ics.raleigh.ibm.com/ics/techexp.htm">TechExplorer</ULINK></PRODUCTNAME> which reads the embedded &TeX; equivalent.)</PARA></SECT2> <SECT2 ID="faq-meta" REVISIONFLAG="ADDED"><TITLE>How does XML handle metadata?</TITLE> <PARA>Because XML lets you define your own markup language, you can make full use of the extended hypertext features (see the question on <LINK LINKEND="faq-hypertext">Links</LINK>) of XML to store or link to metadata in any format (<FOREIGNPHRASE>eg</FOREIGNPHRASE> <ULINK URL="http://www2.sub.uni-goettingen.de/dc-wf.html">Dublin Core, Warwick Framework</ULINK>, <ULINK URL="http://www.dstc.edu.au/RDU/RDF/">Resource Description Framework (RDF)</ULINK>, and <ULINK URL="http://www.w3.org/PICS/">Platform for Internet Content Selection (PICS)</ULINK>).</PARA> <PARA>There are no predefined elements in XML, because it is an architecture, not an application, so it is not part of XML's
job to specify how or if authors should or should not implement metadata. You are therefore free to use any suitable method from simple attributes to the embedding of entire Dublin Core/Warwick Framework metadata records. Browser makers may also have their own architectural recommendations or methods to propose.</PARA></SECT2> <SECT2 ID="faq-java" REVISIONFLAG="ADDED"><TITLE>Can I use Java, ActiveX, <FOREIGNPHRASE>etc</FOREIGNPHRASE> in XML?</TITLE> <PARA>This depends on what facilities the browser makers implement. XML is about describing information; scripting languages and languages for embedded functionality are the software which enables the information to be manipulated at the user's end.</PARA> <PARA>XML itself provides a way to define the markup needed to implement scripting languages: as a neutral standard it neither encourages nor discourages their use, and does not favour one language over another, so the field is wide open. Developments are ongoing: see John Tigue's suggestions for <ULINK URL="http://www.datachannel.com/ChannelWorld/XML/dev/">standardising the API for Java</ULINK> in respect of XML.</PARA> <PARA>Scripting languages <EMPHASIS>are</EMPHASIS> provided for in a proposal for an <ULINK URL="http://www.w3.org/TR/NOTE-XSL-970910">Extensible Style Language, XSL</ULINK> (see question on <LINK LINKEND="faq-style">Stylesheets</LINK>).</PARA></SECT2> <SECT2 ID="faq-style" REVISIONFLAG="ADDED"><TITLE>How do I control appearance?</TITLE> <PARA>The use of a stylesheet is implicit in XML. Some browsers may possibly provide simple default styles for popular elements like <SGMLTAG CLASS="ELEMENT">PARA</SGMLTAG>, or <SGMLTAG CLASS="ELEMENT">LIST</SGMLTAG> containing <SGMLTAG CLASS="ELEMENT">ITEM</SGMLTAG>, but in general a stylesheet gives the author much better control of the layout.
But as with any system where files can be viewed at random by arbitrary users, the author cannot know what resources (such as fonts) are on the user's system, so care is needed.</PARA> <ITEMIZEDLIST><LISTITEM><PARA>The international standard for stylesheets for SGML documents is <ULINK URL="http://www.sil.org/sgml/related.html#dsssl">DSSSL, the Document Style and Semantics Specification Language</ULINK> (<ULINK URL="http://www.iso.ch/">ISO 10179</ULINK>). This provides Scheme-like languages for stylesheets and document conversion, and is extensively implemented in the <ULINK URL="http://www.jclark.com/jade/"><PRODUCTNAME>Jade</PRODUCTNAME> formatter</ULINK>.</PARA></LISTITEM> <LISTITEM><PARA>The <ULINK URL="http://www.w3.org/Style/css">Cascading Stylesheet Specification (CSS)</ULINK> provides a simple syntax for assigning styles to elements, and has been implemented in HTML browsers.</PARA></LISTITEM> <LISTITEM><PARA>The Synex stylesheet DTD as already used in <PRODUCTNAME>Panorama</PRODUCTNAME> and <PRODUCTNAME>MultiDoc Pro</PRODUCTNAME>;</PARA></LISTITEM> <LISTITEM><PARA>A new <ULINK URL="http://www.w3.org/TR/NOTE-XSL-970910">Extensible Style Language</ULINK> (XSL) is being proposed for use specifically with XML. This uses XML syntax (a stylesheet is actually an XML file) and combines formatting features from both DSSSL and CSS (HTML) and has already attracted support from several major vendors.</PARA></LISTITEM></ITEMIZEDLIST> <PARA>It remains to be seen which ones browsers will implement.</PARA></SECT2></SECT1> <SECT1 ID="faq-developer"><TITLE>Developers and Implementors (including WebMasters and server operators)</TITLE> <SECT2 ID="faq-spec"><TITLE>Where's the spec?</TITLE> <PARA>Right <ULINK URL="http://www.w3.org/TR/WD-xml">here (<FILENAME REMAP="ULINK" XREFLABEL="http://www.w3.org/TR/WD-xml">http://www.w3.org/TR/WD-xml</FILENAME>)</ULINK>. Includes the EBNF. 
There's also a <ULINK URL="http://www.iijnet.or.jp/FXIS/XSoft/sgml/xml/xml-lj11.htm">version in Japanese</ULINK>.</PARA></SECT2> <SECT2 ID="faq-validwf"><TITLE>What are these terms <WORDASWORD>DTDless</WORDASWORD>, <WORDASWORD>valid</WORDASWORD>, and <WORDASWORD>well-formed</WORDASWORD>?</TITLE> <PARA>Full SGML uses a Document Type Definition (DTD) to describe the markup (elements) available in any specific type of document. However, the design and construction of a DTD can be a complex and non-trivial task, so XML has been designed so it can be used either with or without a DTD. DTDless operation means you can invent markup without having to define it formally.</PARA> <PARA>To make this work, a DTDless file in effect <WORDASWORD>defines</WORDASWORD> its own markup, informally, by the existence and location of elements where you create them. But when an XML application such as a browser encounters a DTDless file, it needs to be able to understand the document structure as it reads it, because it has no DTD to tell it what to expect, so some changes have been made to the rules. </PARA> <PARA>For example, HTML's <SGMLTAG CLASS="ELEMENT">IMG</SGMLTAG> element is defined as <QUOTE><SGMLTAG>EMPTY</SGMLTAG></QUOTE>: it doesn't have an end-tag. Without a DTD, an XML application would have no way to know whether or not to expect an end-tag for an element, so the concept of <WORDASWORD>well-formed</WORDASWORD> has been introduced. 
This makes the start and end of every element, and the occurrence of <SGMLTAG>EMPTY</SGMLTAG> elements completely unambiguous.</PARA> <SECT3 ID="faq-wf"><TITLE><QUOTE>Well-formed</QUOTE> documents</TITLE> <PARA>All XML documents must be well-formed:</PARA> <ITEMIZEDLIST><LISTITEM><PARA ID="faq-rmdpi">if there is no DTD in use, the document must start with a <LINK LINKEND="faq-rmd">Required Markup Declaration (RMD)</LINK> saying so:<PROGRAMLISTING><?XML version="1.0" RMD="NONE"?> <foo> <bar>...<blort/>...</bar> </foo></PROGRAMLISTING></PARA> </LISTITEM> <LISTITEM><PARA>all tags must be balanced: that is, all elements which may contain character data must have both start- and end-tags present (omission is not allowed except for <LINK LINKEND="faq-empty">empty elements</LINK>, see below);</PARA></LISTITEM> <LISTITEM><PARA>all attribute values must be in quotes (the single-quote character [the apostrophe] may be used if the value contains a double-quote character, and <FOREIGNPHRASE>vice versa</FOREIGNPHRASE>): if you need both, use <SGMLTAG CLASS="GENENTITY">apos</SGMLTAG> and <SGMLTAG CLASS="GENENTITY">quot</SGMLTAG></PARA></LISTITEM> <LISTITEM><PARA ID="faq-empty">any <MARKUP>EMPTY</MARKUP> element tags (<FOREIGNPHRASE>eg</FOREIGNPHRASE> those with no end-tag like HTML's <SGMLTAG CLASS="ELEMENT">IMG</SGMLTAG>, <SGMLTAG CLASS="ELEMENT">HR</SGMLTAG>, and <SGMLTAG CLASS="ELEMENT">BR</SGMLTAG> and others) must either end with <QUOTE><MARKUP>/></MARKUP></QUOTE> or you have to make them non-<MARKUP>EMPTY</MARKUP> by adding a real end-tag;</PARA> <PARA>Example: <SGMLTAG CLASS="ELEMENT">BR</SGMLTAG> would become either <SGMLTAG CLASS="ENDTAG" LANG="xml" REMAP="empty">BR</SGMLTAG> or <SGMLTAG CLASS="STARTTAG">BR</SGMLTAG><SGMLTAG CLASS="ENDTAG">BR</SGMLTAG>.</PARA></LISTITEM> <LISTITEM><PARA REVISIONFLAG="CHANGED">there must not be any isolated markup characters (<MARKUP><</MARKUP> or <MARKUP>&</MARKUP>) in your text data (<FOREIGNPHRASE>ie</FOREIGNPHRASE> they must be 
escaped as <SGMLTAG CLASS="GENENTITY">lt</SGMLTAG> and <SGMLTAG CLASS="GENENTITY">amp</SGMLTAG>), and the sequence <SYSTEMITEM>]]></SYSTEMITEM> must be escaped as <SYSTEMITEM>]]&gt;</SYSTEMITEM> if it does not occur as the end of a <SGMLTAG>CDATA</SGMLTAG> marked section;</PARA></LISTITEM> <LISTITEM><PARA REVISIONFLAG="CHANGED">elements must nest inside each other properly (no overlapping markup, same rule as for all SGML);</PARA></LISTITEM> <LISTITEM><PARA REVISIONFLAG="ADDED">Well-formed files with no DTD may use attributes on any element, but the attributes must all be of type CDATA by default.</PARA></LISTITEM></ITEMIZEDLIST> <PARA REVISIONFLAG="CHANGED">Well-formed XML files with no DTD are considered to have <SGMLTAG CLASS="GENENTITY">lt</SGMLTAG>, <SGMLTAG CLASS="GENENTITY">gt</SGMLTAG>, <SGMLTAG CLASS="GENENTITY">apos</SGMLTAG>, <SGMLTAG CLASS="GENENTITY">quot</SGMLTAG>, and <SGMLTAG CLASS="GENENTITY">amp</SGMLTAG> predefined and thus available for use even without a DTD. Valid XML files must declare them explicitly if they use them.</PARA> <PARA REVISIONFLAG="CHANGED">Note that the value of the RMD <QUOTE><SYSTEMITEM>NONE</SYSTEMITEM></QUOTE> indicates that an XML processor can parse the document correctly without first reading any part of the DTD, so it can also be used if you do supply a DTD but don't want it used on this occasion. See the next section for other values of the RMD. </PARA></SECT3> <SECT3 ID="faq-valid"><TITLE>Valid XML</TITLE> <PARA ID="faq-docdec">Valid XML files are those which have a <LINK LINKEND="faq-dtd">Document Type Definition (DTD)</LINK> like all other <LINK LINKEND="faq-sgml">SGML</LINK> applications, and which adhere to it. 
<EMPHASIS>They must also be <LINK LINKEND="faq-wf">well-formed</LINK></EMPHASIS>.</PARA> <PARA>A valid file begins like any other SGML file with a Document Type Declaration, but may have an optional XML Declaration prepended:<PROGRAMLISTING><?XML version="1.0"?> <!DOCTYPE advert SYSTEM "http://www.foo.org/ad.dtd"> <advert> <headline>...<pic/>...</headline> <text>...</text> </advert></PROGRAMLISTING>The <LINK LINKEND="faq-spec">XML Specification</LINK> defines an <ULINK URL="sgmlxml.decl">SGML Declaration for XML</ULINK> which is fixed for all instances. An <LINK LINKEND="faq-dtd">XML version</LINK> of the specified DTD must be accessible to the XML processor, either by being available locally (<FOREIGNPHRASE>ie</FOREIGNPHRASE> the user already has a copy on disk), or by being retrievable via the network. You can enable this <EMPHASIS>either</EMPHASIS> by supplying the URL for the DTD in a System Identifier (as in the example above) <EMPHASIS>or</EMPHASIS> by supplying the <ULINK URL="http://www.ucc.ie/cgi-bin/public">Formal Public Identifier</ULINK>, <FOREIGNPHRASE>eg</FOREIGNPHRASE><PROGRAMLISTING><!DOCTYPE advert PUBLIC "-//Foo, Inc//DTD Advertisements//EN"></PROGRAMLISTING>and providing a catalog file which equates FPIs with their URL equivalents.</PARA> <PARA ID="faq-rmd">The <LINK LINKEND="faq-rmd">Required Markup Declaration (RMD)</LINK> can take two other values (apart from <QUOTE><SYSTEMITEM>NONE</SYSTEMITEM></QUOTE> which was discussed in the previous subsection): <QUOTE><SYSTEMITEM>INTERNAL</SYSTEMITEM></QUOTE> and <QUOTE><SYSTEMITEM>ALL</SYSTEMITEM></QUOTE>.</PARA> <PARA><QUOTE><MARKUP>INTERNAL</MARKUP></QUOTE> indicates that the XML processor is required to read and process the internal subset of the DTD, if provided, to parse the document correctly:<PROGRAMLISTING><?XML VERSION="1.0" RMD="INTERNAL"?> <!DOCTYPE foo [ <!ENTITY alephhb cdata "à"> ]> <foo>The first letter is &alephhb;</foo></PROGRAMLISTING><QUOTE><SYSTEMITEM>ALL</SYSTEMITEM></QUOTE> is the 
default, when no XML Declaration is present, and indicates that the DTD and the internal subset must both be read in order to parse the document correctly. See <LINK LINKEND="faq-spec">the XML Specification</LINK> for a more detailed description.</PARA> <PARA>The defaults for the other attributes of the XML Declaration are <SGMLTAG>VERSION="1.0"</SGMLTAG> and <SGMLTAG CLASS="ATTRIBUTE">ENCODING="UTF-8"</SGMLTAG>.</PARA></SECT3></SECT2> <SECT2 ID="faq-dtd" REVISIONFLAG="CHANGED"><TITLE>What else has changed between SGML and XML?</TITLE> <PARA ID="faq-restrict">The principal changes are in what you can do in writing a Document Type Definition (DTD). To simplify the syntax and make it easier to write processing software, a large number of markup declaration options have been suppressed (see Appendix A of <LINK LINKEND="faq-spec">the XML Specification</LINK>).</PARA> <PARA REVISIONFLAG="ADDED">A new delimiter is permitted in Names (the colon) for use in experiments with namespaces (enabling DTDs to distinguish element source, ownership, or application). 
A colon may only appear in mid-name, though, not at the start or the end, and the syntax may change in a future version.</PARA> </SECT2> <SECT2 ID="faq-xmlsoft"><TITLE>What XML software can I use today?</TITLE> <PARA>There is a modification under development for <PRODUCTNAME>Emacs</PRODUCTNAME>/<PRODUCTNAME>psgml-mode</PRODUCTNAME> to handle XML files.</PARA> <PARA>Most of the well-known <ULINK URL="http://www.sil.org/sgml/">SGML vendors</ULINK> are working on XML versions of editors and other tools; the editors with a product released (or very close) so far are:</PARA> <ITEMIZEDLIST><LISTITEM><PARA>GriF's <PRODUCTNAME>Symposia Doc+</PRODUCTNAME> (<FILENAME><ULINK URL="http://www.grif.fr/">http://www.grif.fr/</ULINK></FILENAME>)</PARA></LISTITEM> <LISTITEM><PARA>STiLO's <PRODUCTNAME>WebWriter</PRODUCTNAME> (<FILENAME><ULINK URL="http://www.stilo.com/">http://www.stilo.com/</ULINK></FILENAME>)</PARA></LISTITEM> <LISTITEM><PARA>ArborText's <PRODUCTNAME>ADEPT*Editor</PRODUCTNAME> (<FILENAME><ULINK URL="http://www.arbortext.com/">http://www.arbortext.com/</ULINK></FILENAME>)</PARA></LISTITEM> <LISTITEM><PARA>[anyone with details of others please let me know]</PARA></LISTITEM></ITEMIZEDLIST> <PARA>There is a growing number of XML parsers which can be used to check that your files conform to the <LINK LINKEND="faq-spec">Draft XML Specification</LINK>:</PARA> <ITEMIZEDLIST REVISIONFLAG="CHANGED"><LISTITEM><PARA>Norbert Mikula's <PRODUCTNAME>NXP</PRODUCTNAME> at <FILENAME REMAP="ULINK" XREFLABEL="http://edu.uni-klu.ac.at/~nmikula/NXP/"><ULINK URL="http://www.edu.uni-klu.ac.at/~nmikula/NXP/">http://www.edu.uni-klu.ac.at/~nmikula/NXP/</ULINK></FILENAME></PARA></LISTITEM> <LISTITEM><PARA>Tim Bray's <PRODUCTNAME>Lark</PRODUCTNAME> at <FILENAME><ULINK URL="http://www.textuality.com/Lark/">http://www.textuality.com/Lark/</ULINK></FILENAME></PARA></LISTITEM> <LISTITEM><PARA>Sean Russell's Java test kernel at <FILENAME><ULINK 
URL="http://jersey.uoregon.edu/ser/software/XML.tar.gz">http://jersey.uoregon.edu/ser/software/XML.tar.gz</ULINK></FILENAME></PARA></LISTITEM> <LISTITEM><PARA>Microsoft's <PRODUCTNAME>MSXML</PRODUCTNAME> parser at <FILENAME><ULINK URL="http://www.microsoft.com/standards/xml/xmlparse.htm">http://www.microsoft.com/standards/xml/xmlparse.htm</ULINK></FILENAME></PARA></LISTITEM> <LISTITEM><PARA>Steve Ball's parser in Tcl at <FILENAME><ULINK URL="http://tcltk.anu.edu.au/XML/">http://tcltk.anu.edu.au/XML/</ULINK></FILENAME></PARA></LISTITEM> <LISTITEM><PARA>[anyone with details of others please let me know]</PARA></LISTITEM></ITEMIZEDLIST> <PARA REVISIONFLAG="CHANGED">For browsers see the question on <LINK LINKEND="faq-browser">XML Browsers</LINK> and the details of the <LINK LINKEND="faq-mailinglist"><FILENAME>xml-dev</FILENAME> mailing list</LINK> for software developers. Bert Bos keeps <ULINK URL="http://www.w3.org/XML/notes.html">a list of some XML developments</ULINK> in bison, flex, perl and Python.</PARA></SECT2> <SECT2 ID="faq-swchx"><TITLE>Do I have to change any of my server software to work with XML?</TITLE> <PARA REVISIONFLAG="CHANGED">Only to serve up <FILENAME>.xml</FILENAME> files as the correct MIME type. MIME types of <SYSTEMITEM>text/xml</SYSTEMITEM> and <SYSTEMITEM>application/xml</SYSTEMITEM> have been submitted for approval, so for serving XML documents all that is needed is to edit the <FILENAME>mime-types</FILENAME> file (or its equivalent) and add the lines<PROGRAMLISTING>text/xml xml XML application/xml xsl XSL</PROGRAMLISTING>However, more sophisticated applications may require HTTP content negotiation to determine what tools the client has for display. 
Also, since XML is designed to support stylesheets and sophisticated hyperlinking, XML documents will be accompanied by ancillary files such as DTDs, entity files, catalogs, stylesheets, etc, which may need their own MIME entry, and which require placing in the appropriate directories.</PARA> <PARA>If you run scripts generating HTML, which you wish to work with XML, they will need to be modified to produce the relevant document type.</PARA></SECT2> <SECT2 ID="faq-ssincludes"><TITLE>Can I still use server-side <MARKUP>INCLUDE</MARKUP>s?</TITLE> <PARA>Yes, so long as what they generate ends up as part of an XML-conformant file (<FOREIGNPHRASE>ie</FOREIGNPHRASE> either <LINK LINKEND="faq-valid">valid</LINK> or just <LINK LINKEND="faq-wf">well-formed</LINK>).</PARA></SECT2> <SECT2 ID="faq-csincludes"><TITLE>Can I (and my authors) still use client-side <MARKUP>INCLUDE</MARKUP>s?</TITLE> <PARA>The same rule applies as for <LINK LINKEND="faq-ssincludes">server-side <MARKUP>INCLUDE</MARKUP>s</LINK>, so you need to ensure that any embedded code which gets passed to a third-party engine (<FOREIGNPHRASE>eg</FOREIGNPHRASE> <PRODUCTNAME>SDQL</PRODUCTNAME> enquiries, <PRODUCTNAME>Java</PRODUCTNAME> <MARKUP>write</MARKUP>s, <PRODUCTNAME>LiveWire</PRODUCTNAME> requests, streamed content, <FOREIGNPHRASE>etc</FOREIGNPHRASE>) does not contain any characters which might be misinterpreted as XML markup (<FOREIGNPHRASE>ie</FOREIGNPHRASE> no angle brackets or ampersands): either use a <MARKUP>CDATA</MARKUP> marked section to avoid your XML application parsing the embedded code, or use the standard <SGMLTAG CLASS="GENENTITY">lt</SGMLTAG>, <SGMLTAG CLASS="GENENTITY">gt</SGMLTAG>, and <SGMLTAG CLASS="GENENTITY">amp</SGMLTAG> character entity references instead.</PARA></SECT2> <SECT2 ID="faq-terms"><TITLE>I'm trying to understand the XML Spec: why does SGML (and XML) have such difficult terminology?</TITLE> <PARA>For implementation to succeed, the terminology needs to be precise.</PARA> 
<PARA REVISIONFLAG="ADDED">Example: <WORDASWORD>element</WORDASWORD> and <WORDASWORD>tag</WORDASWORD> are not synonymous: an element is a whole unit of information with its markup, and may consist of a start-tag alone (as in HTML's <SGMLTAG CLASS="STARTTAG">BR</SGMLTAG>) or a start-tag and an end-tag <EMPHASIS>and the content which goes between them</EMPHASIS>; tags alone are simply the markers at the start and end of elements.</PARA> <PARA>Sloppy terminology in specifications causes misunderstandings, so formal standards have to be phrased in formal terminology. This is not a formal document, and the astute reader may already have noticed it refers to <WORDASWORD>element names</WORDASWORD> where <WORDASWORD>element type names</WORDASWORD> is more correct; but the former is more widely understood.</PARA> <PARA>Those new to <LINK LINKEND="faq-sgml">SGML</LINK> may want to read something like the <CITETITLE><ULINK URL="http://etext.virginia.edu/bin/tei-tocs?div=DIV1&id=SG">Gentle Introduction to SGML</ULINK></CITETITLE> chapter of the <ULINK URL="http://www.sil.org/sgml/acadapps#tei">TEI</ULINK>.</PARA></SECT2> <SECT2 ID="faq-api"><TITLE>Is there a Developer's API kit for XML?</TITLE> <PARA>Several are reported to be under development. 
The ones I have found so far are:</PARA> <ITEMIZEDLIST><LISTITEM> <PARA>The Language Technology Group has produced the <PRODUCTNAME>LT XML</PRODUCTNAME> toolkit (<FILENAME><ULINK URL="http://www.ltg.ed.ac.uk/software/xml/">http://www.ltg.ed.ac.uk/software/xml/</ULINK></FILENAME>) and the DSSSL Syntax Checker (DSC: <FILENAME><ULINK URL="http://www.ltg.ed.ac.uk/~ht/dsc-blurb.html">http://www.ltg.ed.ac.uk/~ht/dsc-blurb.html</ULINK></FILENAME>).</PARA></LISTITEM> <LISTITEM><PARA>[anyone with details of others please let me know]</PARA></LISTITEM></ITEMIZEDLIST> <PARA>The big SGML conversion and application development engines like <PRODUCTNAME>Balise</PRODUCTNAME>, <PRODUCTNAME>Omnimark</PRODUCTNAME>, and <PRODUCTNAME>SGMLC</PRODUCTNAME> are all working on XML versions. Details of SGML software of all kinds are on <ULINK URL="http://www.sil.org/sgml/">the SGML Web pages</ULINK>.</PARA></SECT2></SECT1> <APPENDIX ID="faq-form"><TITLE>Response and query form</TITLE> <TITLEABBREV>Illustration from <ULINK URL="http://webreview.com/97/05/16/feature/index.html">Dale Dougherty's article in Web Review</ULINK> (courtesy of the publishers).<INLINEGRAPHIC FILEREF="head-xmlfiles.gif" FORMAT="GIF" XREFLABEL="[XMLfiles image]"></INLINEGRAPHIC>&xmlfiles;</TITLEABBREV> <FORM ACTION="http://www.ucc.ie/cgi-bin/uncgi/mailform" METHOD="POST"><INFORMALTABLE> <TGROUP COLS="2"><TBODY><ROW><ENTRY VALIGN="TOP"><PARA>Section and question: <INPUT NAME="QNo" SIZE="6"><DUMMY></INPUT></PARA><VARIABLELIST> <VARLISTENTRY><TERM>New material</TERM> <LISTITEM><PARA><INPUT LANG="checked" TYPE="RADIO" NAME="Type" VALUE="NewQ" CHECKED="CHECKED"><DUMMY></INPUT> New question, answer not known</PARA> <PARA><INPUT TYPE="RADIO" NAME="Type" VALUE="NewA"><DUMMY></INPUT> New question, with sample answer</PARA></LISTITEM></VARLISTENTRY> <VARLISTENTRY><TERM>Corrections to existing wording</TERM> <LISTITEM><PARA><INPUT TYPE="RADIO" NAME="Type" VALUE="ModQ"><DUMMY></INPUT> Correction to an existing question only</PARA> 
<PARA><INPUT TYPE="RADIO" NAME="Type" VALUE="ModA"><DUMMY></INPUT> Correction to an existing answer only</PARA> <PARA><INPUT TYPE="RADIO" NAME="Type" VALUE="ModB"><DUMMY></INPUT> Correction to both question and answer</PARA></LISTITEM></VARLISTENTRY> <VARLISTENTRY><TERM>Additional material</TERM> <LISTITEM><PARA><INPUT TYPE="RADIO" NAME="Type" VALUE="AddQ"><DUMMY></INPUT> Addition to an existing question only</PARA> <PARA><INPUT TYPE="RADIO" NAME="Type" VALUE="AddA"><DUMMY></INPUT> Addition to an existing answer only</PARA> <PARA><INPUT TYPE="RADIO" NAME="Type" VALUE="AddB"><DUMMY></INPUT> Addition to both question and answer</PARA></LISTITEM></VARLISTENTRY></VARIABLELIST></ENTRY> <ENTRY><VARIABLELIST> <VARLISTENTRY><TERM>Question and Answer</TERM> <LISTITEM><PARA><TEXTAREA NAME="Q" ROWS="5" COLS="32">[replace this with your question]</TEXTAREA></PARA> <PARA><TEXTAREA NAME="A" ROWS="5" COLS="32">[replace this with your model answer]</TEXTAREA></PARA></LISTITEM></VARLISTENTRY> <VARLISTENTRY><TERM>Details</TERM> <LISTITEM><PARA>Your name: <INPUT NAME="sender_Name" SIZE="32"><DUMMY></INPUT></PARA> <PARA>Affiliation: <INPUT NAME="Affiliation" SIZE="32"><DUMMY></INPUT></PARA> <PARA>Email address: <INPUT NAME="sender_Email" SIZE="32"><DUMMY></INPUT></PARA> <PARA><INPUT TYPE="SUBMIT" VALUE=" Submit "><DUMMY></INPUT> <INPUT TYPE="RESET" VALUE=" Clear and start again "><DUMMY></INPUT><INPUT TYPE="HIDDEN" NAME="Version" VALUE="v.1.0"><DUMMY></INPUT></PARA></LISTITEM></VARLISTENTRY></VARIABLELIST></ENTRY></ROW></TBODY></TGROUP></INFORMALTABLE></FORM></APPENDIX></ARTICLE>