home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 10 Tools
/
10-Tools.zip
/
xwphescr.zip
/
XWPH0208.ZIP
/
src
/
helpers
/
xml.c
< prev
next >
Wrap
C/C++ Source or Header
|
2002-08-08
|
118KB
|
3,313 lines
/*
*@@sourcefile xml.c:
* XML document handling.
*
* XML support in the XWorkplace Helpers is broken into two
* layers:
*
* -- The bottom layer is implemented by the @expat parser,
* which I have ported and hacked to the xwphelpers.
*
* See xmlparse.c for an introduction.
*
* -- Because expat requires so many callbacks and is non-validating,
* I have added a top layer above the expat library
* which is vaguely modelled after the Document Object Model
* (DOM) standardized by the W3C. That's this file.
*
* This top layer allows you to do two things VERY EASILY:
*
* 1) Parse an XML document (which uses expat internally)
* and build a DOM tree from that. See xmlCreateDOM.
*
* 2) Create a DOM tree in memory and write an XML
* document from that. See xmlCreateDocument.
*
* <B>XML</B>
*
* In order to understand XML myself, I have written a couple of
* glossary entries for the complex XML terminology. See @XML
* for a start.
*
* <B>Document Object Model (DOM)</B>
*
* See @DOM for a general introduction.
*
* DOM really calls for object oriented programming so the various
* structs can inherit from each other. Since this implementation
* was supposed to be a C-only interface, we cannot implement
* inheritance at the language level. Instead, each XML document
* is broken up into a tree of node structures only (see DOMNODE),
* each of which has a special type. The W3C DOM allows this
* (and calls this the "flattened" view, as opposed to the
* "inheritance view").
*
* The W3C DOM specification prescribes tons of methods, which I
* really had no use for, so I didn't implement them. This implementation
* is only a DOM insofar as it uses nodes which represent @documents,
* @elements, @attributes, @comments, and @processing_instructions.
*
* Most notably, there are the following differences:
*
* -- External entities don't work yet. As a result, DOCTYPE's
* only make sense if the entire DTD is in the same document
* (internal subset).
*
* -- Not all node types are implemented. See DOMNODE for
* the supported types.
*
* -- Only a subset of the standardized methods is implemented,
* and they are called differently to adhere to the xwphelpers
* conventions.
*
* -- DOM uses UTF-16 for its DOMString type. @expat gives UTF-8
* strings to all the handlers though, so all data in the DOM nodes
* is UTF-8 encoded. This still needs to be fixed.
*
* -- DOM defines the DOMException class. This isn't supported in C.
* Instead, we use special error codes which add to the standard
* OS/2 error codes (APIRET). All our error codes are >= 40000
* to avoid conflicts.
*
* It shouldn't be too difficult to write a C++ encapsulation
* of this though which fully implements all the DOM methods.
*
* However, we do implement node management as in the standard.
* See xmlCreateDomNode and xmlDeleteNode.
*
* The main entry point into this is xmlCreateDOM. See remarks
* there for how this will be typically used.
*
* <B>Validation</B>
*
* @expat doesn't check XML documents for whether they are @valid.
* In other words, expat is a non-validating XML processor.
*
* By contrast, this pseudo-DOM implementation can validate to
* a certain extent.
*
* -- If you pass DF_PARSEDTD to xmlCreateDOM, the DTD will be
* parsed and the document will be validated against it.
* Validation is working as far as elements and attributes
* are checked for proper nesting. However, we cannot fully
* check for proper ordering etc. in (children) mode of
* @element_declarations. This will only check for whether
* elements may appear in another element at all -- not for
* the correct order.
*
* -- Otherwise the @DTD entries will not be stored in the DOM
* nodes, and no validation occurs. Still, if a DTD exists,
* @expat will insert attributes that have a default value
* in their @attribute declaraion and have not been specified.
*
*@@header "helpers\xml.h"
*@@added V0.9.6 (2000-10-29) [umoeller]
*/
/*
* Copyright (C) 2000-2002 Ulrich Möller.
* This file is part of the "XWorkplace helpers" source package.
* This is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published
* by the Free Software Foundation, in version 2 as it comes in the
* "COPYING" file of the XWorkplace main distribution.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#define OS2EMX_PLAIN_CHAR
// this is needed for "os2emx.h"; if this is defined,
// emx will define PSZ as _signed_ char, otherwise
// as unsigned char
#define INCL_DOSERRORS
#include <os2.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "setup.h" // code generation and debugging options
#include "expat\expat.h"
#include "helpers\linklist.h"
#include "helpers\standards.h"
#include "helpers\stringh.h"
#include "helpers\tree.h"
#include "helpers\xstring.h"
#include "helpers\xml.h"
#pragma hdrstop
/*
*@@category: Helpers\XML
* see xml.c.
*/
/*
*@@category: Helpers\XML\Document Object Model (DOM)
* see xml.c.
*/
/* ******************************************************************
*
* Error handling
*
********************************************************************/
/*
*@@ xmlDescribeError:
* returns a string describing the error corresponding to code.
* The code should be one of the enums that can be returned from
* XML_GetErrorCode.
*
*@@changed V0.9.9 (2001-02-14) [umoeller]: adjusted for new error codes
*@@changed V0.9.9 (2001-02-16) [umoeller]: moved this here from xmlparse.c
*/
const char* xmlDescribeError(int code)
{
switch (code)
{
// start of expat (parser) errors
case ERROR_EXPAT_NO_MEMORY:
return ("Out of memory");
case ERROR_EXPAT_SYNTAX:
return ("Syntax error");
case ERROR_EXPAT_NO_ELEMENTS:
return ("No element found");
case ERROR_EXPAT_INVALID_TOKEN:
return ("Not well-formed (invalid token)");
case ERROR_EXPAT_UNCLOSED_TOKEN:
return ("Unclosed token");
case ERROR_EXPAT_PARTIAL_CHAR:
return ("Unclosed token");
case ERROR_EXPAT_TAG_MISMATCH:
return ("Mismatched tag");
case ERROR_EXPAT_DUPLICATE_ATTRIBUTE:
return ("Duplicate attribute");
case ERROR_EXPAT_JUNK_AFTER_DOC_ELEMENT:
return ("Junk after root element");
case ERROR_EXPAT_PARAM_ENTITY_REF:
return ("Illegal parameter entity reference");
case ERROR_EXPAT_UNDEFINED_ENTITY:
return ("Undefined entity");
case ERROR_EXPAT_RECURSIVE_ENTITY_REF:
return ("Recursive entity reference");
case ERROR_EXPAT_ASYNC_ENTITY:
return ("Asynchronous entity");
case ERROR_EXPAT_BAD_CHAR_REF:
return ("Reference to invalid character number");
case ERROR_EXPAT_BINARY_ENTITY_REF:
return ("Reference to binary entity");
case ERROR_EXPAT_ATTRIBUTE_EXTERNAL_ENTITY_REF:
return ("Reference to external entity in attribute");
case ERROR_EXPAT_MISPLACED_XML_PI:
return ("XML processing instruction not at start of external entity");
case ERROR_EXPAT_UNKNOWN_ENCODING:
return ("Unknown encoding");
case ERROR_EXPAT_INCORRECT_ENCODING:
return ("Encoding specified in XML declaration is incorrect");
case ERROR_EXPAT_UNCLOSED_CDATA_SECTION:
return ("Unclosed CDATA section");
case ERROR_EXPAT_EXTERNAL_ENTITY_HANDLING:
return ("Error in processing external entity reference");
case ERROR_EXPAT_NOT_STANDALONE:
return ("Document is not standalone");
case ERROR_EXPAT_UNEXPECTED_STATE:
return ("Unexpected parser state - please send a bug report");
// end of expat (parser) errors
// start of validation errors
case ERROR_DOM_UNDECLARED_ELEMENT:
return ("Element has not been declared");
case ERROR_DOM_ROOT_ELEMENT_MISNAMED:
return ("Root element name does not match DOCTYPE name");
case ERROR_DOM_INVALID_ROOT_ELEMENT:
return ("Invalid or duplicate root element");
case ERROR_DOM_INVALID_SUBELEMENT:
return ("Invalid sub-element in parent element");
case ERROR_DOM_DUPLICATE_ELEMENT_DECL:
return ("Duplicate element declaration");
case ERROR_DOM_DUPLICATE_ATTRIBUTE_DECL:
return ("Duplicate attribute declaration");
case ERROR_DOM_UNDECLARED_ATTRIBUTE:
return ("Undeclared attribute in element");
case ERROR_ELEMENT_CANNOT_HAVE_CONTENT:
return ("Element cannot have content");
case ERROR_DOM_INVALID_ATTRIB_VALUE:
return ("Invalid attribute value");
case ERROR_DOM_REQUIRED_ATTRIBUTE_MISSING:
return ("Required attribute is missing");
case ERROR_DOM_SUBELEMENT_IN_EMPTY_ELEMENT:
return ("Subelement in empty element");
case ERROR_DOM_PARSING:
return ("Parsing error");
case ERROR_DOM_VALIDITY:
return ("Validity error");
case ERROR_DOM_NODETYPE_NOT_SUPPORTED:
return ("DOM node type not supported");
case ERROR_DOM_NO_DOCUMENT:
return ("No DOM document");
case ERROR_DOM_NO_ELEMENT:
return ("No DOM element");
case ERROR_DOM_DUPLICATE_DOCTYPE:
return ("Duplicate doctype");
case ERROR_DOM_DOCTYPE_ROOT_NAMES_MISMATCH:
return ("Root element doesn't match doctype name");
case ERROR_DOM_INTEGRITY:
return ("DOM integrity error");
case ERROR_DOM_DUPLICATE_ATTRIBUTE:
return ("Duplicate attribute");
case ERROR_DOM_VALIDATE_INVALID_ELEMENT:
return ("Validation error: Undeclared element name");
case ERROR_DOM_ELEMENT_DECL_OUTSIDE_DOCTYPE:
return ("Element declaration outside doctype");
case ERROR_DOM_ATTLIST_DECL_OUTSIDE_DOCTYPE:
return ("Attlist declaration outside doctype");
case ERROR_DOM_INCOMPLETE_ENCODING_MAP:
return ("Incomplete encoding map specified");
case ERROR_DOM_INVALID_EXTERNAL_HANDLER:
return ("Invalid 'external' handler specified");
}
return NULL;
}
/*
*@@ xmlSetError:
* sets the DOM's error state and stores error information
* and parser position.
*
*@@added V0.9.9 (2001-02-16) [umoeller]
*/
VOID xmlSetError(PXMLDOM pDom,
APIRET arc,
const char *pcszFailing,
BOOL fValidityError) // in: if TRUE, this is a validation error;
// if FALSE, this is a parser error
{
pDom->arcDOM = arc;
pDom->pcszErrorDescription = xmlDescribeError(pDom->arcDOM);
pDom->ulErrorLine = XML_GetCurrentLineNumber(pDom->pParser);
pDom->ulErrorColumn = XML_GetCurrentColumnNumber(pDom->pParser);
if (pcszFailing)
{
if (!pDom->pxstrFailingNode)
pDom->pxstrFailingNode = xstrCreate(0);
xstrcpy(pDom->pxstrFailingNode, pcszFailing, 0);
}
if (fValidityError)
pDom->fInvalid = TRUE;
}
/* ******************************************************************
*
* Most basic node management
*
********************************************************************/
/*
*@@ CompareXStrings:
* tree comparison func for NodeBases.
* This works for all trees which contain structures
* whose first item is a _NODEBASE because NODEBASE's first
* member is a TREE.
*
* Used in two places:
*
* -- to insert _CMELEMENTDECLNODE nodes into
* _DOMDOCTYPENODE.ElementDeclsTree;
*
* -- to insert _CMELEMENTPARTICLE nodes into
* _CMELEMENTDECLNODE.ElementNamesTree.
*
*@@added V0.9.9 (2001-02-16) [umoeller]
*@@changed V0.9.14 (2001-08-09) [umoeller]: fixed map bug which caused the whole XML stuff to fail
*/
static int TREEENTRY CompareXStrings(ULONG ul1,
ULONG ul2)
{
return (strhcmp(((PXSTRING)ul1)->psz,
((PXSTRING)ul2)->psz));
}
/*
*@@ xmlCreateNodeBase:
* creates a new NODEBASE node.
*
* Gets called from xmlCreateDomNode also to create
* a DOMNODE, since that in turn has a NODEBASE.
*
*@@added V0.9.9 (2001-02-16) [umoeller]
*/
APIRET xmlCreateNodeBase(NODEBASETYPE ulNodeType, // in: node type
ULONG cb, // in: size of struct
const char *pcszNodeName, // in: node name or NULL
ULONG ulNodeNameLength, // in: node name length
// or 0 to run strlen(pcszNodeName)
PNODEBASE *ppNew) // out: new node
{
APIRET arc = NO_ERROR;
PNODEBASE pNewNode = (PNODEBASE)malloc(cb);
if (!pNewNode)
arc = ERROR_NOT_ENOUGH_MEMORY;
else
{
memset(pNewNode, 0, cb);
pNewNode->ulNodeType = ulNodeType;
pNewNode->Tree.ulKey = (ULONG)&pNewNode->strNodeName;
xstrInit(&pNewNode->strNodeName, 0);
if (pcszNodeName)
{
xstrcpy(&pNewNode->strNodeName,
pcszNodeName,
ulNodeNameLength);
}
*ppNew = pNewNode;
}
return arc;
}
/*
*@@ xmlDeleteNode:
* deletes a NODEBASE and frees memory that was
* associated with its members.
*
* After calling this, pNode is no longer valid.
*
* If you invoke this on a DOCUMENT node, the
* entire DOM tree will get deleted recursively.
*
*@@added V0.9.9 (2001-02-16) [umoeller]
*@@changed V0.9.14 (2001-08-09) [umoeller]: fixed crash on string delete
*/
VOID xmlDeleteNode(PNODEBASE pNode)
{
if (pNode)
{
PLISTNODE pNodeThis;
PDOMNODE pDomNode = NULL;
LINKLIST llDeleteNodes; // list that nodes to be deleted
// can be appended to
PLISTNODE pDelNode;
lstInit(&llDeleteNodes, FALSE);
// now handle special types and their allocations
switch (pNode->ulNodeType)
{
case DOMNODE_ELEMENT:
{
PDOMNODE pAttrib;
pDomNode = (PDOMNODE)pNode;
// delete all attribute nodes
pAttrib = (PDOMNODE)treeFirst(pDomNode->AttributesMap);
while (pAttrib)
{
// call delete recursively
PDOMNODE pNext = (PDOMNODE)treeNext((TREE*)pAttrib);
xmlDeleteNode((PNODEBASE)pAttrib);
// this will remove pAttrib from pNode's attrib
// tree and rebalance the tree
pAttrib = pNext;
}
break; }
case DOMNODE_ATTRIBUTE:
case DOMNODE_TEXT:
case DOMNODE_PROCESSING_INSTRUCTION:
case DOMNODE_COMMENT:
pDomNode = (PDOMNODE)pNode;
break;
case DOMNODE_DOCUMENT:
if (((PDOMDOCUMENTNODE)pNode)->pDocType)
xmlDeleteNode((PNODEBASE)((PDOMDOCUMENTNODE)pNode)->pDocType);
pDomNode = (PDOMNODE)pNode;
break;
case DOMNODE_DOCUMENT_TYPE:
{
PDOMDOCTYPENODE pDocType = (PDOMDOCTYPENODE)pNode;
PCMELEMENTDECLNODE pElDecl;
PCMATTRIBUTEDECLBASE pAttrDeclBase;
pDomNode = (PDOMNODE)pNode;
pElDecl = (PCMELEMENTDECLNODE)treeFirst(pDocType->ElementDeclsTree);
while (pElDecl)
{
lstAppendItem(&llDeleteNodes, pElDecl);
pElDecl = (PCMELEMENTDECLNODE)treeNext((TREE*)pElDecl);
}
pAttrDeclBase = (PCMATTRIBUTEDECLBASE)treeFirst(pDocType->AttribDeclBasesTree);
while (pAttrDeclBase)
{
lstAppendItem(&llDeleteNodes, pAttrDeclBase);
pAttrDeclBase = (PCMATTRIBUTEDECLBASE)treeNext((TREE*)pAttrDeclBase);
}
xstrClear(&pDocType->strPublicID);
xstrClear(&pDocType->strSystemID);
break; }
case ELEMENTPARTICLE_EMPTY:
case ELEMENTPARTICLE_ANY:
case ELEMENTPARTICLE_MIXED:
case ELEMENTPARTICLE_CHOICE:
case ELEMENTPARTICLE_SEQ:
case ELEMENTPARTICLE_NAME:
{
PCMELEMENTPARTICLE pp = (PCMELEMENTPARTICLE)pNode;
if (pp->pllSubNodes)
{
pDelNode = lstQueryFirstNode(pp->pllSubNodes);
while (pDelNode)
{
PCMELEMENTPARTICLE
pParticle = (PCMELEMENTPARTICLE)pDelNode->pItemData;
xmlDeleteNode((PNODEBASE)pParticle);
// treeDelete(pp-> // @@todo
pDelNode = pDelNode->pNext;
}
}
break; }
// case ATTRIBUTE_DECLARATION_ENUM: // this is a plain NODEBASE
case ATTRIBUTE_DECLARATION:
{
// PCMATTRIBUTEDECL pDecl = (PCMATTRIBUTEDECL)pNode;
break; }
case ATTRIBUTE_DECLARATION_BASE:
break;
}
if (pDomNode)
{
// recurse into child nodes
while (pNodeThis = lstQueryFirstNode(&pDomNode->llChildren))
// recurse!!
xmlDeleteNode((PNODEBASE)(pNodeThis->pItemData));
// this updates llChildren
if (pDomNode->pParentNode)
{
// node has a parent:
if (pNode->ulNodeType == DOMNODE_ATTRIBUTE)
// this is an attribute:
// remove from parent's attributes map
treeDelete(&pDomNode->pParentNode->AttributesMap,
NULL,
(TREE*)pNode);
else
// remove this node from the parent's list
// of child nodes before deleting this node
lstRemoveItem(&pDomNode->pParentNode->llChildren,
pNode);
pDomNode->pParentNode = NULL;
}
xstrFree(&pDomNode->pstrNodeValue);
lstClear(&pDomNode->llChildren);
}
pDelNode = lstQueryFirstNode(&llDeleteNodes);
while (pDelNode)
{
PNODEBASE pNodeBase = (PNODEBASE)pDelNode->pItemData;
xmlDeleteNode(pNodeBase);
pDelNode = pDelNode->pNext;
}
lstClear(&llDeleteNodes);
xstrClear(&pNode->strNodeName);
free(pNode);
}
}
/*
*@@ xmlCreateDomNode:
* creates a new DOMNODE with the specified
* type and parent. Other than that, the
* node fields are zeroed.
*
* If pParentNode is specified (which is required,
* unless you are creating a document node),
* its children list is automatically updated
* (unless this is an attribute node, which updates
* the attributes map).
*
* This returns the following errors:
*
* -- ERROR_NOT_ENOUGH_MEMORY
*
* -- ERROR_DOM_NOT_SUPPORTED: invalid ulNodeType
* specified.
*
* -- ERROR_DOM_WRONG_DOCUMENT: cannot find the
* document for this node. This happens if you do
* not have a document node at the root of your tree.
*/
APIRET xmlCreateDomNode(PDOMNODE pParentNode, // in: parent node or NULL if root
NODEBASETYPE ulNodeType, // in: DOMNODE_* type
const char *pcszNodeName, // in: node name or NULL
ULONG ulNodeNameLength, // in: node name length
// or 0 to run strlen(pcszNodeName)
PDOMNODE *ppNew) // out: new node
{
PDOMNODE pNewNode = NULL;
APIRET arc = NO_ERROR;
ULONG cb = 0;
switch (ulNodeType)
{
case DOMNODE_DOCUMENT:
cb = sizeof(DOMDOCUMENTNODE);
break;
case DOMNODE_DOCUMENT_TYPE:
cb = sizeof(DOMDOCTYPENODE);
break;
default:
cb = sizeof(DOMNODE);
break;
}
if (!(arc = xmlCreateNodeBase(ulNodeType,
cb,
pcszNodeName,
ulNodeNameLength,
(PNODEBASE*)&pNewNode)))
{
pNewNode->pParentNode = pParentNode;
if (pParentNode)
{
// parent specified:
// check if this is an attribute
if (ulNodeType == DOMNODE_ATTRIBUTE)
{
// attribute:
// add to parent's attributes list
if (treeInsert(&pParentNode->AttributesMap,
NULL,
&pNewNode->NodeBase.Tree,
CompareXStrings))
arc = ERROR_DOM_DUPLICATE_ATTRIBUTE;
// shouldn't happen, because expat takes care of this
}
else
// append this new node to the parent's
// list of child nodes
lstAppendItem(&pParentNode->llChildren,
pNewNode);
if (!arc)
{
// set document pointer...
// if the parent node has a document pointer,
// we can copy that
if (pParentNode->pDocumentNode)
pNewNode->pDocumentNode = pParentNode->pDocumentNode;
else
// parent has no document pointer: then it is probably
// the document itself... check
if (pParentNode->NodeBase.ulNodeType == DOMNODE_DOCUMENT)
pNewNode->pDocumentNode = pParentNode;
else
arc = ERROR_DOM_NO_DOCUMENT;
}
}
lstInit(&pNewNode->llChildren, FALSE);
treeInit(&pNewNode->AttributesMap, NULL);
}
if (!arc)
*ppNew = pNewNode;
else
if (pNewNode)
free(pNewNode);
return arc;
}
/* ******************************************************************
*
* Specific DOM node constructors
*
********************************************************************/
/*
*@@ xmlCreateElementNode:
* creates a new element node with the specified name.
*
*@@added V0.9.9 (2001-02-14) [umoeller]
*/
APIRET xmlCreateElementNode(PDOMNODE pParent, // in: parent node (either document or element)
const char *pcszElement, // in: element name (null-terminated)
PDOMNODE *ppNew)
{
PDOMNODE pNew = NULL;
APIRET arc;
if (!(arc = xmlCreateDomNode(pParent,
DOMNODE_ELEMENT,
pcszElement,
0,
&pNew)))
*ppNew = pNew;
return arc;
}
/*
*@@ xmlCreateAttributeNode:
* creates a new attribute node with the specified data.
*
* NOTE: Attributes have no "parent" node, technically.
* They are added to a special, separate list in @DOM_ELEMENT
* nodes.
*
* This returns the following errors:
*
* -- Error codes from xmlCreateDomNode.
*
* -- ERROR_DOM_NO_ELEMENT: pElement is invalid or does
* not point to an @DOM_ELEMENT node.
*
*@@added V0.9.9 (2001-02-14) [umoeller]
*/
APIRET xmlCreateAttributeNode(PDOMNODE pElement, // in: element node
const char *pcszName, // in: attribute name (null-terminated)
const char *pcszValue, // in: attribute value (null-terminated)
PDOMNODE *ppNew)
{
APIRET arc = NO_ERROR;
if ( (!pElement)
|| (pElement->NodeBase.ulNodeType != DOMNODE_ELEMENT)
)
arc = ERROR_DOM_NO_ELEMENT;
else
{
PDOMNODE pNew = NULL;
if (!(arc = xmlCreateDomNode(pElement, // this takes care of adding to the list
DOMNODE_ATTRIBUTE,
pcszName,
0,
&pNew)))
{
pNew->pstrNodeValue = xstrCreate(0);
xstrcpy(pNew->pstrNodeValue, pcszValue, 0);
*ppNew = pNew;
}
}
return arc;
}
/*
*@@ xmlCreateTextNode:
* creates a new text node with the specified content.
*
* Note: This differs from the createText method
* as specified by DOM, which has no ulLength parameter.
* We need this for speed with @expat though.
*
*@@added V0.9.9 (2001-02-14) [umoeller]
*/
APIRET xmlCreateTextNode(PDOMNODE pParent, // in: parent element node
const char *pcszText, // in: ptr to start of text
ULONG ulLength, // in: length of *pcszText
PDOMNODE *ppNew)
{
PDOMNODE pNew = NULL;
APIRET arc;
if (!(arc = xmlCreateDomNode(pParent,
DOMNODE_TEXT,
NULL,
0,
&pNew)))
{
PSZ pszNodeValue;
if (pszNodeValue = (PSZ)malloc(ulLength + 1))
{
memcpy(pszNodeValue, pcszText, ulLength);
pszNodeValue[ulLength] = '\0';
pNew->pstrNodeValue = xstrCreate(0);
xstrset(pNew->pstrNodeValue, pszNodeValue);
*ppNew = pNew;
}
else
{
arc = ERROR_NOT_ENOUGH_MEMORY;
xmlDeleteNode((PNODEBASE)pNew);
}
}
return arc;
}
/*
*@@ xmlCreateCommentNode:
* creates a new comment node with the specified
* content.
*
*@@added V0.9.9 (2001-02-14) [umoeller]
*/
APIRET xmlCreateCommentNode(PDOMNODE pParent, // in: parent element node
const char *pcszText, // in: comment (null-terminated)
PDOMNODE *ppNew)
{
PDOMNODE pNew = NULL;
APIRET arc;
if (!(arc = xmlCreateDomNode(pParent,
DOMNODE_COMMENT,
NULL,
0,
&pNew)))
{
pNew->pstrNodeValue = xstrCreate(0);
xstrcpy(pNew->pstrNodeValue, pcszText, 0);
*ppNew = pNew;
}
return arc;
}
/*
*@@ xmlCreatePINode:
* creates a new processing instruction node with the
* specified data.
*
*@@added V0.9.9 (2001-02-14) [umoeller]
*/
APIRET xmlCreatePINode(PDOMNODE pParent, // in: parent element node
const char *pcszTarget, // in: PI target (null-terminated)
const char *pcszData, // in: PI data (null-terminated)
PDOMNODE *ppNew)
{
PDOMNODE pNew = NULL;
APIRET arc;
if (!(arc = xmlCreateDomNode(pParent,
DOMNODE_PROCESSING_INSTRUCTION,
pcszTarget,
0,
&pNew)))
{
pNew->pstrNodeValue = xstrCreate(0);
xstrcpy(pNew->pstrNodeValue, pcszData, 0);
*ppNew = pNew;
}
return arc;
}
/*
*@@ xmlCreateDocumentTypeNode:
* creates a new document type node with the
* specified data.
*
*@@added V0.9.9 (2001-02-14) [umoeller]
*/
APIRET xmlCreateDocumentTypeNode(PDOMDOCUMENTNODE pDocumentNode, // in: document node
const char *pcszDoctypeName,
const char *pcszSysid,
const char *pcszPubid,
int fHasInternalSubset,
PDOMDOCTYPENODE *ppNew)
{
APIRET arc = NO_ERROR;
if (pDocumentNode->pDocType)
// we already have a doctype:
arc = ERROR_DOM_DUPLICATE_DOCTYPE;
else
{
// create doctype node
PDOMDOCTYPENODE pNew = NULL;
if (!(arc = xmlCreateDomNode((PDOMNODE)pDocumentNode,
DOMNODE_DOCUMENT_TYPE,
pcszDoctypeName,
0,
(PDOMNODE*)&pNew)))
{
// the node has already been added to the children
// list of the document node... in addition, set
// the doctype field in the document
pDocumentNode->pDocType = pNew;
// initialize the extra fields
xstrcpy(&pNew->strPublicID, pcszPubid, 0);
xstrcpy(&pNew->strSystemID, pcszSysid, 0);
pNew->fHasInternalSubset = fHasInternalSubset;
treeInit(&pNew->ElementDeclsTree, NULL);
treeInit(&pNew->AttribDeclBasesTree, NULL);
*ppNew = pNew;
}
}
return arc;
}
/* ******************************************************************
*
* DOM level 3 content models
*
********************************************************************/
/*
*@@ SetupParticleAndSubs:
*
* This creates sub-particles and recurses to set them up,
* if necessary.
*
*@@added V0.9.9 (2001-02-16) [umoeller]
*/
static APIRET SetupParticleAndSubs(PCMELEMENTPARTICLE pParticle,
PXMLCONTENT pModel,
TREE **ppElementNamesTree) // in: ptr to _CMELEMENTDECLNODE.ElementNamesTree
// (passed to all recursions)
{
APIRET arc = NO_ERROR;
// set up member NODEBASE
switch (pModel->type)
{
case XML_CTYPE_EMPTY: // that's easy
pParticle->NodeBase.ulNodeType = ELEMENTPARTICLE_EMPTY;
break;
case XML_CTYPE_ANY: // that's easy
pParticle->NodeBase.ulNodeType = ELEMENTPARTICLE_ANY;
break;
case XML_CTYPE_NAME: // that's easy
pParticle->NodeBase.ulNodeType = ELEMENTPARTICLE_NAME;
xstrcpy(&pParticle->NodeBase.strNodeName, pModel->name, 0);
treeInsert(ppElementNamesTree,
NULL,
&pParticle->NodeBase.Tree,
CompareXStrings);
break;
case XML_CTYPE_MIXED:
pParticle->NodeBase.ulNodeType = ELEMENTPARTICLE_MIXED;
break;
case XML_CTYPE_CHOICE:
pParticle->NodeBase.ulNodeType = ELEMENTPARTICLE_CHOICE;
break;
case XML_CTYPE_SEQ:
pParticle->NodeBase.ulNodeType = ELEMENTPARTICLE_SEQ;
break;
}
pParticle->ulRepeater = pModel->quant;
if (pModel->numchildren)
{
// these are the three cases where we have subnodes
// in the XMLCONTENT... go for these and recurse
ULONG ul;
pParticle->pllSubNodes = lstCreate(FALSE);
for (ul = 0;
ul < pModel->numchildren;
ul++)
{
PXMLCONTENT pSubModel = &pModel->children[ul];
PCMELEMENTPARTICLE pSubNew = NULL;
if (!(arc = xmlCreateNodeBase(TYPE_UNKNOWN, // node type... for now
sizeof(CMELEMENTPARTICLE),
0,
0,
(PNODEBASE*)&pSubNew)))
{
if (!(arc = SetupParticleAndSubs(pSubNew,
pSubModel,
ppElementNamesTree)))
{
// no error: append sub-particle to this particle's
// children list
lstAppendItem(pParticle->pllSubNodes,
pSubNew);
// and store this particle as the parent in the
// sub-particle
pSubNew->pParentParticle = pParticle;
}
}
if (arc)
break;
}
}
return arc;
}
/*
*@@ xmlCreateElementDecl:
* creates a new _CMELEMENTDECLNODE for the specified
* _XMLCONTENT content model (which is the @expat structure).
* This recurses, if necessary.
*
* NOTE: As opposed to the other "create" functions,
* this does not take a parent node as input. If this
* returns NO_ERROR, it is the caller's responsibility
* to add the produced node to the document's DOCTYPE node.
*
*@@added V0.9.9 (2001-02-16) [umoeller]
*/
APIRET xmlCreateElementDecl(const char *pcszName,
PXMLCONTENT pModel,
PCMELEMENTDECLNODE *ppNew)
{
APIRET arc = NO_ERROR;
PCMELEMENTDECLNODE pNew = NULL;
if (!(arc = xmlCreateNodeBase(TYPE_UNKNOWN, // for now
sizeof(CMELEMENTDECLNODE),
pcszName,
0,
(PNODEBASE*)&pNew)))
{
treeInit(&pNew->ParticleNamesTree, NULL);
// set up the "particle" member and recurse into sub-particles
if (!(arc = SetupParticleAndSubs(&pNew->Particle,
pModel,
&pNew->ParticleNamesTree)))
*ppNew = pNew;
else
free(pNew);
}
return arc;
}
/*
*@@ ValidateElementChildren
* validates the specified element against the document's @DTD,
* more specifically, against the element declaration of the
* new element's parent.
*
* This sets arcDOM in XMLDOM on errors.
*
* According to the XML spec, an element is valid if there
* is a declaration matching the element declaration where the
* element's name matches the element type, and _one_ of the
* following holds:
*
* (1) The declaration matches EMPTY and the element has no @content. (done)
*
* (2) The declaration matches (children) (see @element_declaration)
* and the sequence of child elements belongs to the language
* generated by the regular expression in the content model, with
* optional @white_space between the start-tag and the first child
* element, between child elements, or between the last
* child element and the end-tag. Note that a CDATA section
* is never considered "whitespace", even if it contains
* white space only. @@todo
*
* (3) The declaration matches (mixed) (see @element_declaration)
* and the content consists of @content and child elements
* whose types match names in the content model. (done)
*
* (4) The declaration matches ANY, and the types of any child
* elements have been declared. (done)
*
* Preconditions: The element must already have been inserted
* into the parent element's list, or we cannot validate sequences.
*
*@@added V0.9.9 (2001-02-16) [umoeller]
*/
static VOID ValidateElement(PXMLDOM pDom,
PDOMNODE pNewElement, // in: new element
PCMELEMENTDECLNODE pParentElementDecl)
// in: element decl of element's parent
{
if (pDom && pNewElement)
{
if (!pParentElementDecl)
{
// this is always missing for the root element, of course,
// because the parent is the document
if (pNewElement->pParentNode == (PDOMNODE)pDom->pDocumentNode)
return; // that's OK
else
xmlSetError(pDom,
ERROR_DOM_VALIDATE_INVALID_ELEMENT,
pNewElement->NodeBase.strNodeName.psz,
TRUE);
}
else
{
ULONG ulDeclType = pParentElementDecl->Particle.NodeBase.ulNodeType;
switch (ulDeclType)
{
case ELEMENTPARTICLE_EMPTY:
// this is an error for sure
xmlSetError(pDom,
ERROR_DOM_SUBELEMENT_IN_EMPTY_ELEMENT,
pNewElement->NodeBase.strNodeName.psz,
TRUE);
break;
case ELEMENTPARTICLE_ANY:
// that's always OK
break;
case ELEMENTPARTICLE_MIXED:
case ELEMENTPARTICLE_CHOICE:
case ELEMENTPARTICLE_SEQ:
{
PXSTRING pstrNewElementName
= &pNewElement->NodeBase.strNodeName;
// for all these, we first need to check if
// the element is allowed at all
PCMELEMENTPARTICLE pParticle
= (PCMELEMENTPARTICLE)treeFind(
pParentElementDecl->ParticleNamesTree,
(ULONG)pstrNewElementName,
CompareXStrings);
if (!pParticle)
// not found: then this element is not allowed within this
// parent
xmlSetError(pDom,
ERROR_DOM_INVALID_SUBELEMENT,
pstrNewElementName->psz,
TRUE);
else
{
// the element is allowed at all: now check for the
// lists case... @@todo
switch (ulDeclType)
{
case ELEMENTPARTICLE_CHOICE:
break;
case ELEMENTPARTICLE_SEQ:
break;
}
}
break; }
}
}
}
else
pDom->arcDOM = ERROR_INVALID_PARAMETER;
// yes: get the element decl from the tree
/* PCMELEMENTDECLNODE pElementDecl = xmlFindElementDecl(pDom,
&pElement->NodeBase.strNodeName);
if (!pElementDecl)
{
xmlSetError(pDom,
ERROR_DOM_UNDECLARED_ELEMENT,
pElement->NodeBase.strNodeName.psz,
TRUE);
}
else
{
// element has been declared:
// check if it may appear in this element's parent...
PDOMNODE pParentElement = pElement->pParentNode;
if (!pParentElement)
pDom->arcDOM = ERROR_DOM_INTEGRITY;
else switch (pParentElement->NodeBase.ulNodeType)
{
case DOMNODE_DOCUMENT:
{
// if this is the root element, compare its name
// to the DOCTYPE name
if (pParentElement != (PDOMNODE)pDom->pDocumentNode)
xmlSetError(pDom,
ERROR_DOM_INVALID_ROOT_ELEMENT,
pElement->NodeBase.strNodeName.psz,
TRUE);
else if (strcmp(pDom->pDocumentNode->DomNode.NodeBase.strNodeName.psz,
pElement->NodeBase.strNodeName.psz))
// no match:
xmlSetError(pDom,
ERROR_DOM_ROOT_ELEMENT_MISNAMED,
pElement->NodeBase.strNodeName.psz,
TRUE);
break; }
case DOMNODE_ELEMENT:
{
// parent of element is another element:
// check the parent in the DTD and find out if
// this element may appear in the parent element
PCMELEMENTDECLNODE pParentElementDecl
= xmlFindElementDecl(pDom,
&pParentElement->NodeBase.strNodeName);
if (!pParentElementDecl)
pDom->arcDOM = ERROR_DOM_INTEGRITY;
else
{
// now check the element names tree of the parent element decl
// for whether this element is allowed as a sub-element at all
PCMELEMENTPARTICLE pParticle
= treeFindEQData(&pParentElementDecl->ParticleNamesTree,
(void*)pElement->NodeBase.strNodeName.psz,
CompareNodeBaseData);
if (!pParticle)
// not found: then this element is not allowed within this
// parent
xmlSetError(pDom,
ERROR_DOM_INVALID_SUBELEMENT,
pElement->NodeBase.strNodeName.psz,
TRUE);
}
break; }
}
}
*/
}
/*
*@@ ValidateAttributeType:
* validates the specified attribute's type against the
* document's @DTD.
*
* This sets arcDOM in XMLDOM on errors.
*
*@@added V0.9.9 (2001-02-16) [umoeller]
*/
static VOID ValidateAttributeType(PXMLDOM pDom,
PDOMNODE pAttrib,
PCMATTRIBUTEDECLBASE *ppAttribDeclBase)
{
PDOMNODE pElement = pAttrib->pParentNode;
PCMATTRIBUTEDECL pAttribDecl = xmlFindAttribDecl(pDom,
&pElement->NodeBase.strNodeName,
&pAttrib->NodeBase.strNodeName,
ppAttribDeclBase);
if (!pAttribDecl)
xmlSetError(pDom,
ERROR_DOM_UNDECLARED_ATTRIBUTE,
pAttrib->NodeBase.strNodeName.psz,
TRUE);
else
{
// check if the attribute value is allowed
switch (pAttribDecl->ulAttrType)
{
case CMAT_CDATA:
case CMAT_ID:
case CMAT_IDREF:
case CMAT_IDREFS: // ###
case CMAT_ENTITY:
case CMAT_ENTITIES:
case CMAT_NMTOKEN:
case CMAT_NMTOKENS:
break;
case CMAT_ENUM:
{
// enumeration: then check if it has one of the
// allowed values
PNODEBASE pValue = (PNODEBASE)treeFind(
pAttribDecl->ValuesTree,
(ULONG)pAttrib->pstrNodeValue,
CompareXStrings);
if (!pValue)
xmlSetError(pDom,
ERROR_DOM_INVALID_ATTRIB_VALUE,
pAttrib->NodeBase.strNodeName.psz,
TRUE);
}
}
if (pAttribDecl->ulConstraint == CMAT_FIXED_VALUE)
if (strcmp(pAttrib->pstrNodeValue->psz, pAttribDecl->pstrDefaultValue->psz))
// fixed value doesn't match:
xmlSetError(pDom,
ERROR_DOM_INVALID_ATTRIB_VALUE,
pAttrib->NodeBase.strNodeName.psz,
TRUE);
}
}
/*
*@@ ValidateAllAttributes:
* validates the constraints of all attributes of the specified
* element against the document's @DTD.
*
*@@added V0.9.9 (2001-02-16) [umoeller]
*/
static VOID ValidateAllAttributes(PXMLDOM pDom,
PCMATTRIBUTEDECLBASE pAttribDeclBase,
PDOMNODE pNewElement)
{
PCMATTRIBUTEDECL pDeclThis
= (PCMATTRIBUTEDECL)treeFirst(pAttribDeclBase->AttribDeclsTree);
while ((pDeclThis) && (!pDom->arcDOM))
{
// if attribute is all optional: then we don't need
// to check for whether it's here
if ( (pDeclThis->ulConstraint != CMAT_IMPLIED)
&& (pDeclThis->ulConstraint != CMAT_DEFAULT_VALUE)
// we don't have to check this case because expat
// already adds default attributes for us
)
{
// for all others , we need to find the attribute
PXSTRING pstrAttrNameThis = &pDeclThis->NodeBase.strNodeName;
PDOMNODE pAttrNode = (PDOMNODE)treeFind(
pNewElement->AttributesMap,
(ULONG)pstrAttrNameThis,
CompareXStrings);
// now switch again
switch (pDeclThis->ulConstraint)
{
case CMAT_REQUIRED:
if (!pAttrNode)
// required, but no attribute with this name exists:
xmlSetError(pDom,
ERROR_DOM_REQUIRED_ATTRIBUTE_MISSING,
pstrAttrNameThis->psz,
TRUE);
break;
}
}
pDeclThis = (PCMATTRIBUTEDECL)treeNext((TREE*)pDeclThis);
}
}
/* ******************************************************************
*
* Expat stack
*
********************************************************************/
/*
*@@ DOMSTACKITEM:
*
*@@added V0.9.9 (2001-02-16) [umoeller]
*/
typedef struct _DOMSTACKITEM
{
PDOMNODE pDomNode;
PCMELEMENTDECLNODE pElementDecl;
} DOMSTACKITEM, *PDOMSTACKITEM;
/*
*@@ PopElementStack:
*
* NOTE:
*
*@@added V0.9.9 (2001-02-16) [umoeller]
*/
static PDOMSTACKITEM PopElementStack(PXMLDOM pDom,
PLISTNODE *ppListNode)
{
PDOMSTACKITEM pStackItem = NULL;
PLISTNODE pParentLN = lstPop(&pDom->llElementStack);
if (!pParentLN)
pDom->arcDOM = ERROR_DOM_NO_ELEMENT;
else
{
// we have at least one node:
pStackItem = (PDOMSTACKITEM)pParentLN->pItemData;
if (ppListNode)
*ppListNode = pParentLN;
}
return (pStackItem);
}
/*
*@@ PushElementStack:
*
* NOTE: pDomNode will most frequently be an element
* node, but will also be the document for root and
* a DOCTYPE node while parsing the DTD.
*
*@@added V0.9.9 (2001-02-16) [umoeller]
*/
static VOID PushElementStack(PXMLDOM pDom,
PDOMNODE pDomNode)
{
PDOMSTACKITEM pNew = (PDOMSTACKITEM)malloc(sizeof(*pNew));
if (!pNew)
pDom->arcDOM = ERROR_NOT_ENOUGH_MEMORY;
else
{
memset(pNew, 0, sizeof(*pNew));
pNew->pDomNode = pDomNode;
// shall we validate?
if ( (pDom->pDocTypeNode)
&& (pDomNode->NodeBase.ulNodeType == DOMNODE_ELEMENT)
)
pNew->pElementDecl = xmlFindElementDecl(pDom,
&pDomNode->NodeBase.strNodeName);
lstPush(&pDom->llElementStack,
pNew);
}
}
/* ******************************************************************
*
* Expat handlers
*
********************************************************************/
/*
*@@ UnknownEncodingHandler:
* @expat handler called when the xml
* @text_declaration has an @encoding that is not
* one of the four encodings built into expat.
*
* See XML_SetUnknownEncodingHandler.
*
*@@added V0.9.14 (2001-08-09) [umoeller]
*/
static int EXPATENTRY UnknownEncodingHandler(void *pUserData, // in: out PXMLDOM really
const XML_Char *pcszName,
XML_Encoding *pEncoding)
{
PXMLDOM pDom = (PXMLDOM)pUserData;
ULONG ulCP;
if ( (pDom->pfnGetCPData) // callback exists?
&& (!strncmp(pcszName, "cp", 2))
&& (strlen(pcszName) > 4) // at least five chars (e.g. "cp850")
&& (ulCP = atoi(pcszName + 2))
)
{
// this is a PC codepage:
/* typedef struct _XML_Encoding
{
int map[256];
void *data;
int (* EXPATENTRY convert)(void *data, const char *s);
void (* EXPATENTRY release)(void *data);
} XML_Encoding; */
// ZERO(pEncoding);
pEncoding->convert = NULL;
pEncoding->release = NULL;
memset(&pEncoding->map, -1, sizeof(pEncoding->map));
if (pDom->pfnGetCPData(pDom,
ulCP,
pEncoding->map))
{
// go check if there's any -1 chars left
ULONG ul;
for (ul = 0;
ul < 256;
ul++)
{
if (pEncoding->map[ul] < 0)
xmlSetError(pDom,
ERROR_DOM_INCOMPLETE_ENCODING_MAP,
NULL,
FALSE);
}
// return success
return (1);
}
}
// error
return (0);
}
/*
*@@ StartElementHandler:
* @expat handler called when a new element is
* found.
*
* We create a new record in the container and
* push it onto our stack so we can insert
* children into it. We first start with the
* attributes.
*/
static void EXPATENTRY StartElementHandler(void *pUserData, // in: our PXMLDOM really
const char *pcszElement,
const char **papcszAttribs)
{
PXMLDOM pDom = (PXMLDOM)pUserData;
// continue parsing only if we had no errors so far
if (!pDom->arcDOM)
{
ULONG i;
PDOMSTACKITEM pSI = PopElementStack(pDom,
NULL); // no free
if (!pDom->arcDOM)
{
PDOMNODE pParent = pSI->pDomNode,
pNew = NULL;
if (!(pDom->arcDOM = xmlCreateElementNode(pParent,
pcszElement,
&pNew)))
// OK, node is valid:
// push this on the stack so we can add child elements
PushElementStack(pDom,
pNew);
// shall we validate?
if ( (!pDom->arcDOM)
&& (pDom->pDocTypeNode)
)
ValidateElement(pDom,
pNew, // new element
pSI->pElementDecl); // parent's elem decl
if (!pDom->arcDOM)
{
PCMATTRIBUTEDECLBASE pAttribDeclBase = NULL;
// shall we validate?
if (pDom->pDocTypeNode)
// yes: get attrib decl base for speed
pAttribDeclBase
= xmlFindAttribDeclBase(pDom,
&pNew->NodeBase.strNodeName);
// now for the attribs
for (i = 0;
(papcszAttribs[i]) && (!pDom->arcDOM);
i += 2)
{
PDOMNODE pAttrib;
if (!(pDom->arcDOM = xmlCreateAttributeNode(pNew, // element,
papcszAttribs[i], // attr name
papcszAttribs[i + 1], // attr value
&pAttrib)))
{
// shall we validate?
if (pDom->pDocTypeNode)
ValidateAttributeType(pDom,
pAttrib,
&pAttribDeclBase);
}
else
{
xmlSetError(pDom,
pDom->arcDOM,
papcszAttribs[i],
FALSE); // validation
break;
}
}
// OK, now we got all attributes:
// now look for defaults in the DTD,
// if we shall validate
if ( (pDom->pDocTypeNode)
&& (!pDom->arcDOM)
&& (pAttribDeclBase)
)
ValidateAllAttributes(pDom,
pAttribDeclBase,
pNew);
}
}
pDom->pLastWasTextNode = NULL;
}
}
/*
*@@ EndElementHandler:
* @expat handler for when parsing an element is done.
* We pop the element off of our stack then.
*/
static void EXPATENTRY EndElementHandler(void *pUserData, // in: our PXMLDOM really
const XML_Char *name)
{
PXMLDOM pDom = (PXMLDOM)pUserData;
// continue parsing only if we had no errors so far
if (!pDom->arcDOM)
{
PLISTNODE pStackLN = NULL;
PDOMSTACKITEM pSI = PopElementStack(pDom,
&pStackLN);
if (!pDom->arcDOM)
{
// shall we validate?
/* if (pDom->pDocTypeNode)
// yes:
ValidateElementChildren(pDom,
pSI->pDomNode); */
lstRemoveNode(&pDom->llElementStack, pStackLN); // auto-free
}
else
pDom->arcDOM = ERROR_DOM_INTEGRITY;
pDom->pLastWasTextNode = NULL;
}
}
/*
*@@ CharacterDataHandler:
* @expat handler for character data (@content).
*
* Note: expat passes chunks of content without zero-terminating
* them. We must concatenate the chunks to a full text node.
*/
static void EXPATENTRY CharacterDataHandler(void *pUserData, // in: our PXMLDOM really
const XML_Char *s,
int len)
{
PXMLDOM pDom = (PXMLDOM)pUserData;
// continue parsing only if we had no errors so far
if ( (!pDom->arcDOM)
&& (len)
)
{
// we need a new text node:
PDOMSTACKITEM pSI = PopElementStack(pDom,
NULL); // no free
if (!pDom->arcDOM)
{
PDOMNODE pParent = pSI->pDomNode;
// pNew = NULL;
BOOL fIsWhitespace = FALSE;
// shall we validate?
if (pDom->pDocTypeNode)
{
// yes: check if the parent element allows
// for content at all (must be "mixed" model)
// get the element decl from the tree
PCMELEMENTDECLNODE pElementDecl;
if (pElementDecl = pSI->pElementDecl)
{
switch (pElementDecl->Particle.NodeBase.ulNodeType)
{
case ELEMENTPARTICLE_ANY:
case ELEMENTPARTICLE_MIXED:
// those two are okay
break;
case ELEMENTPARTICLE_EMPTY:
// that's an error for sure
pDom->arcDOM = ERROR_ELEMENT_CANNOT_HAVE_CONTENT;
break;
default:
{
// ELEMENTPARTICLE_CHOICE:
// ELEMENTPARTICLE_SEQ:
// with these two, we accept whitespace, but nothing
// else... so if we have characters other than
// whitespace, terminate
ULONG ul;
const char *p = s;
if (pDom->flParserFlags & DF_DROP_WHITESPACE)
fIsWhitespace = TRUE;
for (ul = 0;
ul < len;
ul++, p++)
if (!strchr("\r\n\t ", *p))
{
// other character:
xmlSetError(pDom,
ERROR_ELEMENT_CANNOT_HAVE_CONTENT,
pParent->NodeBase.strNodeName.psz,
TRUE);
fIsWhitespace = FALSE;
break;
}
}
}
}
} // end if (pDom->pDocTypeNode)
if (!fIsWhitespace)
// this is false if any of the following
// is true:
// -- we are not validating at all
// -- we are validating, but the the element
// can have mixed content
// -- we are validating and the element does
// _not_ have mixed content and DF_DROP_WHITESPACE
// is set, but the string is whitespace only
// --> drop it then
if (pDom->pLastWasTextNode)
// we had a text node, and no elements or other
// stuff in between:
xstrcat(pDom->pLastWasTextNode->pstrNodeValue,
s,
len);
else
pDom->arcDOM = xmlCreateTextNode(pParent,
s,
len,
&pDom->pLastWasTextNode);
}
}
}
/*
*@@ CommentHandler:
* @expat handler for @comments.
*
* Note: This is only set if DF_PARSECOMMENTS is
* flagged with xmlCreateDOM.
*
*@@added V0.9.9 (2001-02-14) [umoeller]
*/
static void EXPATENTRY CommentHandler(void *pUserData, // in: our PXMLDOM really
const XML_Char *data)
{
PXMLDOM pDom = (PXMLDOM)pUserData;
// continue parsing only if we had no errors so far
if (!pDom->arcDOM)
{
// we need a new text node:
PDOMSTACKITEM pSI = PopElementStack(pDom,
NULL); // no free
if (!pDom->arcDOM)
{
PDOMNODE pParent = pSI->pDomNode,
pNew = NULL;
pDom->arcDOM = xmlCreateCommentNode(pParent,
data,
&pNew);
}
}
}
/*
*@@ StartDoctypeDeclHandler:
* @expat handler that is called at the start of a DOCTYPE
* declaration, before any external or internal subset is
* parsed.
*
* Both pcszSysid and pcszPubid may be NULL. "fHasInternalSubset"
* will be non-zero if the DOCTYPE declaration has an internal subset.
*
*@@added V0.9.9 (2001-02-14) [umoeller]
*/
static void EXPATENTRY StartDoctypeDeclHandler(void *pUserData,
const XML_Char *pcszDoctypeName,
const XML_Char *pcszSysid,
const XML_Char *pcszPubid,
int fHasInternalSubset)
{
PXMLDOM pDom = (PXMLDOM)pUserData;
// continue parsing only if we had no errors so far
if (!pDom->arcDOM)
{
// get the document node
PDOMDOCUMENTNODE pDocumentNode = pDom->pDocumentNode;
if (!pDocumentNode)
pDom->arcDOM = ERROR_DOM_NO_DOCUMENT;
else
{
// doctype must be null
if (pDom->pDocTypeNode)
pDom->arcDOM = ERROR_DOM_DUPLICATE_DOCTYPE;
else
pDom->arcDOM = xmlCreateDocumentTypeNode(pDocumentNode,
pcszDoctypeName,
pcszSysid,
pcszPubid,
fHasInternalSubset,
&pDom->pDocTypeNode);
}
}
}
/*
*@@ EndDoctypeDeclHandler:
* @expat handler that is called at the end of a DOCTYPE
* declaration, after parsing any external subset.
*
*@@added V0.9.9 (2001-02-14) [umoeller]
*/
static void EXPATENTRY EndDoctypeDeclHandler(void *pUserData) // in: our PXMLDOM really
{
PXMLDOM pDom = (PXMLDOM)pUserData;
// continue parsing only if we had no errors so far
if (!pDom->arcDOM)
{
}
}
/*
*@@ NotationDeclHandler:
* @expat handler for @notation_declarations.
*
* @@todo
*
*@@added V0.9.9 (2001-02-14) [umoeller]
*/
static void EXPATENTRY NotationDeclHandler(void *pUserData, // in: our PXMLDOM really
const XML_Char *pcszNotationName,
const XML_Char *pcszBase,
const XML_Char *pcszSystemId,
const XML_Char *pcszPublicId)
{
PXMLDOM pDom = (PXMLDOM)pUserData;
// continue parsing only if we had no errors so far
if (!pDom->arcDOM)
{
}
}
/*
*@@ ExternalEntityRefHandler:
* @expat handler for references to @external_entities.
*
* This handler is also called for processing an external DTD
* subset if parameter entity parsing is in effect.
* (See XML_SetParamEntityParsing.)
*
* The pcszContext argument specifies the parsing context in the
* format expected by the context argument to
* XML_ExternalEntityParserCreate; pcszContext is valid only until
* the handler returns, so if the referenced entity is to be
* parsed later, it must be copied.
*
* The pcszBase parameter is the base to use for relative system
* identifiers. It is set by XML_SetBase and may be null.
*
* The pcszPublicId parameter is the public id given in the entity
* declaration and may be null (since XML doesn't require public
* identifiers).
*
* The pcszSystemId is the system identifier specified in the
* entity declaration and is never null. This is an exact copy
* of what was specified in the reference.
*
* There are a couple of ways in which this handler differs
* from others. First, this handler returns an integer. A
* non-zero value should be returned for successful handling
* of the external entity reference. Returning a zero indicates
* failure, and causes the calling parser to return an
* ERROR_EXPAT_EXTERNAL_ENTITY_HANDLING error.
*
* Second, instead of having pUserData as its first argument,
* it receives the parser that encountered the entity reference.
* This, along with the context parameter, may be used as
* arguments to a call to XML_ExternalEntityParserCreate.
* Using the returned parser, the body of the external entity
* can be recursively parsed.
*
* Since this handler may be called recursively, it should not
* be saving information into global or static variables.
*
* Your handler isn't actually responsible for parsing the entity,
* but it is responsible for creating a subsidiary parser with
* XML_ExternalEntityParserCreate that will do the job. That returns
* an instance of XML_Parser that has handlers and other data
* structures initialized from the parent parser. You may then use
* XML_Parse or XML_ParseBuffer calls against that parser. Since
* external entities may refer to other external entities, your
* handler should be prepared to be called recursively.
*
*@@added V0.9.14 (2001-08-09) [umoeller]
*@@changed V0.9.20 (2002-07-06) [umoeller]: added automatic doctype support
*/
static int EXPATENTRY ExternalEntityRefHandler(void *pUserData, // in: our PXMLDOM really
XML_Parser parser,
const XML_Char *pcszContext,
const XML_Char *pcszBase,
const XML_Char *pcszSystemId,
const XML_Char *pcszPublicId)
{
PXMLDOM pDom = (PXMLDOM)pUserData;
int i = 0; // return error per default
// store the previous parser because
// all the callbacks use the parser pointer
XML_Parser pOldParser = pDom->pParser;
pDom->pParser = NULL;
if ( ( (pDom->pfnExternalHandler)
|| (pDom->cSystemIds) // V0.9.20 (2002-07-06) [umoeller]
)
// create sub-parser and replace the one
// in the DOM with it
&& (pDom->pParser = XML_ExternalEntityParserCreate(parser,
pcszContext,
"US-ASCII"))
)
{
// run through the predefined doctypes given to us
// in xmlCreateDOM, if any
// V0.9.20 (2002-07-06) [umoeller]
BOOL fCallExternal = TRUE;
ULONG ul;
for (ul = 0;
ul < pDom->cSystemIds;
++ul)
{
const STATICSYSTEMID *pThis = &pDom->paSystemIds[ul];
if (!strcmp(pThis->pcszSystemId, pcszSystemId))
{
// this one matches:
// then parse the corresponding entry given
// to us
if (XML_Parse(pDom->pParser,
pThis->pcszContent,
strlen(pThis->pcszContent),
TRUE))
i = 1; // success
fCallExternal = FALSE;
break;
}
}
if ( (fCallExternal) // not handled above
&& (pDom->pfnExternalHandler) // user handler set
)
{
APIRET arc;
if (!(arc = pDom->pfnExternalHandler(pDom,
pDom->pParser,
pcszSystemId,
pcszPublicId)))
i = 1; // success
else
{
// error:
// now this needs special handling, since we're
// dealing with a sub-handler here...
if (arc == -1)
// parser error: well, then xmlSetError has been
// called from somewhere in the callbacks already,
// and we can safely ignore this
;
else
{
pDom->arcDOM = arc;
if (pcszSystemId)
{
if (!pDom->pxstrFailingNode)
pDom->pxstrFailingNode = xstrCreate(0);
xstrcpy(pDom->pxstrFailingNode, pcszSystemId, 0);
}
pDom->pcszErrorDescription = xmlDescribeError(arc);
pDom->ulErrorLine = XML_GetCurrentLineNumber(pDom->pParser);
pDom->ulErrorColumn = XML_GetCurrentColumnNumber(pDom->pParser);
}
}
}
}
else
xmlSetError(pDom,
ERROR_DOM_INVALID_EXTERNAL_HANDLER,
NULL,
FALSE);
if (pDom->pParser)
XML_ParserFree(pDom->pParser);
pDom->pParser = pOldParser;
return (i);
}
/*
*@@ ElementDeclHandler:
* @expat handler for element declarations in a DTD. The
* handler gets called with the name of the element in
* the declaration and a pointer to a structure that contains
* the element model.
*
* It is the application's responsibility to free this data
* structure. @@todo
*
* The XML spec defines that no element may be declared more
* than once.
*
*@@added V0.9.9 (2001-02-14) [umoeller]
*/
static void EXPATENTRY ElementDeclHandler(void *pUserData, // in: our PXMLDOM really
const XML_Char *pcszName,
XMLCONTENT *pModel)
{
PXMLDOM pDom = (PXMLDOM)pUserData;
// continue parsing only if we had no errors so far
if (!pDom->arcDOM)
{
// OK, we're in a DOCTYPE node:
PDOMDOCTYPENODE pDocType = pDom->pDocTypeNode;
if (!pDocType)
xmlSetError(pDom,
ERROR_DOM_ELEMENT_DECL_OUTSIDE_DOCTYPE,
pcszName,
TRUE);
else
{
// create an element declaration and push it unto the
// declarations tree
PCMELEMENTDECLNODE pNew = NULL;
if (!(pDom->arcDOM = xmlCreateElementDecl(pcszName,
pModel,
&pNew)))
// this recurses!!
// after this, pModel is invalid
{
// add this to the doctype's declarations tree
if (treeInsert(&pDocType->ElementDeclsTree,
NULL,
(TREE*)pNew,
CompareXStrings))
// element already declared:
// according to the XML specs, this is a validity
// constraint, so we report a validation error
xmlSetError(pDom,
ERROR_DOM_DUPLICATE_ELEMENT_DECL,
pNew->Particle.NodeBase.strNodeName.psz,
TRUE);
}
}
}
}
/*
*@@ AddEnum:
*
*@@added V0.9.9 (2001-02-16) [umoeller]
*/
static APIRET AddEnum(PCMATTRIBUTEDECL pDecl,
const char *p, // in: start of name
const char *pNext) // in: end of name (not included)
{
// PSZ pszType = strhSubstr(p, pNext);
PNODEBASE pNew = NULL;
APIRET arc;
if (!(arc = xmlCreateNodeBase(ATTRIBUTE_DECLARATION_ENUM,
sizeof(NODEBASE),
p,
(pNext - p),
&pNew)))
treeInsert(&pDecl->ValuesTree,
NULL,
(TREE*)pNew,
CompareXStrings);
return arc;
}
/*
*@@ AttlistDeclHandler:
* @expat handler for attlist declarations in the DTD.
*
* This handler is called for each attribute. So a single attlist
* declaration with multiple attributes declared will generate
* multiple calls to this handler.
*
* -- pcszElementName is the name of the element for which the
* attribute is being declared.
*
* -- pcszAttribName has the attribute name being declared.
*
* -- pcszAttribType is the attribute type.
* It is the string representing the type in the declaration
* with whitespace removed.
*
* -- pcszDefault holds the default value. It will be
* NULL in the case of "#IMPLIED" or "#REQUIRED" attributes.
* You can distinguish these two cases by checking the
* fIsRequired parameter, which will be true in the case of
* "#REQUIRED" attributes. Attributes which are "#FIXED"
* will have also have a TRUE fIsRequired, but they will have
* the non-NULL fixed value in the pcszDefault parameter.
*
*@@added V0.9.9 (2001-02-14) [umoeller]
*/
static void EXPATENTRY AttlistDeclHandler(void *pUserData, // in: our PXMLDOM really
const XML_Char *pcszElementName,
const XML_Char *pcszAttribName,
const XML_Char *pcszAttribType,
const XML_Char *pcszDefault,
int fIsRequired)
{
PXMLDOM pDom = (PXMLDOM)pUserData;
// continue parsing only if we had no errors so far
if (!pDom->arcDOM)
{
// OK, we're in a DOCTYPE node:
PDOMDOCTYPENODE pDocType = pDom->pDocTypeNode;
if (!pDocType)
xmlSetError(pDom,
ERROR_DOM_ATTLIST_DECL_OUTSIDE_DOCTYPE,
pcszElementName,
TRUE);
else
{
PCMATTRIBUTEDECLBASE pThis = NULL,
pCache = pDom->pAttListDeclCache;
// check if this is for the same attlist as the previous
// call (we cache the pointer for speed)
if ( (pCache)
&& (!strhcmp(pCache->NodeBase.strNodeName.psz,
pcszElementName))
)
// this attdecl is for the same element:
// use that (we won't have to search the tree)
pThis = pDom->pAttListDeclCache;
if (!pThis)
{
// cache didn't match: look up attributes tree then...
// note: cheap trick, we need an XSTRING for treeFind
// but don't want malloc, so we use xstrInitSet
XSTRING strElementName;
xstrInitSet(&strElementName, (PSZ)pcszElementName);
if (!(pThis = (PCMATTRIBUTEDECLBASE)treeFind(
pDocType->AttribDeclBasesTree,
(ULONG)&strElementName,
CompareXStrings)))
{
// still not found:
// we need a new node then
if (!(pDom->arcDOM = xmlCreateNodeBase(ATTRIBUTE_DECLARATION_BASE,
sizeof(CMATTRIBUTEDECLBASE),
strElementName.psz,
strElementName.ulLength,
(PNODEBASE*)&pThis)))
{
// initialize the subtree
treeInit(&pThis->AttribDeclsTree, NULL);
treeInsert(&pDocType->AttribDeclBasesTree,
NULL,
(TREE*)pThis,
CompareXStrings);
}
}
pDom->pAttListDeclCache = pThis;
}
if (pThis)
{
// pThis now has either an existing or a new CMATTRIBUTEDECLBASE;
// add a new attribute def (CMATTRIBUTEDEDECL) to that
PCMATTRIBUTEDECL pNew = NULL;
if (!(pDom->arcDOM = xmlCreateNodeBase(ATTRIBUTE_DECLARATION,
sizeof(CMATTRIBUTEDECL),
pcszAttribName,
0,
(PNODEBASE*)&pNew)))
{
treeInit(&pNew->ValuesTree, NULL);
// check the type... expat is too lazy to parse this for
// us, so we must check manually. Expat only normalizes
// the "type" string to kick out whitespace, so we get:
// (TYPE1|TYPE2|TYPE3)
if (*pcszAttribType == '(')
{
// enumeration:
const char *p = pcszAttribType + 1,
*pNext;
while ( (pNext = strchr(p, '|'))
&& (!pDom->arcDOM)
)
{
pDom->arcDOM = AddEnum(pNew, p, pNext);
p = pNext + 1;
}
if (!pDom->arcDOM)
{
pNext = strchr(p, ')');
AddEnum(pNew, p, pNext);
pNew->ulAttrType = CMAT_ENUM;
}
}
else if (!strcmp(pcszAttribType, "CDATA"))
pNew->ulAttrType = CMAT_CDATA;
else if (!strcmp(pcszAttribType, "ID"))
pNew->ulAttrType = CMAT_ID;
else if (!strcmp(pcszAttribType, "IDREF"))
pNew->ulAttrType = CMAT_IDREF;
else if (!strcmp(pcszAttribType, "IDREFS"))
pNew->ulAttrType = CMAT_IDREFS;
else if (!strcmp(pcszAttribType, "ENTITY"))
pNew->ulAttrType = CMAT_ENTITY;
else if (!strcmp(pcszAttribType, "ENTITIES"))
pNew->ulAttrType = CMAT_ENTITIES;
else if (!strcmp(pcszAttribType, "NMTOKEN"))
pNew->ulAttrType = CMAT_NMTOKEN;
else if (!strcmp(pcszAttribType, "NMTOKENS"))
pNew->ulAttrType = CMAT_NMTOKENS;
if (!pDom->arcDOM)
{
if (pcszDefault)
{
// fixed or default:
if (fIsRequired)
// fixed:
pNew->ulConstraint = CMAT_FIXED_VALUE;
else
pNew->ulConstraint = CMAT_DEFAULT_VALUE;
pNew->pstrDefaultValue = xstrCreate(0);
xstrcpy(pNew->pstrDefaultValue, pcszDefault, 0);
}
else
// implied or required:
if (fIsRequired)
pNew->ulConstraint = CMAT_REQUIRED;
else
pNew->ulConstraint = CMAT_IMPLIED;
if (treeInsert(&pThis->AttribDeclsTree,
NULL,
(TREE*)pNew,
CompareXStrings))
xmlSetError(pDom,
ERROR_DOM_DUPLICATE_ATTRIBUTE_DECL,
pcszAttribName,
TRUE);
}
}
}
}
}
}
/*
*@@ EntityDeclHandler:
* @expat handler that will be called for all entity declarations.
*
* The fIsParameterEntity argument will be non-zero in the case
* of parameter entities and zero otherwise.
*
* For internal entities (<!ENTITY foo "bar">), pcszValue will be
* non-NULL and pcszSystemId, pcszPublicId, and pcszNotationName
* will all be NULL. The value string is not NULL terminated; the
* length is provided in the iValueLength parameter. Do not use
* iValueLength to test for internal entities, since it is legal
* to have zero-length values. Instead check for whether or not
* pcszValue is NULL.
*
* The pcszNotationName argument will have a non-NULL value only
* for unparsed entity declarations.
*
*@@added V0.9.9 (2001-02-14) [umoeller]
*/
static void EXPATENTRY EntityDeclHandler(void *pUserData, // in: our PXMLDOM really
const XML_Char *pcszEntityName,
int fIsParameterEntity,
const XML_Char *pcszValue,
int iValueLength,
const XML_Char *pcszBase,
const XML_Char *pcszSystemId,
const XML_Char *pcszPublicId,
const XML_Char *pcszNotationName)
{
PXMLDOM pDom = (PXMLDOM)pUserData;
// continue parsing only if we had no errors so far
if (!pDom->arcDOM)
{
}
}
/* ******************************************************************
*
* DOM parser APIs
*
********************************************************************/
/*
*@@ xmlCreateDOM:
* creates an XMLDOM instance, which can be used for parsing
* an XML document and building a @DOM tree from it at the
* same time.
*
* Pass the XMLDOM returned here to xmlParse afterwards.
*
* Simplest possible usage:
*
* 1) Create a DOM instance.
*
+ PXMLDOM pDom = NULL;
+ APIRET arc = xmlCreateDOM(flags, NULL, NULL, NULL, &pDom);
+
* 2) Give chunks of data (or an entire buffer)
* to the DOM instance for parsing.
*
+ arc = xmlParse(pDom,
+ pBuf,
+ TRUE); // if last, this will clean up the parser
*
* 3) Process the data in the DOM tree.
*
* Look at the DOMNODE definition to see how you
* can traverse the data. Essentially, everything
* is based on linked lists and string maps.
*
* A few helper functions have been added for
* quick lookup. See xmlGetRootElement,
* xmlGetFirstChild, xmlGetLastChild, xmlGetFirstText,
* xmlGetElementsByTagName, xmlGetAttribute.
*
* 4) When done, call xmlFreeDOM, which will free all memory.
*
* The above code has limitations: only a few character
* @encodings are supported, and @external_entities are
* silently ignored.
*
* This function supports a number of callbacks and flags
* to allow for maximum flexibility. Note however that
* not all @expat features are supported yet.
*
* flParserFlags is any combination of the following:
*
* -- DF_PARSECOMMENTS: XML @comments are to be returned in
* the DOM tree. Otherwise they are discarded.
*
* -- DF_PARSEDTD: add the @DTD of the document into the DOM tree
* as well and validate the document, if a DTD was found.
* Otherwise just parse and do not validate.
*
* DF_PARSEDTD is required for external entities to work
* also.
*
* -- DF_FAIL_IF_NO_DTD: fail if no @DTD was found. Useful
* if you want to enforce validation. @@todo
*
* -- DF_DROP_WHITESPACE: discard all @whitespace for those
* elements that can only have element content. Whitespace
* will be preserved only for elements that can have
* mixed content. -- If this flag is not set, all whitespace
* is preserved.
*
* The following callbacks can be specified (any of these
* can be NULL):
*
* -- pfnGetCPData should be specified if you want to
* support character @encodings other than the
* four that built into expat itself (see
* XML_SetUnknownEncodingHandler). This is probably
* a good idea to do under OS/2 since most OS/2
* documents are in a PC-specific codepage such as
* CP 850.
*
* This callback must have the following prototype:
*
+ int APIENTRY FNGETCPDATA(PXMLDOM pDom, ULONG ulCP, int *piMap)
*
* The callback will only be called once for each
* document if the "encoding" attribute of the
* XML @text_declaration starts with "cp" (e.g.
* "cp850") and will then receive the following
* parameters:
*
* -- "pDom" will be the XMLDOM created by this function.
*
* -- ulCP has the IBM code page number, such as "850".
*
* -- piMap is an array of 256 integer values which must
* be filled with the callback. Each array item index
* is the codepage value, and the value of each field
* is the corresponding Unicode value, or -1 if the
* character is invalid (shouldn't happen with codepages).
*
* For example, the German o-umlaut character is
* 0x94 in CP850 and 0x00f6 in Unicode. So set
* the int at index 0x94 to 0x00f6.
*
* -- pvCallbackUser is a user parameter which is simply stored
* in the XMLDOM struct which is returned. Since the XMLDOM
* is passed to all the callbacks, you can access that pointer
* from them.
*
*@@added V0.9.9 (2001-02-14) [umoeller]
*@@changed V0.9.14 (2001-08-09) [umoeller]: added DF_DROP_WHITESPACE support
*/
APIRET xmlCreateDOM(ULONG flParserFlags, // in: DF_* parser flags
const STATICSYSTEMID *paSystemIds, // in: array of STATICSYSTEMID's or NULL
ULONG cSystemIds, // in: array item count
PFNGETCPDATA pfnGetCPData, // in: codepage callback or NULL
PFNEXTERNALHANDLER pfnExternalHandler, // in: external entity callback or NULL
PVOID pvCallbackUser, // in: user param for callbacks
PXMLDOM *ppDom) // out: XMLDOM struct created
{
APIRET arc = NO_ERROR;
PXMLDOM pDom = (PXMLDOM)malloc(sizeof(*pDom));
if (!pDom)
arc = ERROR_NOT_ENOUGH_MEMORY;
else
{
PDOMNODE pDocument = NULL;
memset(pDom, 0, sizeof(XMLDOM));
pDom->flParserFlags = flParserFlags;
pDom->pfnGetCPData = pfnGetCPData;
pDom->pfnExternalHandler = pfnExternalHandler;
pDom->pvCallbackUser = pvCallbackUser;
// these added with V0.9.20 (2002-07-06) [umoeller]
pDom->paSystemIds = paSystemIds;
pDom->cSystemIds = cSystemIds;
lstInit(&pDom->llElementStack,
TRUE); // auto-free
// create the document node
if (!(arc = xmlCreateDomNode(NULL, // no parent
DOMNODE_DOCUMENT,
NULL,
0,
&pDocument)))
{
// store the document in the DOM
pDom->pDocumentNode = (PDOMDOCUMENTNODE)pDocument;
// push the document on the stack so the handlers
// will append to that
PushElementStack(pDom,
pDocument);
pDom->pParser = XML_ParserCreate(NULL);
if (!pDom->pParser)
arc = ERROR_NOT_ENOUGH_MEMORY;
else
{
if (pfnGetCPData)
XML_SetUnknownEncodingHandler(pDom->pParser,
UnknownEncodingHandler,
pDom); // user data
XML_SetParamEntityParsing(pDom->pParser,
XML_PARAM_ENTITY_PARSING_ALWAYS);
XML_SetElementHandler(pDom->pParser,
StartElementHandler,
EndElementHandler);
XML_SetCharacterDataHandler(pDom->pParser,
CharacterDataHandler);
// XML_SetProcessingInstructionHandler(XML_Parser parser,
// XML_ProcessingInstructionHandler handler);
if (flParserFlags & DF_PARSECOMMENTS)
XML_SetCommentHandler(pDom->pParser,
CommentHandler);
if ( (pfnExternalHandler)
|| (cSystemIds) // V0.9.20 (2002-07-06) [umoeller]
)
XML_SetExternalEntityRefHandler(pDom->pParser,
ExternalEntityRefHandler);
if (flParserFlags & DF_PARSEDTD)
{
XML_SetDoctypeDeclHandler(pDom->pParser,
StartDoctypeDeclHandler,
EndDoctypeDeclHandler);
XML_SetNotationDeclHandler(pDom->pParser,
NotationDeclHandler);
XML_SetElementDeclHandler(pDom->pParser,
ElementDeclHandler);
XML_SetAttlistDeclHandler(pDom->pParser,
AttlistDeclHandler);
XML_SetEntityDeclHandler(pDom->pParser,
EntityDeclHandler);
XML_SetParamEntityParsing(pDom->pParser,
XML_PARAM_ENTITY_PARSING_ALWAYS);
}
// XML_SetXmlDeclHandler ... do we care for this? I guess not
// pass the XMLDOM as user data to the handlers
XML_SetUserData(pDom->pParser,
pDom);
}
}
}
if (arc == NO_ERROR)
*ppDom = pDom;
else
xmlFreeDOM(pDom);
return arc;
}
/*
*@@ xmlParse:
* parses another chunk of XML data.
*
* If (fIsLast == TRUE), the internal @expat parser
* will be freed, but not the DOM itself.
*
* You can pass an XML document to this function
* in one flush. Set fIsLast = TRUE on the first
* and only call then.
*
* This returns NO_ERROR if the chunk was successfully
* parsed. Otherwise one of the following errors is
* returned:
*
* -- ERROR_INVALID_PARAMETER
*
* -- ERROR_DOM_PARSING: an @expat parsing error occured.
* This might also be memory problems.
* With this error code, you will find specific
* error information in the XMLDOM fields.
*
* -- ERROR_DOM_VALIDITY: the document is not @valid.
* This can only happen if @DTD parsing was enabled
* with xmlCreateDOM.
* With this error code, you will find specific
* error information in the XMLDOM fields.
*
*@@added V0.9.9 (2001-02-14) [umoeller]
*/
APIRET xmlParse(PXMLDOM pDom, // in: DOM created by xmlCreateDOM
const char *pcszBuf, // in: chunk of XML document data (or full document)
ULONG cb, // in: size of that chunk (required)
BOOL fIsLast) // in: set to TRUE if this is the last chunk
{
APIRET arc = NO_ERROR;
if (!pDom)
arc = ERROR_INVALID_PARAMETER;
else
{
// go parse then
if (!XML_Parse(pDom->pParser,
pcszBuf,
cb,
fIsLast))
{
// expat parsing error:
xmlSetError(pDom,
XML_GetErrorCode(pDom->pParser),
NULL,
FALSE);
if (pDom->pDocumentNode)
{
xmlDeleteNode((PNODEBASE)pDom->pDocumentNode);
pDom->pDocumentNode = NULL;
}
arc = ERROR_DOM_PARSING;
}
else if (pDom->fInvalid)
{
// expat was doing OK, but the handlers' validation failed:
arc = ERROR_DOM_VALIDITY;
// error info has already been set
}
else
// expat was doing OK, but maybe we have integrity errors
// from our DOM callbacks:
if (pDom->arcDOM)
arc = pDom->arcDOM;
if (arc != NO_ERROR || fIsLast)
{
// last call or error: clean up
XML_ParserFree(pDom->pParser);
pDom->pParser = NULL;
// clean up the stack (but not the DOM itself)
lstClear(&pDom->llElementStack);
}
}
return arc;
}
#ifdef __DEBUG__
/*
*@@ Dump:
*
*@@added V0.9.20 (2002-07-03) [umoeller]
*/
static VOID Dump(int iIndent,
PDOMNODE pDomNode)
{
PLISTNODE pChildNode;
int i;
for (i = 0;
i < iIndent;
++i)
{
printf(" ");
}
switch (pDomNode->NodeBase.ulNodeType)
{
#define DUMPTYPE(t) case t: printf(#t); break;
DUMPTYPE(DOMNODE_ELEMENT)
DUMPTYPE(DOMNODE_ATTRIBUTE)
DUMPTYPE(DOMNODE_TEXT)
DUMPTYPE(DOMNODE_PROCESSING_INSTRUCTION)
DUMPTYPE(DOMNODE_COMMENT)
DUMPTYPE(DOMNODE_DOCUMENT)
DUMPTYPE(DOMNODE_DOCUMENT_TYPE)
DUMPTYPE(ELEMENTPARTICLE_EMPTY)
DUMPTYPE(ELEMENTPARTICLE_ANY)
DUMPTYPE(ELEMENTPARTICLE_MIXED)
DUMPTYPE(ELEMENTPARTICLE_CHOICE)
DUMPTYPE(ELEMENTPARTICLE_SEQ)
DUMPTYPE(ELEMENTPARTICLE_NAME)
DUMPTYPE(ATTRIBUTE_DECLARATION_BASE)
DUMPTYPE(ATTRIBUTE_DECLARATION)
DUMPTYPE(ATTRIBUTE_DECLARATION_ENUM)
}
printf(" \"%s\"\n", STRINGORNULL(pDomNode->NodeBase.strNodeName.psz));
++iIndent;
for (pChildNode = lstQueryFirstNode(&pDomNode->llChildren);
pChildNode;
pChildNode = pChildNode->pNext)
{
Dump(iIndent, (PDOMNODE)pChildNode->pItemData);
}
--iIndent;
}
#endif
/*
*@@ xmlDump:
* debug function which dumps the DOM to stdout.
*
*@@added V0.9.20 (2002-07-03) [umoeller]
*/
VOID xmlDump(PXMLDOM pDom)
{
#ifdef __DEBUG__
if (!pDom)
{
printf(__FUNCTION__ ": pDom is NULL\n");
return;
}
printf(__FUNCTION__ ": dumping document node ");
Dump(0, (PDOMNODE)pDom->pDocumentNode);
#endif
}
/*
*@@ xmlFreeDOM:
* cleans up all resources allocated by
* xmlCreateDOM and xmlParse, including
* the entire DOM tree.
*
* If you wish to keep any data, make
* a copy of the respective pointers in pDom
* or subitems and set them to NULL before
* calling this function.
*
*@@added V0.9.9 (2001-02-14) [umoeller]
*/
APIRET xmlFreeDOM(PXMLDOM pDom)
{
APIRET arc = NO_ERROR;
if (pDom)
{
// if the parser is still alive for some reason, close it.
if (pDom->pParser)
{
XML_ParserFree(pDom->pParser);
pDom->pParser = NULL;
}
xmlDeleteNode((PNODEBASE)pDom->pDocumentNode);
if (pDom->pxstrSystemID)
xstrFree(&pDom->pxstrSystemID);
if (pDom->pxstrFailingNode)
xstrFree(&pDom->pxstrFailingNode);
lstClear(&pDom->llElementStack);
free(pDom);
}
return arc;
}
/* ******************************************************************
*
* DOM lookup
*
********************************************************************/
/*
*@@ xmlFindElementDecl:
* returns the CMELEMENTDECLNODE for the element
* with the specified name or NULL if there's none.
*
*@@added V0.9.9 (2001-02-16) [umoeller]
*/
PCMELEMENTDECLNODE xmlFindElementDecl(PXMLDOM pDom,
const XSTRING *pcstrElementName)
{
PCMELEMENTDECLNODE pElementDecl = NULL;
PDOMDOCTYPENODE pDocTypeNode = pDom->pDocTypeNode;
if ( (pDocTypeNode)
&& (pcstrElementName)
&& (pcstrElementName->ulLength)
)
{
pElementDecl = (PCMELEMENTDECLNODE)treeFind(
pDocTypeNode->ElementDeclsTree,
(ULONG)pcstrElementName,
CompareXStrings);
}
return (pElementDecl);
}
/*
*@@ xmlFindAttribDeclBase:
* returns the CMATTRIBUTEDECLBASE for the specified
* element name, or NULL if none exists.
*
* To find a specific attribute declaration from both
* an element and an attribute name, use xmlFindAttribDecl
* instead.
*
*@@added V0.9.9 (2001-02-16) [umoeller]
*/
PCMATTRIBUTEDECLBASE xmlFindAttribDeclBase(PXMLDOM pDom,
const XSTRING *pstrElementName)
{
PDOMDOCTYPENODE pDocTypeNode = pDom->pDocTypeNode;
if ( (pDocTypeNode)
&& (pstrElementName)
&& (pstrElementName->ulLength)
)
{
return ((PCMATTRIBUTEDECLBASE)treeFind(
pDocTypeNode->AttribDeclBasesTree,
(ULONG)pstrElementName,
CompareXStrings));
}
return NULL;
}
/*
*@@ xmlFindAttribDecl:
* returns the CMATTRIBUTEDEDECL for the specified
* element and attribute name, or NULL if none exists.
*
*@@added V0.9.9 (2001-02-16) [umoeller]
*/
PCMATTRIBUTEDECL xmlFindAttribDecl(PXMLDOM pDom,
const XSTRING *pstrElementName,
const XSTRING *pstrAttribName,
PCMATTRIBUTEDECLBASE *ppAttribDeclBase)
// in/out: attr decl base cache;
// the pointer pointed to by this
// must be NULL on the first call
{
if (pstrElementName && pstrAttribName)
{
if (!*ppAttribDeclBase)
// first call for this:
*ppAttribDeclBase = xmlFindAttribDeclBase(pDom,
pstrElementName);
if (*ppAttribDeclBase)
{
return ((PCMATTRIBUTEDECL)treeFind(
((**ppAttribDeclBase).AttribDeclsTree),
(ULONG)pstrAttribName,
CompareXStrings));
}
}
return NULL;
}
/*
*@@ xmlGetRootElement:
* returns the root element node from the specified
* DOM. Useful helper to start enumerating elements.
*
*@@added V0.9.11 (2001-04-22) [umoeller]
*@@changed V0.9.20 (2002-07-03) [umoeller]: this never worked with DTDs, fixed
*/
PDOMNODE xmlGetRootElement(PXMLDOM pDom)
{
PDOMDOCUMENTNODE pDocumentNode;
PLISTNODE pListNode;
if ( (pDom)
&& (pDocumentNode = pDom->pDocumentNode)
&& (pListNode = lstQueryFirstNode(&pDocumentNode->DomNode.llChildren))
)
{
// V0.9.20 (2002-07-03) [umoeller]:
// we can't just return the first node on the
// list, because if we have DTD, this might
// be the doctype... so loop until we find
// an element, which must be the root element
while (pListNode)
{
PDOMNODE pDomNode = (PDOMNODE)pListNode->pItemData;
if (pDomNode->NodeBase.ulNodeType == DOMNODE_ELEMENT)
return (pDomNode);
pListNode = pListNode->pNext;
}
}
return NULL;
}
/*
*@@ xmlGetFirstChild:
* returns the first child node of pDomNode.
* See DOMNODE for what a "child" can be for the
* various node types.
*
*@@added V0.9.9 (2001-02-14) [umoeller]
*/
PDOMNODE xmlGetFirstChild(PDOMNODE pDomNode)
{
PLISTNODE pListNode = lstQueryFirstNode(&pDomNode->llChildren);
if (pListNode)
return ((PDOMNODE)pListNode->pItemData);
return (0);
}
/*
*@@ xmlGetLastChild:
* returns the last child node of pDomNode.
* See DOMNODE for what a "child" can be for the
* various node types.
*
*@@added V0.9.9 (2001-02-14) [umoeller]
*/
PDOMNODE xmlGetLastChild(PDOMNODE pDomNode)
{
PLISTNODE pListNode = lstQueryLastNode(&pDomNode->llChildren);
if (pListNode)
return ((PDOMNODE)pListNode->pItemData);
return (0);
}
/*
*@@ xmlGetFirstText:
* returns the first text (character data) node
* of pElement or NULL if there's none.
*
*@@added V0.9.11 (2001-04-22) [umoeller]
*/
PDOMNODE xmlGetFirstText(PDOMNODE pElement)
{
PLISTNODE pNode;
PDOMNODE pDomNodeThis;
for (pNode = lstQueryFirstNode(&pElement->llChildren);
pNode;
pNode = pNode->pNext)
{
if ( (pDomNodeThis = (PDOMNODE)pNode->pItemData)
&& (pDomNodeThis->NodeBase.ulNodeType == DOMNODE_TEXT)
)
return (pDomNodeThis);
}
return NULL;
}
/*
*@@ xmlGetElementsByTagName:
* returns a linked list of DOMNODE nodes which
* match the specified element name. The special name
* "*" matches all elements.
*
* pParent must be the parent element DOMNODE...
* the only allowed exception is
*
* The caller must free the list by calling lstFree.
* Returns NULL if no such elements could be found.
*
*@@added V0.9.9 (2001-02-14) [umoeller]
*/
PLINKLIST xmlGetElementsByTagName(PDOMNODE pParent,
const char *pcszName)
{
PLINKLIST pll = lstCreate(FALSE); // no free
if (pll)
{
ULONG cItems = 0;
BOOL fFindAll = !strcmp(pcszName, "*");
PLISTNODE pNode;
PDOMNODE pDomNodeThis;
for (pNode = lstQueryFirstNode(&pParent->llChildren);
pNode;
pNode = pNode->pNext)
{
if ( (pDomNodeThis = (PDOMNODE)pNode->pItemData)
&& (pDomNodeThis->NodeBase.ulNodeType == DOMNODE_ELEMENT)
&& ( (fFindAll)
|| (!strcmp(pcszName, pDomNodeThis->NodeBase.strNodeName.psz))
)
)
{
// element matches:
lstAppendItem(pll, pDomNodeThis);
cItems++;
}
}
if (cItems)
return (pll);
else
lstFree(&pll);
}
return (0);
}
/*
*@@ xmlGetAttribute:
* returns the value of pElement's attribute
* with the given name or NULL.
*
* This is a const pointer into the element's
* attribute list.
*
*@@added V0.9.11 (2001-04-22) [umoeller]
*/
const XSTRING* xmlGetAttribute(PDOMNODE pElement,
const char *pcszAttribName)
{
XSTRING str;
PDOMNODE pAttrNode;
xstrInitSet(&str, (PSZ)pcszAttribName);
// note, cheap trick: no malloc here, but we need
// an XSTRING for treeFind
if (pAttrNode = (PDOMNODE)treeFind(pElement->AttributesMap,
(ULONG)&str,
CompareXStrings))
return (pAttrNode->pstrNodeValue);
return NULL;
}
/* ******************************************************************
*
* DOM build
*
********************************************************************/
/*
*@@ xmlCreateDocument:
* creates a new XML document.
*
* This is the first step in creating a DOM
* tree in memory.
*
* This function creates a DOCUMENT node
* (which is returned) and a root ELEMENT node
* within the document. For convenience, the
* root ELEMENT node is returned as well.
*
* This does not create a DOCTYPE node in
* the document.
*
* After this, you can add sub-elements and
* attributes to the root element using
* xmlCreateElementNode and xmlCreateAttributeNode.
*
* Use xmlDeleteNode on the DOCUMENT node
* to delete the entire DOM tree.
*
*@@added V0.9.12 (2001-05-21) [umoeller]
*/
APIRET xmlCreateDocument(const char *pcszRootElementName, // in: root element name
PDOMDOCUMENTNODE *ppDocument, // out: DOCUMENT node
PDOMNODE *ppRootElement) // out: root ELEMENT node within DOCUMENT
{
APIRET arc = NO_ERROR;
PDOMDOCUMENTNODE pDocument = NULL;
PDOMNODE pRootElement = NULL;
if ( (!pcszRootElementName) || (!ppDocument) || (!ppRootElement) )
arc = ERROR_INVALID_PARAMETER;
else
// create the document node
if (!(arc = xmlCreateDomNode(NULL, // no parent
DOMNODE_DOCUMENT,
NULL,
0,
(PDOMNODE*)&pDocument)))
if (!(arc = xmlCreateDomNode((PDOMNODE)pDocument, // parent
DOMNODE_ELEMENT,
pcszRootElementName,
0,
&pRootElement)))
{
*ppDocument = pDocument;
*ppRootElement = pRootElement;
}
return arc;
}
/*
*@@ WriteNodes:
* internal helper for writing out the nodes.
* This recurses.
*
*@@added V0.9.12 (2001-05-21) [umoeller]
*/
static VOID WriteNodes(PXSTRING pxstr,
PDOMNODE pDomNode) // in: node whose children are to be written (initially DOCUMENT)
{
PLISTNODE pListNode;
BOOL fMixedContent = (xmlGetFirstText(pDomNode) != NULL);
for (pListNode = lstQueryFirstNode(&pDomNode->llChildren);
(pListNode);
pListNode = pListNode->pNext)
{
PDOMNODE pChildNode = (PDOMNODE)pListNode->pItemData;
switch (pChildNode->NodeBase.ulNodeType)
{
case DOMNODE_ELEMENT:
{
PDOMNODE pAttribNode;
// write out opening ELEMENT tag
// add a line break if this does NOT have mixed
// content
if (!fMixedContent)
xstrcatc(pxstr, '\n');
xstrcatc(pxstr, '<');
xstrcats(pxstr, &pChildNode->NodeBase.strNodeName);
// go through attributes
for (pAttribNode = (PDOMNODE)treeFirst(pChildNode->AttributesMap);
(pAttribNode);
pAttribNode = (PDOMNODE)treeNext((TREE*)pAttribNode))
{
xstrcat(pxstr, "\n ", 0);
xstrcats(pxstr, &pAttribNode->NodeBase.strNodeName);
xstrcat(pxstr, "=\"", 0);
xstrcats(pxstr, pAttribNode->pstrNodeValue);
xstrcatc(pxstr, '\"');
}
// now check... do we have child nodes?
if (lstCountItems(&pChildNode->llChildren))
{
// yes:
xstrcatc(pxstr, '>');
// recurse into this child element
WriteNodes(pxstr, pChildNode);
if (!fMixedContent)
xstrcatc(pxstr, '\n');
// write closing tag
xstrcat(pxstr, "</", 0);
xstrcats(pxstr, &pChildNode->NodeBase.strNodeName);
xstrcatc(pxstr, '>');
}
else
{
// no child nodes:
// mark this tag as "empty"
xstrcat(pxstr, "/>", 0);
}
}
break;
case DOMNODE_TEXT:
case DOMNODE_COMMENT:
// that's simple
xstrcats(pxstr, pChildNode->pstrNodeValue);
break;
case DOMNODE_DOCUMENT_TYPE:
// @@todo
break;
}
}
}
/*
*@@ xmlWriteDocument:
* creates a complete XML document in the specified
* string buffer from the specified DOMDOCUMENTNODE.
*
* This creates a full XML document, starting with
* the <?xml...?> header, the DTD (if present),
* and the elements and attributes.
*
* The input XSTRING must be initialized. Its
* contents will be overwritten, if any exists.
*
* Sooo... to write a full XML document to disk,
* do the following:
*
* 1) Call xmlCreateDocument to have an empty
* document with a root element created.
*
* 2) Add elements, subelements, and attributes
* using xmlCreateElementNode and
* xmlCreateAttributeNode.
*
* 3) Call xmlWriteDocument to have the XML
* document written into an XSTRING.
*
* 4) Write the XSTRING to disk, e.g. using
* fwrite().
*
* Note: You can also use doshWriteTextFile,
* but you should then first convert the
* line format using xstrConvertLineFormat.
*
* Example:
*
+ APIRET arc = NO_ERROR;
+ PDOMDOCUMENTNODE pDocument = NULL;
+ PDOMNODE pRootElement = NULL;
+
+ // create a DOM
+ if (!(arc = xmlCreateDocument("MYROOTNODE",
+ &pDocument,
+ &pRootElement)))
+ {
+ // add subelements to the root element
+ PDOMNODE pSubelement;
+ if (!(arc = xmlCreateElementNode(pRootElement,
+ "MYSUBELEMENT",
+ &pSubelement)))
+ {
+ // add an attribute
+ PDOMNODE pAttribute;
+ if (!(arc = xmlCreateAttributeNode(pSubElement,
+ "MYATTRIB",
+ "VALUE",
+ &pAttribute)))
+ {
+ // alright, turn this into a string
+ XSTRING str;
+ xstrInit(&str, 1000);
+ if (!(arc = xmlWriteDocument(pDocument,
+ "ISO-8859-1",
+ NULL, // or DOCTYPE
+ &str)))
+ {
+ FILE *file = fopen("myfile.xml", "w");
+ fwrite(str.psz,
+ 1,
+ str.ulLength,
+ file);
+ fclose(file);
+ }
+ }
+ }
+
+ // this kills the entire tree
+ xmlDeleteNode((PNODEBASE)pDocument);
+
+ }
+
*
* A note about whitespace handling. Presently, this
* adds line breaks after the opening tag of an
* element if the element has element content only.
* However, if the element has mixed content, this
* line break is NOT automatically added because
* white space may then be significant.
*
*@@added V0.9.12 (2001-05-21) [umoeller]
*/
APIRET xmlWriteDocument(PDOMDOCUMENTNODE pDocument, // in: document node
const char *pcszEncoding, // in: encoding string (e.g. "ISO-8859-1")
const char *pcszDoctype, // in: entire DOCTYPE statement or NULL
PXSTRING pxstr) // out: document
{
APIRET arc = NO_ERROR;
if ( (!pDocument) || (!pcszEncoding) || (!pxstr) )
arc = ERROR_INVALID_PARAMETER;
else
{
// <?xml version="1.0" encoding="ISO-8859-1"?>
xstrcpy(pxstr, "<?xml version=\"1.0\" encoding=\"", 0);
xstrcat(pxstr, pcszEncoding, 0);
xstrcat(pxstr, "\"?>\n", 0);
// write entire DOCTYPE statement
if (pcszDoctype)
{
xstrcatc(pxstr, '\n');
xstrcat(pxstr, pcszDoctype, 0);
xstrcatc(pxstr, '\n');
}
// write out children
WriteNodes(pxstr, (PDOMNODE)pDocument);
xstrcatc(pxstr, '\n');
}
return arc;
}