home *** CD-ROM | disk | FTP | other *** search
- /*
- parser.c - HTML Parser
-
- (c) 1998 (W3C) MIT, INRIA, Keio University
- See tidy.c for the copyright notice.
- */
-
- #include "platform.h" /* platform independent stuff */
- #include "html.h" /* to pull in definition of nodes */
-
- int SeenBodyEndTag; /* file-scope flag, could be moved into lexer structure; it is not referenced by the parsing routines visible in this part of the file */
-
- void InsertNode(Node *element, Node *node)
- {
- node->parent = element;
- node->prev = element->last;
-
- if (element->last != null)
- element->last->next = node;
- else
- element->content = node;
-
- element->last = node;
- }
-
- void InsertNodeBeforeElement(Node *element, Node *node)
- {
- Node *parent;
-
- parent = element->parent;
- node->parent = parent;
- node->next = element;
- node->prev = element->prev;
- element->prev = node;
-
- if (node->prev)
- node->prev->next = node;
-
- if (parent->content == element)
- parent->content = node;
- }
-
- void InsertNodeAfterElement(Node *element, Node *node)
- {
- Node *parent;
-
- parent = element->parent;
- node->parent = parent;
-
- if (parent->last == element)
- parent->last = node;
- else
- node->next = element->next;
-
- element->next = node;
- node->prev = element;
- }
-
- void DiscardElement(Lexer *lexer, Node *element)
- {
- Node *parent;
-
- parent = element->parent;
-
- if (parent->last == element)
- parent->last = element->prev;
-
- if (parent->content == element)
- parent->content = element->next;
-
- if (element->prev)
- element->prev->next = element->next;
-
- if (element->next)
- element->next->prev = element->prev;
-
- element->next = null;
- FreeNode(element);
- }
-
- void TrimEmptyElement(Lexer *lexer, Node *element)
- {
- if (element->content == null &&
- (element->tag != tag_a || element->attributes == null))
- {
- if (element->type == TextNode ||
- (element->tag != null &&
- element->tag != tag_layer &&
- !(element->tag->model & CM_ROW)))
- {
-
- if (element->type != TextNode)
- ReportWarning(lexer, element, null, TRIM_EMPTY_ELEMENT);
-
- DiscardElement(lexer, element);
- }
- }
- }
-
- /*
- If last child of element is a text node
- then trim trailing white space character.
- */
- void TrimSpace(Lexer *lexer, Node *last)
- {
- unsigned char c;
-
- if (last != null && last->type == TextNode && last->end > last->start)
- {
- while (last->end > last->start)
- {
- c = (unsigned char)lexer->lexbuf[last->end - 1];
-
- if (c == 160) /* non breaking space */
- {
- if (last->parent->tag == tag_td || last->parent->tag == tag_th)
- {
- if (last->end > last->start + 1)
- last->end -= 1;
- else
- break;
- }
- else
- last->end -= 1;
- }
- else if (c == ' ')
- last->end -= 1;
- else
- break;
- }
-
- if (last->end < last->start)
- tidy_out(lexer->errout, "TrimSpace: screwed up text node\n");
-
- /* if empty string then delete from parse tree */
- if (last->start == last->end)
- TrimEmptyElement(lexer, last);
- }
- }
-
- /*
- This maps
- <em>hello </em><strong>world</strong>
- to
- <em>hello</em> <strong>world</strong>
-
- If last child of element is a text node
- then trim trailing white space character
- moving it to after element's end tag.
- */
- void TrimTrailingSpace(Lexer *lexer, Node *last)
- {
- unsigned char c;
-
- if (last != null && last->type == TextNode && last->end > last->start)
- {
- c = (unsigned char)lexer->lexbuf[last->end - 1];
-
- if (c == ' ' || c == 160)
- {
- last->end -= 1;
-
- if (last->parent->tag->model & CM_INLINE)
- lexer->insertspace = yes;
- }
-
- /* if empty string then delete from parse tree */
- if (last->start == last->end)
- TrimEmptyElement(lexer, last);
- }
- }
-
- /*
- This maps
- <p>hello<em> world</em>
- to
- <p>hello <em>world</em>
-
- Trims initial space, by moving it before the
- start tag, or if this element is the first in
- parent's content, then by discarding the space
- */
- void TrimInitialSpace(Lexer *lexer, Node *element, Node *text)
- {
- Node *prev, *node;
-
- if (text->type == TextNode && lexer->lexbuf[text->start] == ' ')
- {
- if (element->tag->model & CM_INLINE &&
- element->parent->content != element)
- {
- prev = element->prev;
-
- if (prev->type == TextNode)
- {
- if (lexer->lexbuf[prev->end - 1] != ' ')
- lexer->lexbuf[(prev->end)++] = ' ';
-
- ++(element->start);
- }
- else /* create new node */
- {
- node = NewNode();
- node->start = (element->start)++;
- node->end = element->start;
- lexer->lexbuf[node->start] = ' ';
- node->prev = prev;
- prev->next = node;
- node->next = element;
- element->prev = node;
- node->parent = element->parent;
- }
- }
-
- /* discard the space in current node */
- ++(text->start);
- }
- }
-
- Bool DescendantOf(Node *element, Dict *tag)
- {
- Node *parent;
-
- for (parent = element->parent;
- parent != null; parent = parent->parent)
- {
- if (parent->tag == tag)
- return yes;
- }
-
- return no;
- }
-
- void ParseTag(Lexer *lexer, Node *node, uint mode)
- {
- if (node->tag->model & CM_EMPTY)
- {
- lexer->waswhite = no;
- return;
- }
- else if (!(node->tag->model & CM_INLINE))
- lexer->insertspace = no;
-
- if (node->tag->parser == null || node->type == StartEndTag)
- return;
-
- (*node->tag->parser)(lexer, node, mode);
- }
-
- /*
- the doctype has been found after other tags,
- and needs moving to before the html element
- */
- void InsertDocType(Lexer *lexer, Node *element, Node *doctype)
- {
- ReportWarning(lexer, element, doctype, DOCTYPE_AFTER_TAGS);
-
- while (element->tag != tag_html)
- element = element->parent;
-
- InsertNodeBeforeElement(element, doctype);
- }
-
- void MoveToHead(Lexer *lexer, Node *element, Node *node)
- {
- Node *head;
-
-
- if (node->type == StartTag || node->type == StartEndTag)
- {
- ReportWarning(lexer, element, node, TAG_NOT_ALLOWED_IN);
-
- while (element->tag != tag_html)
- element = element->parent;
-
- for (head = element->content; head; head = head->next)
- {
- if (head->tag == tag_head)
- {
- InsertNode(head, node);
- break;
- }
- }
-
- if (node->tag->parser)
- ParseTag(lexer, node, IgnoreWhitespace);
- }
- else
- {
- ReportWarning(lexer, element, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- }
- }
-
- /*
- element is node created by the lexer
- upon seeing the start tag, or by the
- parser when the start tag is inferred
- */
- void ParseBlock(Lexer *lexer, Node *element, uint mode)
- {
- Node *node, *parent;
- Bool checkstack;
- uint istackbase;
-
- checkstack = yes;
-
- if (element->tag->model & CM_EMPTY)
- return;
-
- if (element->tag == tag_form && DescendantOf(element, tag_form))
- ReportWarning(lexer, element, null, ILLEGAL_NESTING);
-
- /*
- InlineDup() asks the lexer to insert inline emphasis tags
- currently pushed on the istack, but take care to avoid
- propagating inline emphasis inside OBJECT or APPLET.
- For these elements a fresh inline stack context is created
- and disposed of upon reaching the end of the element.
- They thus behave like table cells in this respect.
- */
- if (element->tag->model & CM_OBJECT)
- {
- istackbase = lexer->istackbase;
- lexer->istackbase = lexer->istacksize;
- }
-
- if (!(element->tag->model & CM_MIXED))
- InlineDup(lexer, null);
-
- mode = IgnoreWhitespace;
-
- while ((node = GetToken(lexer, mode /*MixedContent*/)) != null)
- {
- /* end tag for this element */
- if (node->type == EndTag &&
- (node->tag == element->tag || element->was == node->tag))
- {
- FreeNode(node);
-
- if (element->tag->model & CM_OBJECT)
- {
- /* pop inline stack */
- while (lexer->istacksize > lexer->istackbase)
- PopInline(lexer, null);
- lexer->istackbase = istackbase;
- }
-
- TrimSpace(lexer, element->last);
- TrimEmptyElement(lexer, element);
- return;
- }
-
- if (node->tag == tag_html || node->tag == tag_head || node->tag == tag_body)
- {
- if (node->type == StartTag || node->type == StartEndTag)
- ReportWarning(lexer, element, node, DISCARDING_UNEXPECTED);
-
- FreeNode(node);
- continue;
- }
-
- /*
- if this is the end tag for an ancestor element
- then infer end tag for this element
- */
- if (node->type == EndTag)
- {
- for (parent = element->parent;
- parent != null; parent = parent->parent)
- {
- if (node->tag == parent->tag)
- {
- if (!(element->tag->model & CM_OPT))
- ReportWarning(lexer, element, node, MISSING_ENDTAG_BEFORE);
-
- UngetToken(lexer);
-
- if (element->tag->model & CM_OBJECT)
- {
- /* pop inline stack */
- while (lexer->istacksize > lexer->istackbase)
- PopInline(lexer, null);
- lexer->istackbase = istackbase;
- }
-
- TrimSpace(lexer, element->last);
- TrimEmptyElement(lexer, element);
- return;
- }
- }
- }
-
- /* mixed content model permits text */
- if (node->type == TextNode)
- {
- if (checkstack)
- {
- checkstack = no;
-
- if (!(element->tag->model & CM_MIXED))
- {
- if (InlineDup(lexer, node) > 0)
- continue;
- }
- }
-
- InsertNode(element, node);
- mode = MixedContent;
- continue;
- }
-
- if (node->type == CommentTag ||
- node->type == ProcInsTag ||
- node->type == AspTag)
- {
- InsertNode(element, node);
- continue;
- }
-
- /* allow PARAM elements? */
- if (node->tag == tag_param)
- {
- if ((element->tag->model & CM_PARAM) &&
- node->type == StartTag)
- {
- InsertNode(element, node);
- continue;
- }
-
- /* otherwise discard it */
- ReportWarning(lexer, element, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- /* allow AREA elements? */
- if (node->tag == tag_area)
- {
- if ((element->tag == tag_map) &&
- (node->type == StartTag || node->type == StartEndTag))
- {
- InsertNode(element, node);
- continue;
- }
-
- /* otherwise discard it */
- ReportWarning(lexer, element, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- /* ignore unknown start/end tags */
- if (node->tag == null)
- {
- ReportWarning(lexer, element, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- /*
- Allow CM_INLINE elements here.
-
- Allow CM_BLOCK elements here unless
- lexer->excludeBlocks is yes.
-
- LI and DD are special cased.
-
- Otherwise infer end tag for this element.
- */
-
- if (!(node->tag->model & CM_INLINE))
- {
- if (node->type != StartTag && node->type != StartEndTag)
- {
- ReportWarning(lexer, element, node, DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (element->tag == tag_td || element->tag == tag_th)
- {
- /* if parent is a table cell, avoid inferring the end of the cell */
-
- if (node->tag->model & CM_HEAD)
- {
- MoveToHead(lexer, element, node);
- continue;
- }
-
- if (node->tag->model & CM_LIST)
- {
- UngetToken(lexer);
- node = InferredTag(lexer, "ul");
- lexer->excludeBlocks = yes;
- }
- else if (node->tag->model & CM_DEFLIST)
- {
- UngetToken(lexer);
- node = InferredTag(lexer, "dl");
- lexer->excludeBlocks = yes;
- }
-
- /* infer end of current table cell */
- if (!(node->tag->model & CM_BLOCK))
- {
- UngetToken(lexer);
- TrimSpace(lexer, element->last);
- TrimEmptyElement(lexer, element);
- return;
- }
- }
- else if (node->tag->model & CM_BLOCK)
- {
- if (lexer->excludeBlocks)
- {
- if (!(element->tag->model & CM_OPT))
- ReportWarning(lexer, element, node, MISSING_ENDTAG_BEFORE);
-
- UngetToken(lexer);
-
- if (element->tag->model & CM_OBJECT)
- lexer->istackbase = istackbase;
-
- TrimSpace(lexer, element->last);
- TrimEmptyElement(lexer, element);
- return;
- }
- }
- else /* things like list items */
- {
- if (!(element->tag->model & CM_OPT))
- ReportWarning(lexer, element, node, MISSING_ENDTAG_BEFORE);
-
- if (node->tag->model & CM_HEAD)
- {
- MoveToHead(lexer, element, node);
- continue;
- }
-
- UngetToken(lexer);
-
- if (node->tag->model & CM_LIST)
- {
- if (element->parent->tag == tag_ul || element->parent->tag == tag_ol)
- {
- TrimSpace(lexer, element->last);
- TrimEmptyElement(lexer, element);
- return;
- }
-
- node = InferredTag(lexer, "ul");
- }
- else if (node->tag->model & CM_DEFLIST)
- {
- if (element->parent->tag == tag_dl)
- {
- TrimSpace(lexer, element->last);
- TrimEmptyElement(lexer, element);
- return;
- }
-
- node = InferredTag(lexer, "dl");
- }
- else if (node->tag->model & CM_TABLE || node->tag->model & CM_ROW)
- {
- node = InferredTag(lexer, "table");
- }
- else if (element->tag->model & CM_OBJECT)
- {
- /* pop inline stack */
- while (lexer->istacksize > lexer->istackbase)
- PopInline(lexer, null);
- lexer->istackbase = istackbase;
- TrimSpace(lexer, element->last);
- TrimEmptyElement(lexer, element);
- return;
-
- }
- else
- {
- TrimSpace(lexer, element->last);
- TrimEmptyElement(lexer, element);
- return;
- }
- }
- }
-
- /* parse known element */
- if (node->type == StartTag || node->type == StartEndTag)
- {
- if (node->tag->model & CM_INLINE)
- {
- if (checkstack && !node->implicit)
- {
- checkstack = no;
-
- if (InlineDup(lexer, node) > 0)
- continue;
- }
-
- mode = MixedContent;
- }
- else
- {
- checkstack = yes;
- mode = IgnoreWhitespace;
- }
-
- /* trim white space before <br> */
- if (node->tag == tag_br)
- TrimSpace(lexer, element->last);
-
- InsertNode(element, node);
-
- if (node->implicit)
- ReportWarning(lexer, element, node, INSERTING_TAG);
-
- ParseTag(lexer, node, IgnoreWhitespace /*MixedContent*/);
- continue;
- }
-
- /* discard unexpected tags */
- if (node->type == EndTag)
- PopInline(lexer, node); /* if inline end tag */
-
- ReportWarning(lexer, element, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- }
-
- if (!(element->tag->model & CM_OPT))
- ReportWarning(lexer, element, node, MISSING_ENDTAG_FOR);
-
- if (element->tag->model & CM_OBJECT)
- {
- /* pop inline stack */
- while (lexer->istacksize > lexer->istackbase)
- PopInline(lexer, null);
- lexer->istackbase = istackbase;
- }
-
- TrimSpace(lexer, element->last);
- TrimEmptyElement(lexer, element);
- }
-
- void ParseInline(Lexer *lexer, Node *element, uint mode)
- {
- Node *node, *parent;
-
- if (element->tag->model & CM_EMPTY)
- return;
-
- if (element->tag == tag_a)
- {
- if (element->attributes == null)
- {
- ReportWarning(lexer, element->parent, element, DISCARDING_UNEXPECTED);
- DiscardElement(lexer, element);
- return;
- }
- }
-
- /*
- ParseInline is used for some block level elements like H1 to H6
- For such elements we need to insert inline emphasis tags currently
- on the inline stack. For Inline elements, we normally push them
- onto the inline stack provided they aren't implicit or OBJECT/APPLET.
- This test is carried out in PushInline and PopInline, see istack.c
- */
- if (element->tag->model & CM_BLOCK)
- InlineDup(lexer, null);
- else if (element->tag->model & CM_INLINE)
- PushInline(lexer, element);
-
- if (element->tag == tag_nobr)
- lexer->badLayout |= USING_NOBR;
- else if (element->tag == tag_font)
- lexer->badLayout |= USING_FONT;
-
- /* Inline elements may or may not be within a preformatted element */
- if (mode != Preformatted)
- mode = MixedContent;
-
- while ((node = GetToken(lexer, mode)) != null)
- {
- /* end tag for current element */
- if (node->tag == element->tag && node->type == EndTag)
- {
- if (element->tag->model & CM_INLINE)
- PopInline(lexer, node);
-
- FreeNode(node);
- TrimTrailingSpace(lexer, element->last);
-
- /*
- if a font element wraps an anchor and nothing else
- then move the font element inside the anchor since
- otherwise it won't alter the anchor text color
- */
- if (element->tag == tag_font && element->content && element->content == element->last)
- {
- Node *child = element->content;
-
- if (child->tag == tag_a)
- {
- child->parent = element->parent;
- child->next = element->next;
- child->prev = element->prev;
-
- if (child->prev)
- child->prev->next = child;
- else
- child->parent->content = child;
-
- if (child->next)
- child->next->prev = child;
- else
- child->parent->last = child;
-
- element->prev = element->last = null;
- element->parent = child;
- element->content = child->content;
- child->content = child->last = element;
- }
- }
-
- TrimEmptyElement(lexer, element);
- return;
- }
-
- if (node->type == TextNode)
- {
- /* only called for 1st child */
- if (element->content == null)
- TrimInitialSpace(lexer, element, node);
-
- if (node->start >= node->end)
- {
- FreeNode(node);
- continue;
- }
-
- InsertNode(element, node);
- continue;
- }
-
- /* mixed content model so allow text */
- if (node->type == CommentTag ||
- node->type == ProcInsTag ||
- node->type == AspTag)
- {
- InsertNode(element, node);
- continue;
- }
-
- /* deal with HTML tags */
- if (node->tag == tag_html)
- {
- if (node->type == StartTag || node->type == StartEndTag)
- {
- ReportWarning(lexer, element, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- /* otherwise infer end of inline element */
- UngetToken(lexer);
- TrimTrailingSpace(lexer, element->last);
- TrimEmptyElement(lexer, element);
- return;
- }
-
- /* ignore unknown and PARAM tags */
- if (node->tag == null || node->tag == tag_param)
- {
- ReportWarning(lexer, element, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- /* allow any inline end tag to end current element */
- if (node->type == EndTag && node->tag->model & CM_INLINE
- && !(node->tag->model & CM_OBJECT)
- && element->tag->model & CM_INLINE)
- {
- PopInline(lexer, element);
-
- if (node->tag == tag_a && node->tag != element->tag)
- {
- ReportWarning(lexer, element, node, MISSING_ENDTAG_BEFORE);
- UngetToken(lexer);
- }
- else
- {
- ReportWarning(lexer, element, node, NON_MATCHING_ENDTAG);
- FreeNode(node);
- }
-
- TrimTrailingSpace(lexer, element->last);
- TrimEmptyElement(lexer, element);
- return;
- }
-
- /* allow any header tag to end current header */
- if (node->tag->model & CM_HEADING && element->tag->model & CM_HEADING)
- {
- ReportWarning(lexer, element, node, NON_MATCHING_ENDTAG);
- FreeNode(node);
- TrimTrailingSpace(lexer, element->last);
- TrimEmptyElement(lexer, element);
- return;
- }
-
- /*
- an <A> tag to ends any open <A> element
- but <A href=...> is mapped to </A><A href=...>
- */
- if (node->tag == tag_a && !node->implicit && IsPushed(lexer, node))
- {
- /* coerce <a> to </a> unless it has some attributes */
- if (node->attributes == null)
- {
- node->type = EndTag;
- ReportWarning(lexer, element, node, FORCED_END_ANCHOR);
- PopInline(lexer, node);
- UngetToken(lexer);
- continue;
- }
-
- UngetToken(lexer);
- ReportWarning(lexer, element, node, MISSING_ENDTAG_BEFORE);
- PopInline(lexer, element);
- TrimTrailingSpace(lexer, element->last);
- TrimEmptyElement(lexer, element);
- return;
- }
-
- /*
- if this is the end tag for an ancestor element
- then infer end tag for this element
- */
- if (node->type == EndTag)
- {
- for (parent = element->parent;
- parent != null; parent = parent->parent)
- {
- if (node->tag == parent->tag)
- {
- if (!(element->tag->model & CM_OPT) && !element->implicit)
- ReportWarning(lexer, element, node, MISSING_ENDTAG_BEFORE);
-
- if (element->tag == tag_a)
- PopInline(lexer, element);
-
- UngetToken(lexer);
- TrimTrailingSpace(lexer, element->last);
- TrimEmptyElement(lexer, element);
- return;
- }
- }
- }
-
- if (node->tag == tag_hr && (element->tag->model & CM_HEADING))
- {
- ReportWarning(lexer, element, node, TAG_NOT_ALLOWED_IN);
-
- /*
- if the HR is the 1st thing in the heading then
- simply insert it before the heading element
- */
-
- if (element->content == null)
- {
- parent = element->parent;
-
- if (parent->content == element)
- {
- parent->content = node;
- node->prev = null;
- }
- else /* element isn't first node in parent's content */
- {
- element->prev->next = node;
- node->prev = element->prev;
- }
-
- node->parent = parent;
- node->next = element;
- element->prev = node;
- continue;
- }
-
- /*
- otherwise close the heading, insert the HR
- and then continue with a new heading element
- */
-
- element->next = node;
- node->prev = element;
- node->parent = element->parent;
- TrimSpace(lexer, element->last);
-
- element = CloneNode(lexer, element);
- element->prev = node;
- element->parent = node->parent;
- node->next = element;
- node->parent->last = element;
- continue;
- }
-
- /* block level tags end this element */
- if (!(node->tag->model & CM_INLINE))
- {
- if (node->type != StartTag)
- {
- ReportWarning(lexer, element, node, DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (!(element->tag->model & CM_OPT))
- ReportWarning(lexer, element, node, MISSING_ENDTAG_BEFORE);
-
- if (node->tag->model & CM_HEAD && !(node->tag->model & CM_BLOCK))
- {
- MoveToHead(lexer, element, node);
- continue;
- }
-
- /*
- prevent anchors from propagating into block tags
- except for headings h1 to h6
- */
- if (element->tag == tag_a)
- {
- if (node->tag && !(node->tag->model & CM_HEADING))
- PopInline(lexer, element);
- else if (!(element->content))
- {
- DiscardElement(lexer, element);
- UngetToken(lexer);
- return;
- }
- }
-
- UngetToken(lexer);
- TrimTrailingSpace(lexer, element->last);
- TrimEmptyElement(lexer, element);
- return;
- }
-
- /* parse inline element */
- if (node->type == StartTag || node->type == StartEndTag)
- {
- if (node->implicit)
- ReportWarning(lexer, element, node, INSERTING_TAG);
-
- /* trim white space before <br> */
- if (node->tag == tag_br)
- TrimSpace(lexer, element->last);
-
- InsertNode(element, node);
- ParseTag(lexer, node, mode);
- continue;
- }
-
- /* discard unexpected tags */
- ReportWarning(lexer, element, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- }
-
- if (!(element->tag->model & CM_OPT))
- ReportWarning(lexer, element, node, MISSING_ENDTAG_FOR);
-
- TrimEmptyElement(lexer, element);
- }
-
- void ParseDefList(Lexer *lexer, Node *list, uint mode)
- {
- Node *node, *parent;
-
- if (list->tag->model & CM_EMPTY)
- return;
-
- lexer->insert = null; /* defer implicit inline start tags */
-
- while ((node = GetToken(lexer, IgnoreWhitespace)) != null)
- {
- if (node->tag == list->tag && node->type == EndTag)
- {
- FreeNode(node);
- TrimEmptyElement(lexer, list);
- return;
- }
-
- /* deal with comments */
- if (node->type == CommentTag ||
- node->type == ProcInsTag ||
- node->type == AspTag)
- {
- InsertNode(list, node);
- continue;
- }
-
- if (node->type == TextNode)
- {
- UngetToken(lexer);
- node = InferredTag(lexer, "dd");
- ReportWarning(lexer, list, node, MISSING_STARTTAG);
- }
-
- if (node->tag == null)
- {
- ReportWarning(lexer, list, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- /*
- if this is the end tag for an ancestor element
- then infer end tag for this element
- */
- if (node->type == EndTag)
- {
- if (node->tag == tag_form)
- {
- lexer->badForm = yes;
- ReportWarning(lexer, list, node, DISCARDING_UNEXPECTED);
- continue;
- }
-
- for (parent = list->parent;
- parent != null; parent = parent->parent)
- {
- if (node->tag == parent->tag)
- {
- ReportWarning(lexer, list, node, MISSING_ENDTAG_BEFORE);
-
- UngetToken(lexer);
- TrimEmptyElement(lexer, list);
- return;
- }
- }
- }
-
- if (!(node->tag == tag_dt || node->tag == tag_dd))
- {
- UngetToken(lexer);
-
- if (!(node->tag->model & (CM_BLOCK | CM_INLINE)))
- {
- ReportWarning(lexer, list, node, TAG_NOT_ALLOWED_IN);
- TrimEmptyElement(lexer, list);
- return;
- }
-
- /* if DD appeared directly in BODY then exclude blocks */
- if (!(node->tag->model & CM_INLINE) && lexer->excludeBlocks)
- {
- TrimEmptyElement(lexer, list);
- return;
- }
-
- node = InferredTag(lexer, "dd");
- ReportWarning(lexer, list, node, MISSING_STARTTAG);
- }
-
- if (node->type == EndTag)
- {
- ReportWarning(lexer, list, node, DISCARDING_UNEXPECTED);
- continue;
- }
-
- /* node should be <DT> or <DD>*/
- InsertNode(list, node);
- ParseTag(lexer, node, IgnoreWhitespace);
- }
-
- ReportWarning(lexer, list, node, MISSING_ENDTAG_FOR);
- TrimEmptyElement(lexer, list);
- }
-
- void ParseList(Lexer *lexer, Node *list, uint mode)
- {
- Node *node, *parent;
- Bool first = yes;
-
- if (list->tag->model & CM_EMPTY)
- return;
-
-
- lexer->insert = null; /* defer implicit inline start tags */
-
- while ((node = GetToken(lexer, IgnoreWhitespace)) != null)
- {
- if (node->tag == list->tag && node->type == EndTag)
- {
- FreeNode(node);
-
- /*
- a bit of a hack, this code is made more
- complex by the fact that ReportWarning takes
- 2 nodes rather than a node and a string
- */
- if (MakeClean && (list->tag->model & CM_OBSOLETE))
- {
- /* create a ul node for the error message */
- node = CloneNode(lexer, list);
- MemFree(node->element);
- node->element = wstrdup("ul");
- node->tag = tag_ul;
- ReportWarning(lexer, list, node, OBSOLETE_ELEMENT);
-
- node->parent = node->prev = node->next = null;
- FreeNode(node); /* only used for error message */
-
- /* and coerce listing to pre */
- MemFree(list->element);
- list->element = wstrdup("ul");
- list->tag = tag_ul;
- }
-
- TrimEmptyElement(lexer, list);
- return;
- }
-
- if (node->type == TextNode)
- {
- UngetToken(lexer);
-
- /*
- the illegal form <ul>some text</ul> is sometimes used to get an
- indent; map it to <blockquote class="indent">some text</blockquote>
- */
-
- if (first)
- {
- ReportWarning(lexer, list, node, TAG_NOT_ALLOWED_IN);
- MemFree(list->element);
- list->element = wstrdup("blockquote");
- list->was = list->tag;
- list->tag = tag_blockquote;
- list->implicit = yes;
- ParseBlock(lexer, list, mode);
- return;
- }
-
- node = InferredTag(lexer, "li");
- ReportWarning(lexer, list, node, MISSING_STARTTAG);
- }
-
- /* deal with comments */
- if (node->type == CommentTag ||
- node->type == ProcInsTag ||
- node->type == AspTag)
- {
- InsertNode(list, node);
- continue;
- }
-
- if (node->tag == null)
- {
- ReportWarning(lexer, list, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- /*
- if this is the end tag for an ancestor element
- then infer end tag for this element
- */
- if (node->type == EndTag)
- {
- if (node->tag == tag_form)
- {
- lexer->badForm = yes;
- ReportWarning(lexer, list, node, DISCARDING_UNEXPECTED);
- continue;
- }
-
- for (parent = list->parent;
- parent != null; parent = parent->parent)
- {
- if (node->tag == parent->tag)
- {
- ReportWarning(lexer, list, node, MISSING_ENDTAG_BEFORE);
-
- UngetToken(lexer);
- TrimEmptyElement(lexer, list);
- return;
- }
- }
- }
-
- if (!(node->tag == tag_li))
- {
- /*
- if node is <ul> or <ol> and we have at least one <li>
- then parse node and append to the previous <li>
- */
-
- if (node->type != EndTag &&
- (node->tag->parser == ParseList ||
- node->tag->parser == ParseDefList))
- {
- Node *blockquote;
-
- if (first)
- {
- ReportWarning(lexer, list, node, TAG_NOT_ALLOWED_IN);
- blockquote = InferredTag(lexer, "blockquote");
- InsertNodeBeforeElement(list, blockquote);
- InsertNode(blockquote, node);
- ParseTag(lexer, node, mode);
- continue;
- }
-
- ReportWarning(lexer, list, node, TAG_NOT_ALLOWED_IN);
- InsertNode(list->last, node);
- ParseTag(lexer, node, IgnoreWhitespace);
- continue;
- }
-
-
- UngetToken(lexer);
-
- if (!(node->tag->model & (CM_BLOCK | CM_INLINE)))
- {
- ReportWarning(lexer, list, node, TAG_NOT_ALLOWED_IN);
- TrimEmptyElement(lexer, list);
- return;
- }
-
- /* if LI appeared directly in BODY then exclude blocks */
- if (!(node->tag->model & CM_INLINE) && lexer->excludeBlocks)
- {
- TrimEmptyElement(lexer, list);
- return;
- }
-
- if (node->type == EndTag)
- {
- ReportWarning(lexer, list, node, DISCARDING_UNEXPECTED);
- continue;
- }
-
- /*
- the illegal form <ul>some text</ul> is sometimes used to get an
- indent; map it to <blockquote class="indent">some text</blockquote>
- */
-
- if (first)
- {
- ReportWarning(lexer, list, node, TAG_NOT_ALLOWED_IN);
- MemFree(list->element);
- list->element = wstrdup("blockquote");
- list->was = list->tag;
- list->tag = tag_blockquote;
- list->implicit = yes;
- ParseBlock(lexer, list, mode);
- return;
- }
-
- node = InferredTag(lexer, "li");
- ReportWarning(lexer, list, node, MISSING_STARTTAG);
- }
-
- if (node->type == EndTag)
- {
- ReportWarning(lexer, list, node, DISCARDING_UNEXPECTED);
- continue;
- }
-
- /* node should be <LI> */
- InsertNode(list,node);
- ParseTag(lexer, node, IgnoreWhitespace);
- first = no;
- }
-
- ReportWarning(lexer, list, node, MISSING_ENDTAG_FOR);
- TrimEmptyElement(lexer, list);
- }
-
- /*
- unexpected content in table row is moved to just before
- the table in accordance with Netscape and IE. This code
- assumes that node hasn't been inserted into the row.
- */
- void MoveBeforeTable(Node *row, Node *node)
- {
- Node *table;
-
- /* first find the table element */
- for (table = row->parent; table; table = table->parent)
- {
- if (table->tag == tag_table)
- {
- node->prev = table->prev;
- node->next = table;
- table->prev = node;
- node->parent = table->parent;
-
- if (node->prev)
- node->prev->next = node;
-
- break;
- }
- }
- }
-
- void ParseRow(Lexer *lexer, Node *row, uint mode)
- {
- Node *node, *parent;
- Bool exclude_state;
-
- if (row->tag->model & CM_EMPTY)
- return;
-
- while ((node = GetToken(lexer, IgnoreWhitespace)) != null)
- {
- if (node->tag == row->tag)
- {
- if (node->type == EndTag)
- {
- FreeNode(node);
- TrimEmptyElement(lexer, row);
- return;
- }
-
- UngetToken(lexer);
- TrimEmptyElement(lexer, row);
- return;
- }
-
- /*
- if this is the end tag for an ancestor element
- then infer end tag for this element
- */
- if (node->type == EndTag)
- {
- if (node->tag == tag_form)
- {
- lexer->badForm = yes;
- ReportWarning(lexer, row, node, DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (node->tag == tag_td || node->tag == tag_th)
- {
- ReportWarning(lexer, row, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- for (parent = row->parent;
- parent != null; parent = parent->parent)
- {
- if (node->tag == parent->tag)
- {
- UngetToken(lexer);
- TrimEmptyElement(lexer, row);
- return;
- }
- }
- }
-
- /* deal with comments */
- if (node->type == CommentTag ||
- node->type == ProcInsTag ||
- node->type == AspTag)
- {
- InsertNode(row, node);
- continue;
- }
-
- /* discard unknown tags */
- if (node->tag == null && node->type != TextNode)
- {
- ReportWarning(lexer, row, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- /* discard unexpected <table> element */
- if (node->tag == tag_table)
- {
- ReportWarning(lexer, row, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- /* THEAD, TFOOT or TBODY */
- if (node->tag && (node->tag->model & CM_ROWGRP))
- {
- UngetToken(lexer);
- TrimEmptyElement(lexer, row);
- return;
- }
-
- if (node->type == EndTag)
- {
- ReportWarning(lexer, row, node, DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (!(node->tag == tag_td || node->tag == tag_th))
- {
- MoveBeforeTable(row, node);
- ReportWarning(lexer, row, node, TAG_NOT_ALLOWED_IN);
-
- if (node->type != TextNode)
- ParseTag(lexer, node, IgnoreWhitespace);
-
- continue;
- #if 0
- /* previous code inferred either </tr> or <td> */
- if (!(node->tag->model & (CM_BLOCK | CM_INLINE)))
- {
- ReportWarning(lexer, row, node, MISSING_ENDTAG_BEFORE);
- TrimEmptyElement(lexer, row);
- return;
- }
-
- node = InferredTag(lexer, "td");
- ReportWarning(lexer, row, node, MISSING_STARTTAG);
- #endif
- }
-
- /* node should be <TD> or <TH> */
- InsertNode(row, node);
- exclude_state = lexer->excludeBlocks;
- lexer->excludeBlocks = no;
- ParseTag(lexer, node, IgnoreWhitespace);
- lexer->excludeBlocks = exclude_state;
-
- /* pop inline stack */
-
- while (lexer->istacksize > lexer->istackbase)
- PopInline(lexer, null);
- }
-
- TrimEmptyElement(lexer, row);
- }
-
- void ParseRowGroup(Lexer *lexer, Node *rowgroup, uint mode)
- {
- Node *node, *parent;
-
- if (rowgroup->tag->model & CM_EMPTY)
- return;
-
- while ((node = GetToken(lexer, IgnoreWhitespace)) != null)
- {
- if (node->tag == rowgroup->tag)
- {
- if (node->type == EndTag)
- {
- TrimEmptyElement(lexer, rowgroup);
- FreeNode(node);
- return;
- }
-
- UngetToken(lexer);
- return;
- }
-
- /* if </table> infer end tag */
- if (node->tag == tag_table && node->type == EndTag)
- {
- UngetToken(lexer);
- TrimEmptyElement(lexer, rowgroup);
- return;
- }
-
- /* discard unknown tags */
- if (node->tag == null && node->type != TextNode)
- {
- ReportWarning(lexer, rowgroup, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- /* if TD or TH or text or inline or block then infer <TR> */
-
- if (node->type == StartTag &&
- (node->tag == tag_td ||
- node->tag == tag_th ||
- (node->tag->model & (CM_BLOCK | CM_INLINE)))
- || node->type == TextNode)
- {
- UngetToken(lexer);
- node = InferredTag(lexer, "tr");
- ReportWarning(lexer, rowgroup, node, MISSING_STARTTAG);
- }
-
- if (node->type == CommentTag ||
- node->type == ProcInsTag ||
- node->type == AspTag)
- {
- InsertNode(rowgroup, node);
- continue;
- }
-
- /*
- if this is the end tag for ancestor element
- then infer end tag for this element
- */
- if (node->type == EndTag)
- {
- if (node->tag == tag_form)
- {
- lexer->badForm = yes;
- ReportWarning(lexer, rowgroup, node, DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (node->tag == tag_tr || node->tag == tag_td || node->tag == tag_th)
- {
- ReportWarning(lexer, rowgroup, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- for (parent = rowgroup->parent;
- parent != null; parent = parent->parent)
- {
- if (node->tag == parent->tag)
- {
- UngetToken(lexer);
- TrimEmptyElement(lexer, rowgroup);
- return;
- }
- }
- }
-
- /*
- if THEAD, TFOOT or TBODY then implied end tag
-
- */
- if (node->tag->model & CM_ROWGRP)
- {
- if (node->type != EndTag)
- UngetToken(lexer);
-
- TrimEmptyElement(lexer, rowgroup);
- return;
- }
-
- if (node->type == EndTag)
- {
- ReportWarning(lexer, rowgroup, node, DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (!(node->tag == tag_tr))
- {
- node = InferredTag(lexer, "tr");
- ReportWarning(lexer, rowgroup, node, MISSING_STARTTAG);
- UngetToken(lexer);
- }
-
- /* node should be <TR> */
- InsertNode(rowgroup, node);
- ParseTag(lexer, node, IgnoreWhitespace);
- }
-
- TrimEmptyElement(lexer, rowgroup);
- }
-
- void ParseColGroup(Lexer *lexer, Node *colgroup, uint mode)
- {
- Node *node, *parent;
-
- if (colgroup->tag->model & CM_EMPTY)
- return;
-
- while ((node = GetToken(lexer, IgnoreWhitespace)) != null)
- {
- if (node->tag == colgroup->tag && node->type == EndTag)
- {
- FreeNode(node);
- return;
- }
-
- /*
- if this is the end tag for an ancestor element
- then infer end tag for this element
- */
- if (node->type == EndTag)
- {
- if (node->tag == tag_form)
- {
- lexer->badForm = yes;
- ReportWarning(lexer, colgroup, node, DISCARDING_UNEXPECTED);
- continue;
- }
-
- for (parent = colgroup->parent;
- parent != null; parent = parent->parent)
- {
-
- if (node->tag == parent->tag)
- {
- UngetToken(lexer);
- return;
- }
- }
- }
-
- if (node->type == TextNode)
- {
- UngetToken(lexer);
- return;
- }
-
- /* deal with comments */
- if (node->type == CommentTag ||
- node->type == ProcInsTag ||
- node->type == AspTag)
- {
- InsertNode(colgroup, node);
- continue;
- }
-
- /* discard unknown tags */
- if (node->tag == null)
- {
- ReportWarning(lexer, colgroup, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- if (node->tag != tag_col)
- {
- UngetToken(lexer);
- return;
- }
-
- if (node->type == EndTag)
- {
- ReportWarning(lexer, colgroup, node, DISCARDING_UNEXPECTED);
- continue;
- }
-
- /* node should be <COL> */
- InsertNode(colgroup, node);
- ParseTag(lexer, node, IgnoreWhitespace);
- }
- }
-
- void ParseTableTag(Lexer *lexer, Node *table, uint mode)
- {
- Node *node, *parent;
- uint istackbase;
-
- istackbase = lexer->istackbase;
- lexer->istackbase = lexer->istacksize;
-
- while ((node = GetToken(lexer, IgnoreWhitespace)) != null)
- {
- if (node->tag == table->tag && node->type == EndTag)
- {
- FreeNode(node);
- lexer->istackbase = istackbase;
- TrimEmptyElement(lexer, table);
- return;
- }
-
- if (node->type == CommentTag ||
- node->type == ProcInsTag ||
- node->type == AspTag)
- {
- InsertNode(table, node);
- continue;
- }
-
- /* discard unknown tags */
- if (node->tag == null && node->type != TextNode)
- {
- ReportWarning(lexer, table, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- /* if TD or TH or text or inline or block then infer <TR> */
-
- if (node->type != EndTag)
- {
- if (node->tag == tag_td ||
- node->tag == tag_th ||
- node->type == TextNode ||
- (node->tag->model & (CM_BLOCK | CM_INLINE)))
- {
- UngetToken(lexer);
- node = InferredTag(lexer, "tr");
- ReportWarning(lexer, table, node, MISSING_STARTTAG);
- }
- }
-
- /*
- if this is the end tag for an ancestor element
- then infer end tag for this element
- */
- if (node->type == EndTag)
- {
- if (node->tag == tag_form)
- {
- lexer->badForm = yes;
- ReportWarning(lexer, table, node, DISCARDING_UNEXPECTED);
- continue;
- }
-
- if (node->tag && node->tag->model & (CM_TABLE|CM_ROW))
- {
- ReportWarning(lexer, table, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- for (parent = table->parent;
- parent != null; parent = parent->parent)
- {
- if (node->tag == parent->tag)
- {
- ReportWarning(lexer, table, node, MISSING_ENDTAG_BEFORE);
- UngetToken(lexer);
- lexer->istackbase = istackbase;
- TrimEmptyElement(lexer, table);
- return;
- }
- }
- }
-
- if (!(node->tag->model & CM_TABLE))
- {
- UngetToken(lexer);
- ReportWarning(lexer, table, node, TAG_NOT_ALLOWED_IN);
- lexer->istackbase = istackbase;
- TrimEmptyElement(lexer, table);
- return;
- }
-
- if (node->type == StartTag || node->type == StartEndTag)
- {
- InsertNode(table, node);;
- ParseTag(lexer, node, IgnoreWhitespace);
- continue;
- }
-
- /* discard unexpected text nodes and end tags */
- ReportWarning(lexer, table, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- }
-
- ReportWarning(lexer, table, node, MISSING_ENDTAG_FOR);
- TrimEmptyElement(lexer, table);
- lexer->istackbase = istackbase;
- }
-
- void ParsePre(Lexer *lexer, Node *pre, uint mode)
- {
- Node *node, *parent;
-
- if (pre->tag->model & CM_EMPTY)
- return;
-
- /*
- a bit of a hack, this code is made more
- complex by the fact that ReportWarning takes
- 2 nodes rather than a node and a string
- */
- if (pre->tag->model & CM_OBSOLETE)
- {
- /* create a pre node for the error message */
- node = CloneNode(lexer, pre);
- MemFree(node->element);
- node->element = wstrdup("pre");
- node->tag = tag_pre;
- ReportWarning(lexer, pre, node, OBSOLETE_ELEMENT);
-
- node->parent = node->prev = node->next = null;
- FreeNode(node); /* only used for error message */
-
- /* and coerce listing to pre */
- MemFree(pre->element);
- pre->element = wstrdup("pre");
- pre->tag = tag_pre;
- }
-
- InlineDup(lexer, null); /* tell lexer to insert inlines if needed */
-
- while ((node = GetToken(lexer, Preformatted)) != null)
- {
- if (node->tag == pre->tag && node->type == EndTag)
- {
- FreeNode(node);
- TrimSpace(lexer, pre);
- TrimEmptyElement(lexer, pre);
- return;
- }
-
- if (node->tag == tag_html)
- {
- if (node->type == StartTag || node->type == StartEndTag)
- ReportWarning(lexer, pre, node, DISCARDING_UNEXPECTED);
-
- FreeNode(node);
- continue;
- }
-
- if (node->type == TextNode)
- {
- /* if first check for inital newline */
- if (pre->content == null)
- {
- if (lexer->lexbuf[node->start] == '\n')
- ++(node->start);
-
- if (node->start >= node->end)
- {
- FreeNode(node);
- continue;
- }
- }
-
- InsertNode(pre, node);
- continue;
- }
-
- if (node->type == CommentTag ||
- node->type == ProcInsTag ||
- node->type == AspTag)
- {
- InsertNode(pre, node);
- continue;
- }
-
- /* discard unknown and PARAM tags */
- if (node->tag == null || node->tag == tag_param)
- {
- ReportWarning(lexer, pre, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- if (node->tag == tag_p && node->type == StartTag)
- {
- ReportWarning(lexer, pre, node, USING_BR_INPLACE_OF);
-
- /* trim white space before <p> in <pre>*/
- TrimSpace(lexer, pre->last);
-
- /* coerce <p> to <br> */
- node->tag = tag_br;
- MemFree(node->element);
- node->element = wstrdup("br");
- InsertNode(pre, node);
- continue;
- }
-
- if (node->tag->model & CM_HEAD && !(node->tag->model & CM_BLOCK))
- {
- MoveToHead(lexer, pre, node);
- continue;
- }
-
- /*
- if this is the end tag for an ancestor element
- then infer end tag for this element
- */
- if (node->type == EndTag)
- {
- if (node->tag == tag_form)
- {
- lexer->badForm = yes;
- ReportWarning(lexer, pre, node, DISCARDING_UNEXPECTED);
- continue;
- }
-
- for (parent = pre->parent;
- parent != null; parent = parent->parent)
- {
- if (node->tag == parent->tag)
- {
- ReportWarning(lexer, pre, node, MISSING_ENDTAG_BEFORE);
-
- UngetToken(lexer);
- TrimSpace(lexer, pre);
- TrimEmptyElement(lexer, pre);
- return;
- }
- }
- }
-
- /* what about head content, HEAD, BODY tags etc? */
- if (!(node->tag->model & CM_INLINE))
- {
- if (node->type != StartTag)
- {
- ReportWarning(lexer, pre, node, DISCARDING_UNEXPECTED);
- continue;
- }
-
- ReportWarning(lexer, pre, node, MISSING_ENDTAG_BEFORE);
- lexer->excludeBlocks = yes;
-
- /* check if we need to infer a container */
- if (node->tag->model & CM_LIST)
- {
- UngetToken(lexer);
- node = InferredTag(lexer, "ul");
- }
- else if (node->tag->model & CM_DEFLIST)
- {
- UngetToken(lexer);
- node = InferredTag(lexer, "dl");
- }
- else if (node->tag->model & CM_TABLE)
- {
- UngetToken(lexer);
- node = InferredTag(lexer, "table");
- }
-
- InsertNodeAfterElement(pre, node);
- pre = InferredTag(lexer, "pre");
- InsertNodeAfterElement(node, pre);
- ParseTag(lexer, node, IgnoreWhitespace);
- lexer->excludeBlocks = no;
- continue;
- }
- #if 0
- if (!(node->tag->model & CM_INLINE))
- {
- ReportWarning(lexer, pre, node, MISSING_ENDTAG_BEFORE);
- UngetToken(lexer);
- return;
- }
- #endif
- if (node->type == StartTag || node->type == StartEndTag)
- {
- /* trim white space before <br> */
- if (node->tag == tag_br)
- TrimSpace(lexer, pre->last);
-
- InsertNode(pre, node);
- ParseTag(lexer, node, Preformatted);
- continue;
- }
-
- /* discard unexpected tags */
- ReportWarning(lexer, pre, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- }
-
- ReportWarning(lexer, pre, node, MISSING_ENDTAG_FOR);
- TrimEmptyElement(lexer, pre);
- }
-
- void ParseOptGroup(Lexer *lexer, Node *field, uint mode)
- {
- Node *node;
-
- lexer->insert = null; /* defer implicit inline start tags */
-
- while ((node = GetToken(lexer, IgnoreWhitespace)) != null)
- {
- if (node->tag == field->tag && node->type == EndTag)
- {
- FreeNode(node);
- TrimSpace(lexer, field->last);
- return;
- }
-
- if (node->type == CommentTag ||
- node->type == ProcInsTag ||
- node->type == AspTag)
- {
- InsertNode(field, node);
- continue;
- }
-
- if (node->type == StartTag &&
- (node->tag == tag_option || node->tag == tag_optgroup))
- {
- if (node->tag == tag_optgroup)
- ReportWarning(lexer, field, node, CANT_BE_NESTED);
-
- InsertNode(field, node);
- ParseTag(lexer, node, MixedContent);
- continue;
- }
-
- /* discard unexpected tags */
- ReportWarning(lexer, field, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- }
- }
-
-
- void ParseSelect(Lexer *lexer, Node *field, uint mode)
- {
- Node *node;
-
- lexer->insert = null; /* defer implicit inline start tags */
-
- while ((node = GetToken(lexer, IgnoreWhitespace)) != null)
- {
- if (node->tag == field->tag && node->type == EndTag)
- {
- FreeNode(node);
- TrimSpace(lexer, field->last);
- return;
- }
-
- if (node->type == CommentTag ||
- node->type == ProcInsTag ||
- node->type == AspTag)
- {
- InsertNode(field, node);
- continue;
- }
-
- if (node->type == StartTag &&
- (node->tag == tag_option || node->tag == tag_optgroup))
- {
- InsertNode(field, node);
- ParseTag(lexer, node, IgnoreWhitespace);
- continue;
- }
-
- /* discard unexpected tags */
- ReportWarning(lexer, field, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- }
-
- ReportWarning(lexer, field, node, MISSING_ENDTAG_FOR);
- }
-
- void ParseText(Lexer *lexer, Node *field, uint mode)
- {
- Node *node;
-
- lexer->insert = null; /* defer implicit inline start tags */
-
- while ((node = GetToken(lexer, Preformatted)) != null)
- {
- if (node->tag == field->tag && node->type == EndTag)
- {
- FreeNode(node);
- TrimSpace(lexer, field->last);
- return;
- }
-
- if (node->type == TextNode ||
- node->type == CommentTag ||
- node->type == ProcInsTag ||
- node->type == AspTag)
- {
- InsertNode(field, node);
- continue;
- }
-
- if (node->tag == tag_font)
- {
- ReportWarning(lexer, field, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- /* terminate element on other tags */
- if (!(field->tag->model & CM_OPT))
- ReportWarning(lexer, field, node, MISSING_ENDTAG_BEFORE);
-
- UngetToken(lexer);
- TrimSpace(lexer, field->last);
- return;
- }
-
- if (!(field->tag->model & CM_OPT))
- ReportWarning(lexer, field, node, MISSING_ENDTAG_FOR);
- }
-
-
- void ParseTitle(Lexer *lexer, Node *title, uint mode)
- {
- Node *node;
-
- while ((node = GetToken(lexer, MixedContent)) != null)
- {
- if (node->tag == title->tag && node->type == EndTag)
- {
- FreeNode(node);
- TrimSpace(lexer, title->last);
- return;
- }
-
- if (node->type == TextNode)
- {
- /* only called for 1st child */
- if (title->content == null)
- TrimInitialSpace(lexer, title, node);
-
- if (node->start >= node->end)
- {
- FreeNode(node);
- continue;
- }
-
- InsertNode(title, node);
- continue;
- }
-
- if (node->type == CommentTag ||
- node->type == ProcInsTag ||
- node->type == AspTag)
- {
- InsertNode(title, node);
- continue;
- }
-
- /* discard unknown tags */
- if (node->tag == null)
- {
- ReportWarning(lexer, title, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- /* pushback unexpected tokens */
- ReportWarning(lexer, title, node, MISSING_ENDTAG_BEFORE);
- UngetToken(lexer);
- TrimSpace(lexer, title->last);
- return;
- }
-
- ReportWarning(lexer, title, node, MISSING_ENDTAG_FOR);
- }
-
- /*
- This isn't quite right for CDATA content as it recognises
- tags within the content and parses them accordingly.
- This will unfortunately screw up scripts which include
- < + letter, < + !, < + ? or < + / + letter
- */
-
- void ParseScript(Lexer *lexer, Node *script, uint mode)
- {
- Node *node;
-
- node = GetCDATA(lexer, script);
-
- if (node)
- InsertNode(script, node);
- }
-
- Bool IsJavaScript(Node *node)
- {
- Bool result = no;
- AttVal *attr;
-
- if (node->attributes == null)
- return yes;
-
- for (attr = node->attributes; attr; attr = attr->next)
- {
- if ( (wstrcasecmp(attr->attribute, "language") == 0
- || wstrcasecmp(attr->attribute, "type") == 0)
- && wsubstr(attr->value, "javascript"))
- result = yes;
- }
-
- return result;
- }
-
- void ParseHead(Lexer *lexer, Node *head, uint mode)
- {
- Node *node;
- Bool HasTitle = no;
-
- while ((node = GetToken(lexer, IgnoreWhitespace)) != null)
- {
- if (node->tag == head->tag && node->type == EndTag)
- {
- FreeNode(node);
- break;
- }
-
- if (node->type == TextNode)
- {
- UngetToken(lexer);
- break;
- }
-
- if (node->type == CommentTag ||
- node->type == ProcInsTag ||
- node->type == AspTag)
- {
- InsertNode(head, node);
- continue;
- }
-
- if (node->type == DocTypeTag)
- {
- InsertDocType(lexer, head, node);
- continue;
- }
-
- /* discard unknown tags */
- if (node->tag == null)
- {
- ReportWarning(lexer, head, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- if (!(node->tag->model & CM_HEAD))
- {
- UngetToken(lexer);
- break;
- }
-
- if (node->type == StartTag || node->type == StartEndTag)
- {
- if (node->tag == tag_title)
- HasTitle = yes;
- else if (node->tag == tag_noscript)
- ReportWarning(lexer, head, node, TAG_NOT_ALLOWED_IN);
-
- InsertNode(head, node);
- ParseTag(lexer, node, IgnoreWhitespace);
- continue;
- }
-
- /* discard unexpected text nodes and end tags */
- ReportWarning(lexer, head, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- }
-
- if (!HasTitle)
- {
- ReportWarning(lexer, head, null, MISSING_TITLE_ELEMENT);
- InsertNode(head, InferredTag(lexer, "title"));
- }
- }
-
- void ParseBody(Lexer *lexer, Node *body, uint mode)
- {
- Node *node;
- Bool checkstack, iswhitenode;
-
- mode = IgnoreWhitespace;
- checkstack = yes;
-
- while ((node = GetToken(lexer, mode)) != null)
- {
- if (node->tag == body->tag && node->type == EndTag)
- {
- TrimSpace(lexer, body->last);
- FreeNode(node);
- SeenBodyEndTag = 1;
- mode = IgnoreWhitespace;
-
- if (body->parent->tag == tag_noframes)
- break;
-
- continue;
- }
-
- if (node->tag == tag_html)
- {
- if (node->type == StartTag || node->type == StartEndTag)
- ReportWarning(lexer, body, node, DISCARDING_UNEXPECTED);
-
- FreeNode(node);
- continue;
- }
-
- iswhitenode = no;
-
- if (node->type == TextNode &&
- node->end <= node->start + 1 &&
- lexer->lexbuf[node->start] == ' ')
- iswhitenode = yes;
-
- if (node->type == CommentTag ||
- node->type == ProcInsTag ||
- node->type == AspTag)
- {
- InsertNode(body, node);
- continue;
- }
-
- if (SeenBodyEndTag == 1 && !iswhitenode)
- {
- ++SeenBodyEndTag;
- ReportWarning(lexer, body, node, CONTENT_AFTER_BODY);
- }
-
- /* mixed content model permits text */
- if (node->type == TextNode)
- {
- if (iswhitenode && mode == IgnoreWhitespace)
- {
- FreeNode(node);
- continue;
- }
-
- if (checkstack)
- {
- checkstack = no;
-
- if (InlineDup(lexer, node) > 0)
- continue;
- }
-
- InsertNode(body, node);
- mode = MixedContent;
- continue;
- }
-
- if (node->type == DocTypeTag)
- {
- InsertDocType(lexer, body, node);
- continue;
- }
- /* discard unknown and PARAM tags */
- if (node->tag == null || node->tag == tag_param)
- {
- ReportWarning(lexer, body, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- /*
- Netscape allows LI and DD directly in BODY
- We infer UL or DL respectively and use this
- Bool to exclude block-level elements so as
- to match Netscape's observed behaviour.
- */
- lexer->excludeBlocks = no;
-
- if (!(node->tag->model & CM_BLOCK) &&
- !(node->tag->model & CM_INLINE))
- {
- /* avoid this error message being issued twice */
- if (!node->tag->model & CM_HEAD)
- ReportWarning(lexer, body, node, TAG_NOT_ALLOWED_IN);
-
- if (node->tag->model & CM_HTML)
- {
- /* copy body attributes if current body was inferred */
- if (node->tag == tag_body && body->implicit
- && body->attributes == null)
- {
- body->attributes = node->attributes;
- node->attributes = null;
- }
-
- FreeNode(node);
- continue;
- }
-
- if (node->tag->model & CM_HEAD)
- {
- MoveToHead(lexer, body, node);
- continue;
- }
-
- if (node->tag->model & CM_LIST)
- {
- UngetToken(lexer);
- node = InferredTag(lexer, "ul");
- lexer->excludeBlocks = yes;
- }
- else if (node->tag->model & CM_DEFLIST)
- {
- UngetToken(lexer);
- node = InferredTag(lexer, "dl");
- lexer->excludeBlocks = yes;
- }
- else if (node->tag->model & (CM_TABLE | CM_ROWGRP | CM_ROW))
- {
- UngetToken(lexer);
- node = InferredTag(lexer, "table");
- lexer->excludeBlocks = yes;
- }
- else
- {
- if (!node->tag->model & (CM_ROW | CM_FIELD))
- {
- UngetToken(lexer);
- return;
- }
-
- /* ignore </td> </th> <option> etc. */
- continue;
- }
- }
-
- if (node->type == StartTag || node->type == StartEndTag)
- {
- if ((node->tag->model & CM_INLINE) && !(node->tag->model & CM_MIXED))
- {
- if (checkstack && !node->implicit)
- {
- checkstack = no;
-
- if (InlineDup(lexer, node) > 0)
- continue;
- }
-
- mode = MixedContent;
- }
- else
- {
- checkstack = yes;
- mode = IgnoreWhitespace;
- }
-
- if (node->implicit)
- ReportWarning(lexer, body, node, INSERTING_TAG);
-
- InsertNode(body, node);
- ParseTag(lexer, node, mode /* IgnoreWhitespace /*MixedContent*/);
- continue;
- }
- else if (node->type == EndTag)
- PopInline(lexer, node); /* if inline end tag */
-
- /* discard unexpected end tags */
- ReportWarning(lexer, body, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- }
- }
-
- void ParseNoFrames(Lexer *lexer, Node *noframes, uint mode)
- {
- Node *node;
- Bool checkstack;
-
- lexer->badAccess |= USING_NOFRAMES;
- mode = IgnoreWhitespace;
- checkstack = yes;
-
- while ((node = GetToken(lexer, mode)) != null)
- {
- if (node->tag == noframes->tag && node->type == EndTag)
- {
- FreeNode(node);
- TrimSpace(lexer, noframes->last);
- return;
- }
-
- if (node->tag == tag_html)
- {
- if (node->type == StartTag || node->type == StartEndTag)
- ReportWarning(lexer, noframes, node, DISCARDING_UNEXPECTED);
-
- FreeNode(node);
- continue;
- }
-
- if (node->type == CommentTag ||
- node->type == ProcInsTag ||
- node->type == AspTag)
- {
- InsertNode(noframes, node);
- continue;
- }
-
- if (node->tag == tag_body && node->type == StartTag)
- {
- InsertNode(noframes, node);
- ParseTag(lexer, node, IgnoreWhitespace /*MixedContent*/);
- continue;
- }
-
- if (node->type == StartTag || node->type == StartEndTag || node->type == TextNode)
- {
- UngetToken(lexer);
- node = InferredTag(lexer, "body");
- ReportWarning(lexer, noframes, node, INSERTING_TAG);
- InsertNode(noframes, node);
- ParseTag(lexer, node, IgnoreWhitespace /*MixedContent*/);
- continue;
- }
- #if 0
- /* mixed content model permits text */
- if (node->type == TextNode)
- {
- if (checkstack)
- {
- checkstack = no;
-
- if (InlineDup(lexer, node) > 0)
- continue;
- }
-
- InsertNode(noframes, node);
- mode = MixedContent;
- continue;
- }
-
- /* discard unknown and PARAM tags */
- if (node->tag == null || node->tag == tag_param)
- {
- ReportWarning(lexer, noframes, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- /*
- Treat LI and DD etc. in same way as in BODY
- */
- lexer->excludeBlocks = no;
-
- if (!(node->tag->model & CM_BLOCK) &&
- !(node->tag->model & CM_INLINE))
- {
- ReportWarning(lexer, noframes, node, TAG_NOT_ALLOWED_IN);
-
- if (node->tag->model & CM_HTML)
- {
- FreeNode(node);
- continue;
- }
-
- if (node->tag->model & CM_HEAD)
- {
- MoveToHead(lexer, noframes, node);
- continue;
- }
-
- UngetToken(lexer);
-
- if (node->tag->model & CM_LIST)
- {
- node = InferredTag(lexer, "ul");
- lexer->excludeBlocks = yes;
- }
- else if (node->tag->model & CM_DEFLIST)
- {
- node = InferredTag(lexer, "dl");
- lexer->excludeBlocks = yes;
- }
- else if (node->tag->model & CM_TABLE)
- {
- node = InferredTag(lexer, "table");
- lexer->excludeBlocks = yes;
- }
- else
- return;
- }
-
- if (node->type == StartTag || node->type == StartEndTag)
- {
- if (node->tag->model & CM_INLINE)
- {
- if (checkstack && !node->implicit)
- {
- checkstack = no;
-
- if (InlineDup(lexer, node) > 0)
- continue;
- }
-
- mode = MixedContent;
- }
- else
- {
- checkstack = yes;
- mode = IgnoreWhitespace;
- }
-
- if (node->implicit)
- ReportWarning(lexer, noframes, node, INSERTING_TAG);
-
- InsertNode(noframes, node);
- ParseTag(lexer, node, IgnoreWhitespace /*MixedContent*/);
- continue;
- }
- else if (node->type == EndTag)
- PopInline(lexer, node); /* if inline end tag */
- #endif
- /* discard unexpected end tags */
- ReportWarning(lexer, noframes, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- }
-
- ReportWarning(lexer, noframes, node, MISSING_ENDTAG_FOR);
- }
-
- void ParseFrameSet(Lexer *lexer, Node *frameset, uint mode)
- {
- Node *node;
-
- lexer->badAccess |= USING_FRAMES;
-
- while ((node = GetToken(lexer, IgnoreWhitespace)) != null)
- {
- if (node->tag == frameset->tag && node->type == EndTag)
- {
- FreeNode(node);
- TrimSpace(lexer, frameset->last);
- return;
- }
-
- if (node->type == CommentTag ||
- node->type == ProcInsTag ||
- node->type == AspTag)
- {
- InsertNode(frameset, node);
- continue;
- }
-
- if (node->tag == null)
- {
- ReportWarning(lexer, frameset, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- if (node->tag == tag_body)
- {
- UngetToken(lexer);
- node = InferredTag(lexer, "noframes");
- ReportWarning(lexer, frameset, node, INSERTING_TAG);
- }
-
- if (node->type == StartTag && node->tag->model & CM_FRAMES)
- {
- InsertNode(frameset, node);
- lexer->excludeBlocks = no;
- ParseTag(lexer, node, MixedContent);
- continue;
- }
-
-
- /* discard unexpected tags */
- ReportWarning(lexer, frameset, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- }
-
- ReportWarning(lexer, frameset, node, MISSING_ENDTAG_FOR);
- }
-
- void ParseHTML(Lexer *lexer, Node *html, uint mode)
- {
- Node *node, *head;
- Node *frameset = null;
- Node *noframes = null;
-
- XmlTags = no;
- SeenBodyEndTag = 0;
-
- for (;;)
- {
- node = GetToken(lexer, IgnoreWhitespace);
-
- if (node == null)
- {
- node = InferredTag(lexer, "head");
- break;
- }
-
- if (node->tag == tag_head)
- break;
-
- if (node->tag == html->tag && node->type == EndTag)
- {
- ReportWarning(lexer, html, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- if (node->type == CommentTag ||
- node->type == ProcInsTag ||
- node->type == AspTag)
- {
- InsertNode(html, node);
- continue;
- }
-
- UngetToken(lexer);
- node = InferredTag(lexer, "head");
- break;
- }
-
- head = node;
- InsertNode(html, head);
- ParseHead(lexer, head, mode);
-
- for (;;)
- {
- node = GetToken(lexer, IgnoreWhitespace);
-
- if (node == null)
- {
- if (frameset == null) /* create an empty body */
- node = InferredTag(lexer, "body");
-
- return;
- }
-
- /* robustly handle html tags */
- if (node->tag == html->tag)
- {
- if (node->type != StartTag && frameset == null)
- ReportWarning(lexer, html, node, DISCARDING_UNEXPECTED);
-
- FreeNode(node);
- continue;
- }
-
- if (node->type == CommentTag ||
- node->type == ProcInsTag ||
- node->type == AspTag)
- {
- InsertNode(html, node);
- continue;
- }
-
- /* if frameset document coerce <body> to <noframes> */
- if (node->tag == tag_body)
- {
- if (node->type != StartTag)
- {
- ReportWarning(lexer, html, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- if (frameset != null)
- {
- UngetToken(lexer);
-
- if (noframes == null)
- {
- noframes = InferredTag(lexer, "noframes");
- InsertNode(frameset, noframes);
- ReportWarning(lexer, html, noframes, INSERTING_TAG);
- }
-
- ParseTag(lexer, noframes, mode);
- continue;
- }
-
- break; /* to parse body */
- }
-
- /* flag an error if we see more than one frameset */
- if (node->tag == tag_frameset)
- {
- if (node->type != StartTag)
- {
- ReportWarning(lexer, html, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- if (frameset != null)
- ReportError(lexer, html, node, DUPLICATE_FRAMESET);
- else
- frameset = node;
-
- InsertNode(html, node);
- ParseTag(lexer, node, mode);
-
- /*
- see if it includes a noframes element so
- that we can merge subsequent noframes elements
- */
-
- for (node = frameset->content; node; node = node->next)
- {
- if (node->tag == tag_noframes)
- noframes = node;
- }
- continue;
- }
-
- /* if not a frameset document coerce <noframes> to <body> */
- if (node->tag == tag_noframes)
- {
- if (node->type != StartTag)
- {
- ReportWarning(lexer, html, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- if (frameset == null)
- {
- ReportWarning(lexer, html, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- node = InferredTag(lexer, "body");
- break;
- }
-
- if (noframes == null)
- {
- noframes = node;
- InsertNode(frameset, noframes);
- }
- else
- FreeNode(node);
-
- ParseTag(lexer, noframes, mode);
- continue;
- }
-
- UngetToken(lexer);
-
- /* insert other content into noframes element */
-
- if (frameset)
- {
- if (noframes == null)
- {
- noframes = InferredTag(lexer, "noframes");
- InsertNode(frameset, noframes);
- }
- else
- ReportWarning(lexer, html, node, NOFRAMES_CONTENT);
-
- ParseTag(lexer, noframes, mode);
- continue;
- }
-
- node = InferredTag(lexer, "body");
- break;
- }
-
- /* node must be body */
-
- InsertNode(html, node);
- ParseTag(lexer, node, mode);
- }
-
- /*
- HTML is the top level element
- */
- Node *ParseDocument(Lexer *lexer)
- {
- Node *node, *document, *html;
-
- document = NewNode();
- document->type = RootNode;
-
- while ((node = GetToken(lexer, IgnoreWhitespace)) != null)
- {
- if (node->type == CommentTag ||
- node->type == ProcInsTag ||
- node->type == AspTag ||
- node->type == DocTypeTag)
- {
- InsertNode(document, node);
- continue;
- }
-
- if (node->type == EndTag)
- {
- ReportWarning(lexer, RootNode, node, DISCARDING_UNEXPECTED);
- FreeNode(node);
- continue;
- }
-
- if (node->type != StartTag || node->tag != tag_html)
- {
- UngetToken(lexer);
- html = InferredTag(lexer, "html");
- }
- else
- html = node;
-
- InsertNode(document, html);
- ParseHTML(lexer, html, no);
- break;
- }
-
- return document;
- }
-
- Bool XMLPreserveWhiteSpace(Node *element)
- {
- AttVal *attribute;
-
- /* search attributes for xml:space */
- for (attribute = element->attributes; attribute; attribute = attribute->next)
- {
- if (wstrcmp(attribute->attribute, "xml:space") == 0)
- {
- if (wstrcmp(attribute->value, "preserve") == 0)
- return yes;
-
- return no;
- }
- }
-
- /* kludge for html docs without explicit xml:space attribute */
- if (wstrcasecmp(element->element, "pre") == 0
- || wstrcasecmp(element->element, "script") == 0
- || wstrcasecmp(element->element, "style") == 0)
- return yes;
-
- /* kludge for XSL docs */
- if (wstrcasecmp(element->element, "xsl:text") == 0)
- return yes;
-
- return no;
- }
-
- /*
- XML documents
- */
- void ParseXMLElement(Lexer *lexer, Node *element, uint mode)
- {
- Node *node;
-
- /* Jeff Young's kludge for XSL docs */
-
- if (wstrcasecmp(element->element, "xsl:text") == 0)
- return;
-
- /* if node is pre or has xml:space="preserve" then do so */
-
- if (XMLPreserveWhiteSpace(element))
- mode = Preformatted;
-
- while ((node = GetToken(lexer, mode)) != null)
- {
- if (node->type == EndTag && wstrcmp(node->element, element->element) == 0)
- {
- FreeNode(node);
- break;
- }
-
- /* discard unexpected end tags */
- if (node->type == EndTag)
- {
- ReportWarning(lexer, element, node, UNEXPECTED_ENDTAG);
- FreeNode(node);
- continue;
- }
-
- /* parse content on seeing start tag */
- if (node->type == StartTag)
- ParseXMLElement(lexer, node, mode);
-
- InsertNode(element, node);
- }
-
- /*
- if first child is text then trim initial space and
- delete text node if it is empty.
- */
-
- node = element->content;
-
- if (node && node->type == TextNode && mode != Preformatted)
- {
- if (lexer->lexbuf[node->start] == ' ')
- {
- node->start++;
-
- if (node->start >= node->end)
- DiscardElement(lexer, node);
- }
- }
-
- /*
- if last child is text then trim final space and
- delete the text node if it is empty
- */
-
- node = element->last;
-
- if (node && node->type == TextNode && mode != Preformatted)
- {
- if (lexer->lexbuf[node->end - 1] == ' ')
- {
- node->end--;
-
- if (node->start >= node->end)
- DiscardElement(lexer, node);
- }
- }
- }
-
- Node *ParseXMLDocument(Lexer *lexer)
- {
- Node *node, *document, *last;
-
- document = NewNode();
- document->type = RootNode;
- last = null;
- XmlTags = yes;
-
- while ((node = GetToken(lexer, IgnoreWhitespace)) != null)
- {
- /* discard unexpected end tags */
- if (node->type == EndTag)
- {
- ReportWarning(lexer, null, node, UNEXPECTED_ENDTAG);
- FreeNode(node);
- continue;
- }
-
- /* if start tag then parse element's content */
- if (node->type == StartTag)
- ParseXMLElement(lexer, node, IgnoreWhitespace);
-
- if (last != null)
- last->next = node;
- else
- document->content = node;
-
- last = node;
- }
-
- if (doctype_mode == doctype_omit)
- DiscardDocType(document);
-
- /* ensure presence of initial <?XML version="1.0"?> */
- if (XmlPi)
- FixXMLPI(lexer, document);
-
- return document;
- }
-
-