home *** CD-ROM | disk | FTP | other *** search
- // MDF - PORT FROM NCSA VERSION 2.1
-
- /****************************************************************************
- * NCSA Mosaic for the X Window System *
- * Software Development Group *
- * National Center for Supercomputing Applications *
- * University of Illinois at Urbana-Champaign *
- * 605 E. Springfield, Champaign IL 61820 *
- * mosaic@ncsa.uiuc.edu *
- * *
- * Copyright (C) 1993, Board of Trustees of the University of Illinois *
- * *
- * NCSA Mosaic software, both binary and source (hereafter, Software) is *
- * copyrighted by The Board of Trustees of the University of Illinois *
- * (UI), and ownership remains with the UI. *
- * *
- * The UI grants you (hereafter, Licensee) a license to use the Software *
- * for academic, research and internal business purposes only, without a *
- * fee. Licensee may distribute the binary and source code (if released) *
- * to third parties provided that the copyright notice and this statement *
- * appears on all copies and that no charge is associated with such *
- * copies. *
- * *
- * Licensee may make derivative works. However, if Licensee distributes *
- * any derivative work based on or derived from the Software, then *
- * Licensee will (1) notify NCSA regarding its distribution of the *
- * derivative work, and (2) clearly notify users that such derivative *
- * work is a modified version and not the original NCSA Mosaic *
- * distributed by the UI. *
- * *
- * Any Licensee wishing to make commercial use of the Software should *
- * contact the UI, c/o NCSA, to negotiate an appropriate license for such *
- * commercial use. Commercial use includes (1) integration of all or *
- * part of the source code into a product for sale or license by or on *
- * behalf of Licensee to third parties, or (2) distribution of the binary *
- * code or source code to third parties that need it to utilize a *
- * commercial product sold or licensed by or on behalf of Licensee. *
- * *
- * UI MAKES NO REPRESENTATIONS ABOUT THE SUITABILITY OF THIS SOFTWARE FOR *
- * ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED *
- * WARRANTY. THE UI SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY THE *
- * USERS OF THIS SOFTWARE. *
- * *
- * By using or copying this Software, Licensee agrees to abide by the *
- * copyright law and all other applicable laws of the U.S. including, but *
- * not limited to, export control laws, and the terms of this license. *
- * UI shall have the right to terminate this license immediately by *
- * written notice upon Licensee's breach of, or non-compliance with, any *
- * of its terms. Licensee may be held legally responsible for any *
- * copyright infringement that is caused or encouraged by Licensee's *
- * failure to abide by the terms of this license. *
- * *
- * Comments and questions are welcome and can be sent to *
- * mosaic-x@ncsa.uiuc.edu. *
- ****************************************************************************/
-
- #ifdef TIMING
- #include <sys/time.h>
- struct timeval Tv;
- struct timezone Tz;
- #endif
-
- #include <stdio.h>
- #include <ctype.h>
- #ifndef sun
- /* To get atoi. */
- #include <stdlib.h>
- #endif
- #include "HTML.h"
- #include "HTMLamp.h"
-
- #ifdef _AMIGA
- #include <string.h>
- #endif
-
- extern void FreeObjList(struct mark_up *);
- extern struct mark_up *AddObj(struct mark_up **, struct mark_up *,
- struct mark_up *, int);
-
-
#ifdef NOT_ASCII
/*
 * Non-ASCII hosts: defer to the C library.  The argument is converted to
 * unsigned char first, because tolower() is only defined for EOF and for
 * values representable as an unsigned char.
 */
#define TOLOWER(x)	(tolower((unsigned char)(x)))
#else

/*
 * A hack to speed up caseless_equal.  Thanks to Quincey Koziol for
 * developing it for me.
 *
 * map_table[c] is c itself for every byte value, except that the ASCII
 * upper case letters 65..90 ('A'..'Z') map to 97..122 ('a'..'z').
 */
unsigned char map_table[256] = {
	  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
	 16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
	 32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
	 48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
	 64,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
	112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,  91,  92,  93,  94,  95,
	 96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
	112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
	128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
	144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
	176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
	192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
	208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
	224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
	240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
};

/*
 * The index is forced into 0..255.  Callers pass plain char values, and
 * where char is signed a byte >= 0x80 would otherwise become a negative
 * index and read outside the table.
 */
#define TOLOWER(x)	(map_table[(unsigned char)(x)])
#endif /* NOT_ASCII */
-
- /* ANSI declarations added by MDF */
-
- int ParseMarkType(char *);
- // FreeObjList
- // AddObj
-
-
/*
 * Check if two strings are equal, ignoring case.
 * The strings must be of the same length to be equal.
 * Case folding is done with this file's TOLOWER macro.
 *
 * str1, str2: NUL-terminated strings; either may be NULL.
 * Returns 1 if equal, 0 otherwise (including when either is NULL).
 */
int
caseless_equal(char *str1, char *str2)
{
	if ((str1 == NULL) || (str2 == NULL))
	{
		return(0);
	}

	while ((*str1 != '\0') && (*str2 != '\0'))
	{
		/*
		 * Go through unsigned char so that bytes >= 0x80 are never
		 * handed to TOLOWER as negative values.
		 */
		if (TOLOWER((unsigned char)*str1) !=
		    TOLOWER((unsigned char)*str2))
		{
			return(0);
		}
		str1++;
		str2++;
	}

	/* Equal only if both strings ended at the same position. */
	return((*str1 == '\0') && (*str2 == '\0'));
}
-
-
/*
 * Check if two strings are equal in the first cnt characters, ignoring case.
 * The strings must both be at least of length cnt to be equal.
 * Case folding is done with this file's TOLOWER macro.
 *
 * str1, str2: NUL-terminated strings; either may be NULL.
 * cnt:        number of leading characters to compare.
 * Returns 1 if equal, 0 otherwise.  A cnt below 1 compares nothing and
 * returns 1 (for non-NULL strings).
 */
int
caseless_equal_prefix(char *str1, char *str2, int cnt)
{
	int i;

	if ((str1 == NULL)||(str2 == NULL))
	{
		return(0);
	}

	if (cnt < 1)
	{
		return(1);
	}

	for (i = 0; i < cnt; i++)
	{
		if (TOLOWER((unsigned char)str1[i]) !=
		    TOLOWER((unsigned char)str2[i]))
		{
			return(0);
		}

		/*
		 * The characters matched, so a NUL here means both strings
		 * ended before cnt characters.  Stop now: they are too short
		 * to be equal, and reading on would run past the terminators.
		 */
		if (str1[i] == '\0')
		{
			return(0);
		}
	}

	return(1);
}
-
-
/*
 * Clean up the white space in a string.
 * Remove all leading and trailing whitespace, and turn every run of
 * internal whitespace into a single space separating words.
 * The cleaning is done in place in the passed txt buffer; the result
 * can be shorter than the input but never longer.
 *
 * txt: NUL-terminated, writable buffer.  NULL is ignored.
 */
void
clean_white_space(char *txt)
{
	char *src;	/* next character to examine */
	char *dst;	/* next position to write; never ahead of src */

	if (txt == NULL)
	{
		return;
	}

	src = txt;
	dst = txt;

	/*
	 * Remove leading white space.  The unsigned char casts keep bytes
	 * >= 0x80 inside the domain the <ctype.h> functions accept.
	 */
	while (isspace((unsigned char)*src))
	{
		src++;
	}

	while (*src != '\0')
	{
		/*
		 * Copy one word down to the write position.
		 */
		while ((*src != '\0')&&(!isspace((unsigned char)*src)))
		{
			*dst++ = *src++;
		}

		/*
		 * Skip the white space after it; emit one separating space
		 * only if another word follows, so trailing space is dropped.
		 */
		while (isspace((unsigned char)*src))
		{
			src++;
		}
		if (*src != '\0')
		{
			*dst++ = ' ';
		}
	}

	*dst = '\0';
}
-
-
- /*
- * parse an amperstand escape, and return the appropriate character, or
- * '\0' on error.
- * we should really only use caseless_equal_prefix for unterminated, and use
- * caseless_equal otherwise, but since there are so many escapes, and I
- * don't want to type everything twice, I always use caseless_equal_prefix
- * Turns out the escapes are case sensitive, use strncmp.
- */
- char
- ExpandEscapes(char *esc, char **endp, int unterminated)
- {
- int cnt;
- char val;
-
- esc++;
- if (*esc == '#')
- {
- if (unterminated)
- {
- char *tptr;
- char tchar;
-
- tptr = (char *)(esc + 1);
- while (isdigit((int)*tptr))
- {
- tptr++;
- }
- tchar = *tptr;
- *tptr = '\0';
- val = (char)atoi((esc + 1));
- *tptr = tchar;
- *endp = tptr;
- }
- else
- {
- val = (char)atoi((esc + 1));
- *endp = (char *)(esc + strlen(esc));
- }
- }
- else
- {
- cnt = 0;
- while (AmpEscapes[cnt].tag != NULL)
- {
- if (strncmp(esc, AmpEscapes[cnt].tag,
- strlen(AmpEscapes[cnt].tag)) == 0)
- {
- val = AmpEscapes[cnt].value;
- *endp = (char *)(esc +
- strlen(AmpEscapes[cnt].tag));
- break;
- }
- cnt++;
- }
- if (AmpEscapes[cnt].tag == NULL)
- {
- #ifdef VERBOSE
- fprintf(stderr, "Error bad & string\n");
- #endif
- val = '\0';
- *endp = (char *)NULL;
- }
- }
-
- return(val);
- }
-
-
/*
 * Return non-zero if p points at the start of an escape sequence:
 * an '&' immediately followed by a letter or a hash mark.
 */
static int
clean_text_escape_start(const char *p)
{
	return((*p == '&')&&
		((isalpha((unsigned char)p[1]))||(p[1] == '#')));
}


/*
 * Clean the special HTML character escapes out of the text and replace
 * them with the appropriate characters, such as "&lt;" = "<",
 * "&gt;" = ">", "&amp;" = "&".
 * Escapes such as &lt; can apparently be left unterminated (no ';').
 * The '&' character must be immediately followed by a letter (or a
 * hash mark) to be a valid escape sequence.  Other &'s are left alone.
 * The cleaning is done by rearranging chars in the passed txt buffer.
 * If any escapes are replaced, the string becomes shorter.
 *
 * txt: NUL-terminated, writable buffer.  NULL is ignored.
 *
 * Each candidate escape (from '&' up to the next ';' or the end of the
 * string) is copied to a scratch buffer, white-space cleaned, and handed
 * to ExpandEscapes.  A sequence ExpandEscapes rejects is left in the text
 * unchanged.
 */
void
clean_text(char *txt)
{
	int unterminated;
	char *ptr;	/* read position */
	char *ptr2;	/* write position, never ahead of ptr */
	char *start;	/* the '&' of the escape being processed */
	char *text;	/* scratch copy of the escape */
	char *tend;
	char val;
	size_t len;

	if (txt == NULL)
	{
		return;
	}

	/*
	 * Quick scan to find the first escape sequence.
	 * Return if there are none.
	 */
	ptr = txt;
	while ((*ptr != '\0')&&(!clean_text_escape_start(ptr)))
	{
		ptr++;
	}
	if (*ptr == '\0')
	{
		return;
	}

	/*
	 * Loop, replacing escape sequences, and moving up remaining
	 * text.
	 */
	ptr2 = ptr;
	while (*ptr != '\0')
	{
		/*
		 * Find the extent of the escape sequence: start to ptr.
		 */
		start = ptr;
		while ((*ptr != ';')&&(*ptr != '\0'))
		{
			ptr++;
		}
		unterminated = (*ptr == '\0');
#ifdef VERBOSE
		if (unterminated)
		{
			fprintf(stderr, "warning: unterminated & (%s)\n",
				start);
		}
#endif

		/*
		 * Copy the escape sequence into a separate buffer.
		 * Then clean spaces so that "& lt ;" = "<" etc.
		 * The cleaning should be unnecessary.
		 */
		len = (size_t)(ptr - start);
		text = (char *)malloc(len + 1);
		if (text == NULL)
		{
			fprintf(stderr, "Cannot malloc space for & text\n");
			/*
			 * Close the gap left by earlier replacements so the
			 * buffer holds the cleaned prefix followed by the
			 * untouched remainder, not a half-shifted mix.
			 */
			memmove(ptr2, start, strlen(start) + 1);
			return;
		}
		memcpy(text, start, len);
		text[len] = '\0';
		clean_white_space(text);

		/*
		 * Replace escape sequence with appropriate character
		 */
		val = ExpandEscapes(text, &tend, unterminated);
		if (val != '\0')
		{
			if (unterminated)
			{
				/*
				 * No ';' to stop at: resume right after the
				 * part of the escape that was recognised.
				 * (ptr is left on its last character; the
				 * increment below steps past it.)
				 */
				ptr = (char *)(start + (tend - text) - 1);
			}
			*ptr2 = val;
		}
		/*
		 * invalid escape sequence. skip it.
		 */
		else
		{
#ifdef VERBOSE
			fprintf(stderr, "Error bad & string\n");
#endif
			ptr = start;
			*ptr2 = *ptr;
		}
		free(text);

		/*
		 * Copy forward remaining text until you find the next
		 * escape sequence
		 */
		ptr2++;
		ptr++;
		while ((*ptr != '\0')&&(!clean_text_escape_start(ptr)))
		{
			*ptr2++ = *ptr++;
		}
	}
	*ptr2 = '\0';
}
-
-
/*
 * Get a block of text from a HTML document.
 * All text from start to the end, or to the first mark
 * (a mark is '<' or '</' followed by any letter, or '<!')
 * is returned in a malloced buffer that the caller must free.
 * Also, *endp is set to point at that '<' or at the terminating '\0'.
 * The returned text has already had its '&' escapes expanded.
 *
 * Returns NULL if start is NULL (*endp untouched), if there is no text
 * before the mark (*endp == start), or if memory cannot be allocated
 * (*endp != start).  The input string is not modified.
 */
char *
get_text(char *start, char **endp)
{
	char *ptr;
	char *text;
	size_t len;

	if (start == NULL)
	{
		return(NULL);
	}

	/*
	 * Scan up to the beginning of a mark, or the end
	 */
	for (ptr = start; *ptr != '\0'; ptr++)
	{
		if (*ptr != '<')
		{
			continue;
		}
		if (isalpha((unsigned char)ptr[1]))
		{
			break;
		}
		if ((ptr[1] == '/')&&(isalpha((unsigned char)ptr[2])))
		{
			break;
		}
		if (ptr[1] == '!') /* a comment */
		{
			break;
		}
	}
	*endp = ptr;

	if (ptr == start)
	{
		return(NULL);
	}

	/*
	 * Copy the text into its own buffer, and clean it
	 * of escape sequences.  The span length is known, so the
	 * source never needs a temporary terminator written into it.
	 */
	len = (size_t)(ptr - start);
	text = (char *)malloc(len + 1);
	if (text == NULL)
	{
		fprintf(stderr, "Cannot malloc space for text\n");
		return(NULL);
	}
	memcpy(text, start, len);
	text[len] = '\0';
	clean_text(text);

	return(text);
}
-
-
- /*
- * Get the mark text between '<' and '>'. From the text, determine
- * its type, and fill in a mark_up structure to return. Also returns
- * endp pointing to the ttrailing '>' in the original string.
- */
- struct mark_up *
- get_mark(char *start, char **endp)
- {
- char *ptr;
- char *text;
- char tchar;
- // int type;
- struct mark_up *mark;
-
- if (start == NULL)
- {
- return(NULL);
- }
-
- if (*start != '<')
- {
- return(NULL);
- }
-
- start++;
-
- mark = (struct mark_up *)malloc(sizeof(struct mark_up));
- if (mark == NULL)
- {
- fprintf(stderr, "Cannot malloc space for mark_up struct\n");
- return(NULL);
- }
-
- /*
- * Grab the mark text
- */
- ptr = start;
- while ((*ptr != '>')&&(*ptr != '\0'))
- {
- #if 0
- if (*ptr == '"') {
- ptr += 1 ;
- while (*ptr && *ptr != '"') {
- if (*ptr == '\n' || *ptr == '\r') *ptr = ' ' ;
- ptr += 1 ;
- }
- }
- if (*ptr == '\'') {
- ptr += 1 ;
- while (*ptr && *ptr != '\'') {
- if (*ptr == '\n' || *ptr == '\r') *ptr = ' ' ;
- ptr += 1 ;
- }
- }
- if (*ptr == '-' && ptr[1] == '-') { /* Comment */
- ptr += 2 ;
- for (;;) {
- while (*ptr && *ptr != '-')
- ptr += 1 ;
- if (!*ptr || ptr[1] == '-') break ;
- }
- }
- #endif
- ptr++;
- }
- *endp = ptr;
-
- if (*ptr != '>')
- {
- #ifdef VERBOSE
- fprintf(stderr, "error: bad mark format\n");
- #endif
- return(NULL);
- }
-
- /*
- * Copy the mark text to its own buffer, and
- * clean it of escapes, and odd white space.
- */
- tchar = *ptr;
- *ptr = '\0';
- text = (char *)malloc(strlen(start) + 1);
- if (text == NULL)
- {
- fprintf(stderr, "Cannot malloc space for mark\n");
- *ptr = tchar;
- return(NULL);
- }
- strcpy(text, start);
- *ptr = tchar;
- clean_text(text);
- /*
- * No longer needed because the parsing code is now smarter
- *
- clean_white_space(text);
- *
- */
-
- /*
- * Set whether this is the start or end of a mark
- * block, as well as determining its type.
- */
- if (*text == '/')
- {
- mark->is_end = 1;
- mark->type = ParseMarkType((char *)(text + 1));
- mark->start = NULL;
- mark->text = NULL;
- mark->end = text;
- }
- else
- {
- mark->is_end = 0;
- mark->type = ParseMarkType(text);
- mark->start = text;
- mark->text = NULL;
- mark->end = NULL;
- }
- mark->text = NULL;
- mark->next = NULL;
-
- return(mark);
- }
-
-
- /*
- * Special version of get_text. It reads all text up to the
- * end of the plain text mark, or the end of the file.
- */
- char *
- get_plain_text(char *start, char **endp)
- {
- char *ptr;
- char *text;
- char tchar;
-
- if (start == NULL)
- {
- return(NULL);
- }
-
- /*
- * Read until stopped by end plain text mark.
- */
- ptr = start;
- while (*ptr != '\0')
- {
- /*
- * Beginning of a mark is '<' followed by any letter,
- * or followed by '!' for a comment,
- * or '</' followed by any letter.
- */
- if ((*ptr == '<')&&
- ((isalpha((int)(*(ptr + 1))))||
- (*(ptr + 1) == '!')||
- ((*(ptr + 1) == '/')&&(isalpha((int)(*(ptr + 2)))))))
- {
- struct mark_up *mp;
- char *ep;
-
- /*
- * We think we found a mark. If it is the
- * end of plain text, break out
- */
- mp = get_mark(ptr, &ep);
- if (mp != NULL)
- {
- if (((mp->type == M_PLAIN_TEXT)||
- (mp->type == M_LISTING_TEXT))&&(mp->is_end))
- {
- if (mp->end != NULL)
- {
- free((char *)mp->end);
- }
- free((char *)mp);
- break;
- }
- if (mp->start != NULL)
- {
- free((char *)mp->start);
- }
- if (mp->end != NULL)
- {
- free((char *)mp->end);
- }
- free((char *)mp);
- }
- }
- ptr++;
- }
- *endp = ptr;
-
- if (ptr == start)
- {
- return(NULL);
- }
-
- /*
- * Copy text to its own malloced buffer, and clean it of
- * HTML escapes.
- */
- tchar = *ptr;
- *ptr = '\0';
- text = (char *)malloc(strlen(start) + 1);
- if (text == NULL)
- {
- fprintf(stderr, "Cannot malloc space for text\n");
- *ptr = tchar;
- return(NULL);
- }
- strcpy(text, start);
- *ptr = tchar;
- clean_text(text);
-
- return(text);
- }
-
-
- /*
- * Main parser of HTML text. Takes raw text, and produces a linked
- * list of mark objects. Mark objects are either text strings, or
- * starting and ending mark delimiters.
- * The old list is passed in so it can be freed, and in the future we
- * may want to add code to append to the old list.
- */
- struct mark_up *
- HTMLParse(struct mark_up *old_list, char *str)
- {
- int preformat;
- char *start, *end;
- char *text, *tptr;
- struct mark_up *mark;
- struct mark_up *list;
- struct mark_up *current;
- #ifdef TIMING
- gettimeofday(&Tv, &Tz);
- fprintf(stderr, "HTMLParse enter (%d.%d)\n", Tv.tv_sec, Tv.tv_usec);
- #endif
-
- preformat = 0;
-
- /*
- * Free up the previous Object List if one exists
- */
- FreeObjList(old_list);
-
- if (str == NULL)
- {
- return(NULL);
- }
-
- list = NULL;
- current = NULL;
-
- start = str;
- end = str;
-
- mark = NULL;
- while (*start != '\0')
- {
- /*
- * Get some text (if any). If our last mark was
- * a begin plain text we call different function
- * If last mark was <PLAINTEXT> we lump all the rest of
- * the text in.
- */
- if ((mark != NULL)&&(mark->type == M_PLAIN_FILE)&&
- (!mark->is_end))
- {
- text = start;
- end = text;
- while (*end != '\0')
- {
- end++;
- }
- /*
- * Copy text to its own malloced buffer, and clean it of
- * HTML escapes.
- */
- tptr = (char *)malloc(strlen(text) + 1);
- if (tptr == NULL)
- {
- fprintf(stderr,
- "Cannot malloc space for text\n");
- return(list);
- }
- strcpy(tptr, text);
- text = tptr;
- }
- else if ((mark != NULL)&&
- ((mark->type == M_PLAIN_TEXT)||
- (mark->type == M_LISTING_TEXT))&&
- (!mark->is_end))
- {
- text = get_plain_text(start, &end);
- }
- else
- {
- text = get_text(start, &end);
- }
-
- /*
- * If text is OK, put it into a mark structure, and add
- * it to the linked list.
- */
- if (text == NULL)
- {
- if (start != end)
- {
- fprintf(stderr, "error parsing text, bailing out\n");
- return(list);
- }
- }
- else
- {
- mark = (struct mark_up *)malloc(sizeof(struct mark_up));
- if (mark == NULL)
- {
- fprintf(stderr, "Cannot malloc for mark_up struct\n");
- return(list);
- }
- mark->type = M_NONE;
- mark->is_end = 0;
- mark->start = NULL;
- mark->text = text;
- mark->end = NULL;
- mark->next = NULL;
- current = AddObj(&list, current, mark, preformat);
- }
- start = end;
-
- if (*start == '\0')
- {
- break;
- }
-
- /*
- * Get the next mark if any, and if it is
- * valid, add it to the linked list.
- */
- mark = get_mark(start, &end);
- if (mark == NULL)
- {
- if (start != end)
- {
- fprintf(stderr, "error parsing mark, bailing out\n");
- return(list);
- }
- }
- else
- {
- mark->next = NULL;
- current = AddObj(&list, current, mark, preformat);
- }
-
- start = (char *)(end + 1);
-
- if ((mark != NULL)&&(mark->type == M_PLAIN_FILE)&&
- (!mark->is_end))
- {
- /*
- * A linefeed immediately after the <PLAINTEXT>
- * mark is to be ignored.
- */
- if (*start == '\n')
- {
- start++;
- }
- }
- else if ((mark != NULL)&&((mark->type == M_PLAIN_TEXT)||
- (mark->type == M_LISTING_TEXT))&&
- (!mark->is_end))
- {
- /*
- * A linefeed immediately after the <XMP>
- * or <LISTING> mark is to be ignored.
- */
- if (*start == '\n')
- {
- start++;
- }
- }
- /*
- * If we are parsing pre-formatted text we need to set a
- * flag so we don't throw out needed linefeeds.
- */
- else if ((mark != NULL)&&(mark->type == M_PREFORMAT))
- {
- if (mark->is_end)
- {
- preformat = 0;
- }
- else
- {
- preformat = 1;
- /*
- * A linefeed immediately after the <PRE>
- * mark is to be ignored.
- */
- if (*start == '\n')
- {
- start++;
- }
- }
- }
- }
- #ifdef TIMING
- gettimeofday(&Tv, &Tz);
- fprintf(stderr, "HTMLParse exit (%d.%d)\n", Tv.tv_sec, Tv.tv_usec);
- #endif
- return(list);
- }
-
-
- /*
- * Determine mark type from the identifying string passed
- */
- int
- ParseMarkType(char *str)
- {
- int type;
- char *tptr;
- char tchar;
-
- if (str == NULL)
- {
- return(M_NONE);
- }
-
- //mjw type = M_UNKNOWN;
- tptr = str;
- while (*tptr != '\0')
- {
- if (isspace((int)*tptr))
- {
- break;
- }
- tptr++;
- }
- tchar = *tptr;
- *tptr = '\0';
-
- if (caseless_equal(str, MT_ANCHOR))
- {
- type = M_ANCHOR;
- }
- else if (caseless_equal(str, MT_TITLE))
- {
- type = M_TITLE;
- }
- else if (caseless_equal(str, MT_FIXED))
- {
- type = M_FIXED;
- }
- else if (caseless_equal(str, MT_BOLD))
- {
- type = M_BOLD;
- }
- else if (caseless_equal(str, MT_ITALIC))
- {
- type = M_ITALIC;
- }
- else if (caseless_equal(str, MT_EMPHASIZED))
- {
- type = M_EMPHASIZED;
- }
- else if (caseless_equal(str, MT_STRONG))
- {
- type = M_STRONG;
- }
- else if (caseless_equal(str, MT_CODE))
- {
- type = M_CODE;
- }
- else if (caseless_equal(str, MT_SAMPLE))
- {
- type = M_SAMPLE;
- }
- else if (caseless_equal(str, MT_KEYBOARD))
- {
- type = M_KEYBOARD;
- }
- else if (caseless_equal(str, MT_VARIABLE))
- {
- type = M_VARIABLE;
- }
- else if (caseless_equal(str, MT_CITATION))
- {
- type = M_CITATION;
- }
- else if (caseless_equal(str, MT_STRIKEOUT))
- {
- type = M_STRIKEOUT;
- }
- else if (caseless_equal(str, MT_HEADER_1))
- {
- type = M_HEADER_1;
- }
- else if (caseless_equal(str, MT_HEADER_2))
- {
- type = M_HEADER_2;
- }
- else if (caseless_equal(str, MT_HEADER_3))
- {
- type = M_HEADER_3;
- }
- else if (caseless_equal(str, MT_HEADER_4))
- {
- type = M_HEADER_4;
- }
- else if (caseless_equal(str, MT_HEADER_5))
- {
- type = M_HEADER_5;
- }
- else if (caseless_equal(str, MT_HEADER_6))
- {
- type = M_HEADER_6;
- }
- else if (caseless_equal(str, MT_ADDRESS))
- {
- type = M_ADDRESS;
- }
- else if (caseless_equal(str, MT_PLAIN_TEXT))
- {
- type = M_PLAIN_TEXT;
- }
- else if (caseless_equal(str, MT_LISTING_TEXT))
- {
- type = M_LISTING_TEXT;
- }
- else if (caseless_equal(str, MT_PLAIN_FILE))
- {
- type = M_PLAIN_FILE;
- }
- else if (caseless_equal(str, MT_PARAGRAPH))
- {
- type = M_PARAGRAPH;
- }
- else if (caseless_equal(str, MT_UNUM_LIST))
- {
- type = M_UNUM_LIST;
- }
- else if (caseless_equal(str, MT_NUM_LIST))
- {
- type = M_NUM_LIST;
- }
- else if (caseless_equal(str, MT_MENU))
- {
- type = M_MENU;
- }
- else if (caseless_equal(str, MT_DIRECTORY))
- {
- type = M_DIRECTORY;
- }
- else if (caseless_equal(str, MT_LIST_ITEM))
- {
- type = M_LIST_ITEM;
- }
- else if (caseless_equal(str, MT_DESC_LIST))
- {
- type = M_DESC_LIST;
- }
- else if (caseless_equal(str, MT_DESC_TITLE))
- {
- type = M_DESC_TITLE;
- }
- else if (caseless_equal(str, MT_DESC_TEXT))
- {
- type = M_DESC_TEXT;
- }
- else if (caseless_equal(str, MT_PREFORMAT))
- {
- type = M_PREFORMAT;
- }
- else if (caseless_equal(str, MT_BLOCKQUOTE))
- {
- type = M_BLOCKQUOTE;
- }
- else if (caseless_equal(str, MT_INDEX))
- {
- type = M_INDEX;
- }
- else if (caseless_equal(str, MT_HRULE))
- {
- type = M_HRULE;
- }
- else if (caseless_equal(str, MT_BASE))
- {
- type = M_BASE;
- }
- else if (caseless_equal(str, MT_LINEBREAK))
- {
- type = M_LINEBREAK;
- }
- else if (caseless_equal(str, MT_IMAGE))
- {
- type = M_IMAGE;
- }
- else if (caseless_equal(str, MT_SELECT))
- {
- type = M_SELECT;
- }
- else if (caseless_equal(str, MT_OPTION))
- {
- type = M_OPTION;
- }
- else if (caseless_equal(str, MT_INPUT))
- {
- type = M_INPUT;
- }
- else if (caseless_equal(str, MT_TEXTAREA))
- {
- type = M_TEXTAREA;
- }
- else if (caseless_equal(str, MT_FORM))
- {
- type = M_FORM;
- }
- else
- {
- #ifdef VERBOSE
- fprintf(stderr, "warning: unknown mark (%s)\n", str);
- #endif
- type = M_UNKNOWN;
- }
-
- *tptr = tchar;
- return(type);
- }
-
-
/*
 * Parse a single "name", "name=value" or name="quoted value" tag out
 * of mark text.
 *
 * ptrp:   in/out; on entry *ptrp is where parsing starts, on return it
 *         points after the text that was parsed.
 * startp: out; *startp points at the first character of the tag's name
 *         in the original string.
 * endp:   out; *endp points just after the last character of the name.
 *
 * Returns the tag value in a malloced buffer (caller frees):
 *   - the word or double-quoted string following '=', or
 *   - the string "1" for a tag with no '=' that is followed by more
 *     text (a boolean flag).
 * Returns NULL when the string ends right after the name, when a quoted
 * value has no closing quote, or when memory cannot be allocated.
 * The input string is only read, never written.
 */
char *
AnchorTag(char **ptrp, char **startp, char **endp)
{
	char *tag_val;
	char *ptr;
	char *start;
	size_t len;
	int quoted = 0;
	int has_value = 0;

	/*
	 * remove leading spaces, and set start
	 */
	ptr = *ptrp;
	while (isspace((unsigned char)*ptr))
	{
		ptr++;
	}
	*startp = ptr;

	/*
	 * Find and set the end of the tag name
	 */
	while ((*ptr != '\0')&&(*ptr != '=')&&
		(!isspace((unsigned char)*ptr)))
	{
		ptr++;
	}
	*endp = ptr;

	if (*ptr == '\0')
	{
		*ptrp = ptr;
		return(NULL);
	}

	/*
	 * Move to the start of the tag value, if there is one.
	 * Seeing an '=' on the way is what marks a value as present.
	 */
	while ((isspace((unsigned char)*ptr))||(*ptr == '='))
	{
		if (*ptr == '=')
		{
			has_value = 1;
		}
		ptr++;
	}

	/*
	 * For a tag with no value, this is a boolean flag.
	 * Return the string "1" so we know the tag is there, and
	 * resume parsing right after the name.
	 */
	if (!has_value)
	{
		*ptrp = *endp;
		tag_val = (char *)malloc(sizeof("1"));
		if (tag_val == NULL)
		{
			fprintf(stderr, "can't malloc space for tag value\n");
			return(NULL);
		}
		strcpy(tag_val, "1");

		return(tag_val);
	}

	if (*ptr == '\"')
	{
		quoted = 1;
		ptr++;
	}

	/*
	 * Get tag value.  Either a quoted string or a single word
	 */
	start = ptr;
	if (quoted)
	{
		while ((*ptr != '\"')&&(*ptr != '\0'))
		{
			ptr++;
		}
		if (*ptr == '\0')
		{
			/* no closing quote */
			*ptrp = ptr;
			return(NULL);
		}
	}
	else
	{
		while ((*ptr != '\0')&&(!isspace((unsigned char)*ptr)))
		{
			ptr++;
		}
	}

	/*
	 * Copy the tag value out into a malloced string.  The length
	 * is known, so the source is left untouched.
	 */
	len = (size_t)(ptr - start);
	tag_val = (char *)malloc(len + 1);
	if (tag_val == NULL)
	{
		fprintf(stderr, "can't malloc space for tag value\n");
		*ptrp = ptr;
		return(NULL);
	}
	memcpy(tag_val, start, len);
	tag_val[len] = '\0';

	if (quoted)
	{
		ptr++;	/* step over the closing quote */
	}
	*ptrp = ptr;

	return(tag_val);
}
-
-
/*
 * Parse mark text for the value associated with the
 * passed mark tag.
 *
 * text:  the full mark text; must be writable (a NUL is placed after
 *        each tag name while it is compared, then restored).
 * mtext: the mark's name; only its length is used, to skip that many
 *        characters at the front of text.
 * mtag:  the tag (attribute) name to look for, compared ignoring case.
 *
 * Returns a malloced string the caller must free:
 *   - the value AnchorTag produced for the tag, or
 *   - "" if the tag was found but AnchorTag returned no value.
 * Returns NULL if any argument is NULL, if text is shorter than mtext,
 * if the tag is not found, or if memory cannot be allocated.
 */
char *
ParseMarkTag(char *text, char *mtext, char *mtag)
{
	char *ptr;
	char *start;
	char *end;
	char *tag_val;
	char tchar;
	size_t skip;
	int found;

	if ((text == NULL)||(mtext == NULL)||(mtag == NULL))
	{
		return(NULL);
	}

	/*
	 * Skip the mark name.  Refuse to step past the end of text
	 * if it is shorter than the name.
	 */
	skip = strlen(mtext);
	if (skip > strlen(text))
	{
		return(NULL);
	}
	ptr = (char *)(text + skip);

	while (*ptr != '\0')
	{
		tag_val = AnchorTag(&ptr, &start, &end);

		/*
		 * Terminate the tag name just long enough to compare it.
		 */
		tchar = *end;
		*end = '\0';
		found = caseless_equal(start, mtag);
		*end = tchar;

		if (found)
		{
			if (tag_val == NULL)
			{
				/* tag present without a value: return "" */
				tag_val = (char *)malloc(1);
				if (tag_val == NULL)
				{
					fprintf(stderr,
					    "can't malloc space for tag value\n");
					return(NULL);
				}
				*tag_val = '\0';
			}
			return(tag_val);
		}

		/* Not the tag we want; free(NULL) is a no-op. */
		free(tag_val);
	}
	return(NULL);
}
-
-