home *** CD-ROM | disk | FTP | other *** search
- /*
- config.c - read config file and manage config properties
-
- (c) 1998, 1999 (W3C) MIT, INRIA, Keio University
- See tidy.c for the copyright notice.
- */
-
- /*
- config files associate a property name with a value.
-
- // comments can start at the beginning of a line
- name: short values fit onto one line
- name: a really long value that
-     continues on the next line
-
- property names are case insensitive and should be less than
- 60 characters in length and must start at the beginning of
- the line, as whitespace at the start of a line signifies a
- line continuation.
- */
-
- #include "platform.h"
- #include "html.h"
-
- typedef union /* where a parsed property value is stored; flags[] initializes the first member */
- {
- int *number; /* written by the integer and char-encoding parsers */
- Bool *logical; /* written by ParseBool and ParseInvBool */
- char **string; /* written by ParseName and ParseString */
- } Location;
-
- typedef void (ParseProperty)(Location location); /* common signature of all property value parsers */
-
- ParseProperty ParseInt; /* parser for integer values */
- ParseProperty ParseBool; /* parser for 'true' or 'false' or 'yes' or 'no' */
- ParseProperty ParseInvBool; /* as ParseBool, but stores the negated value */
- ParseProperty ParseName; /* a string excluding whitespace */
- ParseProperty ParseString; /* a string including whitespace */
- ParseProperty ParseTagNames; /* a space or comma separated list of tag names */
- ParseProperty ParseCharEncoding; /* RAW, ASCII, LATIN1, UTF8 or ISO2022 */
- ParseProperty ParseIndent; /* specific to the indent option */
- ParseProperty ParseDocType; /* omit | auto | strict | loose | <fpi> */
-
- uint spaces = 2; /* default indentation; set by the indent-spaces property */
- uint wraplen = 68; /* default wrap margin; set by the wrap property */
- int CharEncoding = ASCII; /* default encoding; set by the char-encoding property */
- int tabsize = 4; /* default; set by the tab-size property */
-
- DocTypeMode doctype_mode = doctype_auto; /* see doctype property; updated by ParseDocType */
- char *slide_style = null; /* style sheet for slides; released by FreeConfig */
- char *doctype_str = null; /* user specified doctype; released by FreeConfig */
- char *errfile = null; /* file name to write errors to; released by FreeConfig */
- Bool writeback = no; /* if true then output tidied markup */
-
- Bool OnlyErrors = no; /* if true normal output is suppressed (inverse of the markup property) */
- Bool ShowWarnings = yes; /* however errors are always shown */
- Bool IndentContent = no; /* indent content of appropriate tags */
- Bool SmartIndent = no; /* does text/block level content affect indentation */
- Bool HideEndTags = no; /* suppress optional end tags */
- Bool XmlTags = no; /* treat input as XML */
- Bool XmlOut = no; /* create output as XML */
- Bool xHTML = no; /* output extensible HTML */
- Bool XmlPi = yes; /* add <?xml?> for XML docs */
- Bool RawOut = no; /* avoid mapping values > 127 to entities */
- Bool UpperCaseTags = no; /* output tags in upper not lower case */
- Bool UpperCaseAttrs = no; /* output attributes in upper not lower case */
- Bool MakeClean = no; /* replace presentational clutter by style rules */
- Bool LogicalEmphasis = no; /* replace i by em and b by strong */
- Bool DropFontTags = no; /* discard presentation tags */
- Bool BreakBeforeBR = no; /* o/p newline before <br> or not? */
- Bool BurstSlides = no; /* create slides on each h2 element */
- Bool NumEntities = no; /* use numeric entities */
- Bool QuoteMarks = no; /* output " marks as &quot; */
- Bool QuoteNbsp = yes; /* output non-breaking space as entity */
- Bool QuoteAmpersand = yes; /* output naked ampersand as &amp; */
- Bool WrapScriptlets = no; /* wrap within JavaScript string literals */
- Bool WrapAsp = yes; /* wrap within ASP pseudo elements */
- Bool FixBackslash = yes; /* fix URLs by replacing \ with / */
- Bool IndentAttributes = no; /* newline+indent before each attribute */
- Bool XmlPIs = no; /* if set to yes PIs must end with ?> */
-
- typedef struct _lex PLex; /* NOTE(review): not referenced by any code visible in this file */
-
- static uint c; /* current char in input stream; equals (uint)EOF once the input is exhausted */
- static FILE *fp; /* file pointer for input stream; opened and closed by ParseConfigFile */
-
- /* never assigned: only their addresses are used, so that ParseTagNames can tell which kind of tag list it was asked to parse */
- static char *inline_tags;
- static char *block_tags;
- static char *empty_tags;
-
-
- typedef struct _plist PList; /* one entry in the property hash table */
-
- struct _plist
- {
- char *name; /* property name (private copy made by install) */
- Location location; /* place to store value */
- ParseProperty *parser; /* parsing method */
- PList *next; /* linear hash chaining: next entry in the same bucket */
- };
-
- #define HASHSIZE 101 /* number of buckets in hashtable */
-
- static PList *hashtable[HASHSIZE]; /* private hash table, filled by InitConfig via install */
- static Bool initialized = no; /* set to yes by InitConfig so the table is only built once */
-
- static struct Flag
- {
- char *name; /* property name */
- Location location; /* place to store value */
- ParseProperty *parser; /* parsing method */
- } flags[] =
- {
- "indent-spaces", (int *)&spaces, ParseInt,
- "wrap", (int *)&wraplen, ParseInt,
- "wrap-script-literals", (int *)&WrapScriptlets, ParseBool,
- "wrap-asp", (int *)&WrapAsp, ParseBool,
- "tab-size", (int *)&tabsize, ParseInt,
- "markup", (int *)&OnlyErrors, ParseInvBool,
- "indent", (int *)&IndentContent, ParseIndent,
- "indent-attributes", (int *)&IndentAttributes, ParseBool,
- "hide-endtags", (int *)&HideEndTags, ParseBool,
- "input-xml", (int *)&XmlTags, ParseBool,
- "output-xml", (int *)&XmlOut, ParseBool,
- "output-xhtml", (int *)&xHTML, ParseBool,
- "add-xml-pi", (int *)&XmlPi, ParseBool,
- "assume-xml-procins", (int *)&XmlPIs, ParseBool,
- "raw", (int *)&RawOut, ParseBool,
- "uppercase-tags", (int *)&UpperCaseTags, ParseBool,
- "uppercase-attributes", (int *)&UpperCaseAttrs, ParseBool,
- "clean", (int *)&MakeClean, ParseBool,
- "logical-emphasis", (int *)&LogicalEmphasis, ParseBool,
- "drop-font-tags", (int *)&DropFontTags, ParseBool,
- "split", (int *)&BurstSlides, ParseBool,
- "break-before-br", (int *)&BreakBeforeBR, ParseBool,
- "numeric-entities", (int *)&NumEntities, ParseBool,
- "quote-marks", (int *)&QuoteMarks, ParseBool,
- "quote-nbsp", (int *)&QuoteNbsp, ParseBool,
- "quote-ampersand", (int *)&QuoteAmpersand, ParseBool,
- "write-back", (int *)&writeback, ParseBool,
- "show-warnings", (int *)&ShowWarnings, ParseBool,
- "error-file", (int *)&errfile, ParseName,
- "slide-style", (int *)&slide_style, ParseName,
- "new-inline-tags", (int *)&inline_tags, ParseTagNames,
- "new-blocklevel-tags", (int *)&block_tags, ParseTagNames,
- "new-empty-tags", (int *)&empty_tags, ParseTagNames,
- "char-encoding", (int *)&CharEncoding, ParseCharEncoding,
- "doctype", (int *)&doctype_str, ParseDocType,
- "fix-backslash", (int *)&FixBackslash, ParseBool,
- /* this must be the final entry */
- 0, 0, 0
- };
-
- static unsigned hash(char *s)
- {
- unsigned hashval;
-
- for (hashval = 0; *s != '\0'; s++)
- hashval = toupper(*s) + 31*hashval;
-
- return hashval % HASHSIZE;
- }
-
- static PList *lookup(char *s)
- {
- PList *np;
-
- for (np = hashtable[hash(s)]; np != null; np = np->next)
- if (wstrcmp(s, np->name) == 0)
- return np;
- return null;
- }
-
- static PList *install(char *name, Location location, ParseProperty *parser)
- {
- PList *np;
- unsigned hashval;
-
- if ((np = lookup(name)) == null)
- {
- np = (PList *)MemAlloc(sizeof(*np));
-
- if (np == null || (np->name = wstrdup(name)) == null)
- return null;
-
- hashval = hash(name);
- np->next = hashtable[hashval];
- hashtable[hashval] = np;
- }
-
- np->location = location;
- np->parser = parser;
- return np;
- }
-
- void InitConfig(void)
- {
- struct Flag *p;
-
- if (!initialized)
- {
- initialized = yes;
-
- for(p = flags; p->name != null; ++p)
- install(p->name, p->location, p->parser);
- }
- }
-
- void FreeConfig(void)
- {
- PList *prev, *next;
- int i;
-
- for (i = 0; i < HASHSIZE; ++i)
- {
- prev = null;
- next = hashtable[i];
-
- while(next)
- {
- prev = next->next;
- MemFree(next->name);
- MemFree(next);
- next = prev;
- }
- }
-
- if (slide_style)
- MemFree(slide_style);
-
- if (doctype_str)
- MemFree(doctype_str);
-
- if (errfile)
- MemFree(errfile);
- }
-
- static int AdvanceChar()
- {
- if (c != EOF)
- c = (uint)getc(fp);
- return c;
- }
-
- static int SkipWhite()
- {
- while (IsWhite((uint) c))
- c = (uint)getc(fp);
- return c;
- }
-
- /*
- skip over line continuations
- to start of next property
- */
- static int NextProperty()
- {
- do
- {
- /* skip to end of line */
- while (c != '\n' && c != '\r' && c != EOF)
- c = (uint)getc(fp);
-
- /* treat \r\n \r or \n as line ends */
- if (c == '\r')
- c = (uint)getc(fp);
-
- if (c == '\n')
- c = (uint)getc(fp);
- }
- while (IsWhite(c)); /* line continuation? */
-
- return c;
- }
-
- void ParseConfigFile(char *file)
- {
- int i;
- char name[64];
- PList *entry;
-
- /* setup property name -> parser table*/
-
- InitConfig();
-
- /* open the file and parse its contents */
-
- if ((fp = fopen(file, "r")) == null)
- fprintf(stderr, "Error: can't open config file: \"%s\"\n", file);
- else
- {
- AdvanceChar(); /* first char */
-
- while (c != EOF)
- {
- /* // starts a comment */
- while (c == '/')
- NextProperty();
-
- i = 0;
-
- while (c != ':' && c != EOF && i < 60)
- {
- name[i++] = (char)c;
- AdvanceChar();
- }
-
- name[i] = '\0';
- entry = lookup(name);
-
- if (c == ':' && entry)
- {
- AdvanceChar();
- entry->parser(entry->location);
- }
-
- NextProperty();
- }
-
- fclose(fp);
- }
- }
-
- /* ensure that config is self consistent */
- void AdjustConfig(void)
- {
- /* avoid the need to set IndentContent when SmartIndent is set */
-
- if (SmartIndent)
- IndentContent = yes;
-
- /* drop-font-tags requires clean option */
- if (DropFontTags)
- MakeClean = yes;
-
- /* logical-emphasis automatically invokes clean option */
- if (LogicalEmphasis)
- MakeClean = yes;
-
- /* XHTML is written in lower case */
- if (xHTML)
- {
- XmlOut = yes;
- UpperCaseTags = no;
- UpperCaseAttrs = no;
- }
-
- /* if XML in, then XML out */
- if (XmlTags)
- {
- XmlOut = yes;
- XmlPIs = yes;
- }
-
- /* XML requires end tags */
- if (XmlOut)
- {
- QuoteAmpersand = yes;
- HideEndTags = no;
- }
- }
-
- /* unsigned integers */
- void ParseInt(Location location)
- {
- int number = 0;
-
- SkipWhite();
-
- while(IsDigit(c))
- {
- number = c - '0' + (10 * number);
- AdvanceChar();
- }
-
- *location.number = number;
- }
-
- /* true/false or yes/no only looks at 1st char */
- void ParseBool(Location location)
- {
- Bool flag = no;
- SkipWhite();
-
- if (c == 't' || c == 'T' || c == 'y' || c == 'Y')
- flag = yes;
-
- *location.logical = flag;
- }
-
- void ParseInvBool(Location location)
- {
- Bool flag = no;
- SkipWhite();
-
- if (c == 't' || c == 'T' || c == 'y' || c == 'Y')
- flag = yes;
-
- *location.logical = (Bool)(!flag);
- }
-
- /* a string excluding whitespace */
- void ParseName(Location location)
- {
- char buf[256];
- int i = 0;
-
- SkipWhite();
-
- while (i < 254 && c != EOF && !IsWhite(c))
- {
- buf[i++] = c;
- AdvanceChar();
- }
-
- buf[i] = '\0';
-
- *location.string = wstrdup(buf);
- }
-
- /* a space or comma separated list of tag names */
- void ParseTagNames(Location location)
- {
- char buf[1024];
- int i = 0;
-
- do
- {
- SkipWhite();
-
- if (c == ',')
- {
- AdvanceChar();
- continue;
- }
-
- while (i < 1022 && c != EOF && !IsWhite(c) && c != ',')
- {
- buf[i++] = ToUpper(c);
- AdvanceChar();
- }
-
- buf[i] = '\0';
-
- /* add tag to dictionary */
-
- if(location.string == &inline_tags)
- DefineInlineTag(buf);
- else if (location.string == &block_tags)
- DefineBlockTag(buf);
- else if (location.string == &empty_tags)
- DefineEmptyTag(buf);
-
- i = 0;
- }
- while (c != EOF);
- }
-
- /* a string including whitespace */
- /* munges whitespace sequences */
- void ParseString(Location location)
- {
- char buf[8192];
- int i = 0;
- Bool waswhite = no;
-
- SkipWhite();
-
- while (i < 8190 && c != EOF)
- {
- /* treat \r\n \r or \n as line ends */
- if (c == '\r')
- {
- AdvanceChar();
-
- if (c != '\n' && !IsWhite(c))
- break;
- }
-
- if (c == '\n')
- {
- AdvanceChar();
-
- if (!IsWhite(c))
- break;
- }
-
- if (IsWhite(c))
- {
- if (waswhite)
- {
- AdvanceChar();
- continue;
- }
-
- c = ' ';
- }
- else
- waswhite = no;
-
- buf[i++] = c;
- AdvanceChar();
- }
-
- buf[i] = '\0';
-
- if (*location.string)
- MemFree(*location.string);
-
- *location.string = wstrdup(buf);
- }
-
- void ParseCharEncoding(Location location)
- {
- char buf[64];
- int i = 0;
-
- SkipWhite();
-
- while (i < 62 && c != EOF && !IsWhite(c))
- {
- buf[i++] = c;
- AdvanceChar();
- }
-
- buf[i] = '\0';
-
- if (wstrcasecmp(buf, "ascii") == 0)
- *location.number = ASCII;
- else if (wstrcasecmp(buf, "latin1") == 0)
- *location.number = LATIN1;
- else if (wstrcasecmp(buf, "raw") == 0)
- *location.number = RAW;
- else if (wstrcasecmp(buf, "utf8") == 0)
- *location.number = UTF8;
- else if (wstrcasecmp(buf, "iso2022") == 0)
- *location.number = ISO2022;
- }
-
- /* slight hack to avoid changes to pprint.c */
- void ParseIndent(Location location)
- {
- char buf[64];
- int i = 0;
-
- SkipWhite();
-
- while (i < 62 && c != EOF && !IsWhite(c))
- {
- buf[i++] = c;
- AdvanceChar();
- }
-
- buf[i] = '\0';
-
- if (wstrcasecmp(buf, "yes") == 0)
- {
- IndentContent = yes;
- SmartIndent = no;
- }
- else if (wstrcasecmp(buf, "true") == 0)
- {
- IndentContent = yes;
- SmartIndent = no;
- }
- else if (wstrcasecmp(buf, "no") == 0)
- {
- IndentContent = no;
- SmartIndent = no;
- }
- else if (wstrcasecmp(buf, "false") == 0)
- {
- IndentContent = no;
- SmartIndent = no;
- }
- else if (wstrcasecmp(buf, "auto") == 0)
- {
- IndentContent = yes;
- SmartIndent = yes;
- }
- }
-
- /*
- doctype: omit | auto | strict | loose | <fpi>
-
- where the fpi is a string similar to
-
- "-//ACME//DTD HTML 3.14159//EN"
- */
- void ParseDocType(Location location)
- {
- char buf[64];
- int i = 0;
-
- SkipWhite();
-
- /* "-//ACME//DTD HTML 3.14159//EN" or similar */
-
- if (c == '"')
- {
- ParseString(location);
- doctype_mode = doctype_user;
- return;
- }
-
- /* read first word */
- while (i < 62 && c != EOF && !IsWhite(c))
- {
- buf[i++] = c;
- AdvanceChar();
- }
-
- buf[i] = '\0';
-
- doctype_mode = doctype_auto;
-
- if (wstrcasecmp(buf, "omit") == 0)
- doctype_mode = doctype_omit;
- else if (wstrcasecmp(buf, "strict") == 0)
- doctype_mode = doctype_strict;
- else if (wstrcasecmp(buf, "loose") == 0 ||
- wstrcasecmp(buf, "transitional") == 0)
- doctype_mode = doctype_loose;
- }
-