home *** CD-ROM | disk | FTP | other *** search
- /* sgmldecl.c -
- SGML declaration parsing.
-
- Written by James Clark (jjc@jclark.com).
- */
-
- #include "sgmlincl.h"
-
/* Symbolic names for the error numbers that are generated only by
   this module.  Note that the numbering has gaps. */

#define E_STANDARD          163
#define E_SIGNIFICANT       164
#define E_BADLIT            165
#define E_SCOPE             166
#define E_XNUM              167
#define E_BADVERSION        168
#define E_NMUNSUP           169
#define E_XNMLIT            170
#define E_CHARDESC          171
#define E_CHARDUP           172
#define E_CHARRANGE         173
#define E_7BIT              174
#define E_CHARMISSING       175
#define E_SHUNNED           176
#define E_NONSGML           177
#define E_CAPSET            178
#define E_CAPMISSING        179
#define E_SYNTAX            180
#define E_CHARNUM           181
#define E_SWITCHES          182
#define E_INSTANCE          183
#define E_ZEROFEATURE       184
#define E_YESNO             185
#define E_CAPACITY          186
#define E_NOTSUPPORTED      187
#define E_FORMAL            189
#define E_BADCLASS          190
#define E_MUSTBENON         191
#define E_BADBASECHAR       199
#define E_SYNREFUNUSED      200
#define E_SYNREFUNDESC      201
#define E_SYNREFUNKNOWN     202
#define E_SYNREFUNKNOWNSET  203
#define E_FUNDUP            204
#define E_BADFUN            205
#define E_FUNCHAR           206
#define E_GENDELIM          207
#define E_SRDELIM           208
#define E_BADKEY            209
#define E_BADQUANTITY       210
#define E_BADNAME           211
#define E_REFNAME           212
#define E_DUPNAME           213
#define E_QUANTITY          214
#define E_QTOOBIG           215
#define E_NMSTRTCNT         219
#define E_NMCHARCNT         220
#define E_NMDUP             221
#define E_NMBAD             222
#define E_NMMINUS           223
#define E_UNKNOWNSET        227
-
#define CANON_NMC '.'           /* Canonical name character. */
#define CANON_NMS 'A'           /* Canonical name start character. */
#define CANON_MIN ':'           /* Canonical minimum data character. */

/* Status values returned by the section parsers in this file. */
#define SUCCESS 1
#define FAIL 0

/* Number of elements in the array v.  v must be a real array, not a
   pointer.  The argument is parenthesized so that any expression may
   be passed safely. */
#define SIZEOF(v) (sizeof(v)/sizeof((v)[0]))

/* Non-zero if the name token in tok is equal to the string str.  The
   comparison starts at tok+1, i.e. the first byte of the token buffer
   is skipped (presumably a prefix byte stored by the name parser --
   confirm against parsenm()). */
#define matches(tok, str) (ustrcmp((tok)+1, (str)) == 0)
-
- static UNCH standard[] = "ISO 8879:1986";
-
- #define REFERENCE_SYNTAX "ISO 8879:1986//SYNTAX Reference//EN"
- #define CORE_SYNTAX "ISO 8879:1986//SYNTAX Core//EN"
-
- static UNCH (*newkey)[REFNAMELEN+1] = 0;
-
- struct pmap {
- char *name;
- UNIV value;
- };
-
- /* The reference capacity set. */
- #define REFCAPSET \
- { 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, \
- 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L }
-
- long refcapset[NCAPACITY] = REFCAPSET;
-
- /* A pmap of known capacity sets. */
-
- static struct pmap capset_map[] = {
- { "ISO 8879:1986//CAPACITY Reference//EN", (UNIV)refcapset },
- { 0 },
- };
-
/* Names of the capacities, indexed by capacity number.  The order
   must match the *CAP constants in sgmldecl.h. */

char *captab[] = {
    "TOTALCAP", "ENTCAP",   "ENTCHCAP",
    "ELEMCAP",  "GRPCAP",   "EXGRPCAP",
    "EXNMCAP",  "ATTCAP",   "ATTCHCAP",
    "AVGRPCAP", "NOTCAP",   "NOTCHCAP",
    "IDCAP",    "IDREFCAP", "MAPCAP",
    "LKSETCAP", "LKNMCAP",
};
-
/* Values used by the default SGML declaration. */

/* Used as the default SUBDOC value in sd when SUPPORT_SUBDOC is
   defined. */
#define MAXNUMBER       99999999L

/* Reference quantity set (LITLEN and NAMELEN come from REFLITLEN and
   REFNAMELEN, which are defined elsewhere). */

#define REFATTCNT       40
#define REFATTSPLEN     960
#define REFBSEQLEN      960
#define REFDTAGLEN      16
#define REFDTEMPLEN     16
#define REFENTLVL       16
#define REFGRPCNT       32
#define REFGRPGTCNT     96
#define REFGRPLVL       16
#define REFNORMSEP      2
#define REFPILEN        240
#define REFTAGLEN       960
#define REFTAGLVL       24

/* Upper limits accepted by sdquantity() for each quantity.  Limits on
   table sizes are derived from ALLOC_MAX and the size of the table
   element; the remaining large limits use BIGINT. */

#define ALLOC_MAX       65534

#define BIGINT          30000

#define MAXATTCNT       ((ALLOC_MAX/sizeof(struct ad)) - 2)
#define MAXATTSPLEN     BIGINT
#define MAXBSEQLEN      BIGINT
#define MAXDTAGLEN      16
#define MAXDTEMPLEN     16
#define MAXENTLVL       ((ALLOC_MAX/sizeof(struct source)) - 1)
#define MAXGRPCNT       MAXGRPGTCNT
/* Must be between 96 and 253 */
#define MAXGRPGTCNT     253
#define MAXGRPLVL       MAXGRPGTCNT
#define MAXLITLEN       BIGINT
/* This guarantees that NAMELEN < LITLEN (i.e. there's always space for
   a name in a buffer intended for a literal). */
#define MAXNAMELEN      (REFLITLEN - 1)
#define MAXNORMSEP      2
#define MAXPILEN        BIGINT
#define MAXTAGLEN       BIGINT
#define MAXTAGLVL       ((ALLOC_MAX/sizeof(struct tag)) - 1)
-
/* Names of the quantities, indexed by quantity number.  The order
   must match the Q* constants in sgmldecl.h. */

static char *quantity_names[] = {
    "ATTCNT",   "ATTSPLEN", "BSEQLEN",
    "DTAGLEN",  "DTEMPLEN", "ENTLVL",
    "GRPCNT",   "GRPGTCNT", "GRPLVL",
    "LITLEN",   "NAMELEN",  "NORMSEP",
    "PILEN",    "TAGLEN",   "TAGLVL",
};
-
- static int max_quantity[] = {
- MAXATTCNT,
- MAXATTSPLEN,
- MAXBSEQLEN,
- MAXDTAGLEN,
- MAXDTEMPLEN,
- MAXENTLVL,
- MAXGRPCNT,
- MAXGRPGTCNT,
- MAXGRPLVL,
- MAXLITLEN,
- MAXNAMELEN,
- MAXNORMSEP,
- MAXPILEN,
- MAXTAGLEN,
- MAXTAGLVL,
- };
-
/* Allocated by sdquantity() the first time a quantity is changed;
   entry i is set to 1 when quantity i was given a new value. */
static char *quantity_changed;

/* Non-zero means the APPINFO parameter was not NONE. */
static int appinfosw = 0;
-
- struct sgmldecl sd = {
- REFCAPSET, /* capacity */
- #ifdef SUPPORT_SUBDOC
- MAXNUMBER, /* subdoc */
- #else /* not SUPPORT_SUBDOC */
- 0, /* subdoc */
- #endif /* not SUPPORT_SUBDOC */
- 1, /* formal */
- 1, /* omittag */
- 1, /* shorttag */
- 1, /* shortref */
- { 1, 0 }, /* general/entity name case translation */
- { /* reference quantity set */
- REFATTCNT,
- REFATTSPLEN,
- REFBSEQLEN,
- REFDTAGLEN,
- REFDTEMPLEN,
- REFENTLVL,
- REFGRPCNT,
- REFGRPGTCNT,
- REFGRPLVL,
- REFLITLEN,
- REFNAMELEN,
- REFNORMSEP,
- REFPILEN,
- REFTAGLEN,
- REFTAGLVL,
- },
- };
-
/* Base character set table for the system character set: the identity
   mapping, every character number 0..255 maps to itself. */
static int systemcharset[] = {
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
     16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
     32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
     48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
     64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
     80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
     96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
    128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
    144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
    208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
};
-
- static struct pmap charset_map[] = {
- { "ESC 2/5 4/0", (UNIV)asciicharset }, /* ISO 646 IRV */
- { "ESC 2/8 4/2", (UNIV)asciicharset }, /* ISO Registration Number 6, ASCII */
- { SYSTEM_CHARSET_DESIGNATING_SEQUENCE, (UNIV)systemcharset },
- /* system character set */
- { 0 }
- };
-
- static int synrefcharset[256]; /* the syntax reference character set */
-
- #define CHAR_NONSGML 01
- #define CHAR_SIGNIFICANT 02
- #define CHAR_MAGIC 04
- #define CHAR_SHUNNED 010
-
- static UNCH char_flags[256];
- static int done_nonsgml = 0;
- static UNCH *nlextoke = 0; /* new lextoke */
- static UNCH *nlextran = 0; /* new lextran */
-
-
- static UNCH kcharset[] = "CHARSET";
- static UNCH kbaseset[] = "BASESET";
- static UNCH kdescset[] = "DESCSET";
- static UNCH kunused[] = "UNUSED";
- static UNCH kcapacity[] = "CAPACITY";
- static UNCH kpublic[] = "PUBLIC";
- static UNCH ksgmlref[] = "SGMLREF";
- static UNCH kscope[] = "SCOPE";
- static UNCH kdocument[] = "DOCUMENT";
- static UNCH kinstance[] = "INSTANCE";
- static UNCH ksyntax[] = "SYNTAX";
- static UNCH kswitches[] = "SWITCHES";
- static UNCH kfeatures[] = "FEATURES";
- static UNCH kminimize[] = "MINIMIZE";
- static UNCH kdatatag[] = "DATATAG";
- static UNCH komittag[] = "OMITTAG";
- static UNCH krank[] = "RANK";
- static UNCH kshorttag[] = "SHORTTAG";
- static UNCH klink[] = "LINK";
- static UNCH ksimple[] = "SIMPLE";
- static UNCH kimplicit[] = "IMPLICIT";
- static UNCH kexplicit[] = "EXPLICIT";
- static UNCH kother[] = "OTHER";
- static UNCH kconcur[] = "CONCUR";
- static UNCH ksubdoc[] = "SUBDOC";
- static UNCH kformal[] = "FORMAL";
- static UNCH kyes[] = "YES";
- static UNCH kno[] = "NO";
- static UNCH kappinfo[] = "APPINFO";
- static UNCH knone[] = "NONE";
- static UNCH kshunchar[] = "SHUNCHAR";
- static UNCH kcontrols[] = "CONTROLS";
- static UNCH kfunction[] = "FUNCTION";
- static UNCH krs[] = "RS";
- static UNCH kre[] = "RE";
- static UNCH kspace[] = "SPACE";
- static UNCH knaming[] = "NAMING";
- static UNCH klcnmstrt[] = "LCNMSTRT";
- static UNCH kucnmstrt[] = "UCNMSTRT";
- static UNCH klcnmchar[] = "LCNMCHAR";
- static UNCH kucnmchar[] = "UCNMCHAR";
- static UNCH knamecase[] = "NAMECASE";
- static UNCH kdelim[] = "DELIM";
- static UNCH kgeneral[] = "GENERAL";
- static UNCH kentity[] = "ENTITY";
- static UNCH kshortref[] = "SHORTREF";
- static UNCH knames[] = "NAMES";
- static UNCH kquantity[] = "QUANTITY";
-
- #define sderr mderr
-
- static UNIV pmaplookup P((struct pmap *, char *));
- static UNCH *ltous P((long));
- static VOID sdfixstandard P((UNCH *));
- static int sdparm P((UNCH *, struct parse *));
- static int sdname P((UNCH *, UNCH *));
- static int sdckname P((UNCH *, UNCH *));
- static int sdversion P((UNCH *));
- static int sdcharset P((UNCH *));
- static int sdcsdesc P((UNCH *, int *));
- static int sdpubcapacity P((UNCH *));
- static int sdcapacity P((UNCH *));
- static int sdscope P((UNCH *));
- static VOID setlexical P((void));
- static VOID noemptytag P((void));
- static int sdpubsyntax P((UNCH *));
- static int sdsyntax P((UNCH *));
- static int sdxsyntax P((UNCH *));
- static int sdtranscharnum P((UNCH *));
- static int sdtranschar P((int));
- static int sdshunchar P((UNCH *));
- static int sdsynref P((UNCH *));
- static int sdfunction P((UNCH *));
- static int sdnaming P((UNCH *));
- static int sddelim P((UNCH *));
- static int sdnames P((UNCH *));
- static int sdquantity P((UNCH *));
- static int sdfeatures P((UNCH *));
- static int sdappinfo P((UNCH *));
-
- static VOID bufsalloc P((void));
- static VOID bufsrealloc P((void));
-
- /* Parse the SGML declaration. Return non-zero if there was some appinfo. */
-
- int sgmldecl()
- {
- int i;
- int errsw = 0;
- UNCH endbuf[REFNAMELEN+2]; /* buffer for parsing terminating > */
- static int (*section[]) P((UNCH *)) = {
- sdversion,
- sdcharset,
- sdcapacity,
- sdscope,
- sdsyntax,
- sdfeatures,
- sdappinfo,
- };
- /* These are needed if we use mderr. */
- parmno = 0;
- mdname = sgmlkey;
- subdcl = NULL;
- for (i = 0; i < SIZEOF(section); i++)
- if ((*section[i])(tbuf) == FAIL) {
- errsw = 1;
- break;
- }
- if (!errsw)
- setlexical();
- bufsrealloc();
- /* Parse the >. Don't overwrite the appinfo. */
- if (!errsw)
- sdparm(endbuf, 0);
- /* We must exit if we hit end of document. */
- if (pcbsd.action == EOD_)
- exiterr(161, &pcbsd);
- if (!errsw && pcbsd.action != ESGD)
- sderr(126, (UNCH *)0, (UNCH *)0);
- return appinfosw;
- }
-
- /* Parse the literal (which should contain the version of the
- standard) at the beginning of a SGML declaration. */
-
- static int sdversion(tbuf)
- UNCH *tbuf;
- {
- if (sdparm(tbuf, &pcblitv) != LIT1) {
- sderr(123, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- sdfixstandard(tbuf);
- if (ustrcmp(tbuf, standard) != 0)
- sderr(E_BADVERSION, tbuf, standard);
- return SUCCESS;
- }
-
- /* Parse the CHARSET section. Use one token lookahead. */
-
- static int sdcharset(tbuf)
- UNCH *tbuf;
- {
- int i;
- int status[256];
-
- if (sdname(tbuf, kcharset) == FAIL) return FAIL;
- (void)sdparm(tbuf, 0);
-
- if (sdcsdesc(tbuf, status) == FAIL)
- return FAIL;
-
- for (i = 128; i < 256; i++)
- if (status[i] != UNDESC)
- break;
- if (i >= 256) {
- /* Only a 7-bit character set was described. Fill it out to 8-bits. */
- for (i = 128; i < 256; i++)
- status[i] = UNUSED;
- #if 0
- sderr(E_7BIT, (UNCH *)0, (UNCH *)0);
- #endif
- }
- /* Characters that are declared UNUSED in the document character set
- are assigned to non-SGML. */
- for (i = 0; i < 256; i++) {
- if (status[i] == UNDESC) {
- sderr(E_CHARMISSING, ltous((long)i), (UNCH *)0);
- char_flags[i] |= CHAR_NONSGML;
- }
- else if (status[i] == UNUSED)
- char_flags[i] |= CHAR_NONSGML;
- }
- done_nonsgml = 1;
- return SUCCESS;
- }
-
- /* Parse a character set description. Uses one character lookahead. */
-
- static int sdcsdesc(tbuf, status)
- UNCH *tbuf;
- int *status;
- {
- int i;
- int nsets = 0;
- struct fpi fpi;
-
- for (i = 0; i < 256; i++)
- status[i] = UNDESC;
-
- for (;;) {
- int nchars;
- int *baseset = 0;
-
- if (pcbsd.action != NAS1) {
- if (nsets == 0) {
- sderr(120, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- break;
- }
- if (!matches(tbuf, kbaseset)) {
- if (nsets == 0) {
- sderr(118, tbuf+1, kbaseset);
- return FAIL;
- }
- break;
- }
- nsets++;
- MEMZERO((UNIV)&fpi, FPISZ);
- if (sdparm(tbuf, &pcblitv) != LIT1) {
- sderr(123, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- fpi.fpipubis = tbuf;
- /* Give a warning if it is not a CHARSET fpi. */
- if (parsefpi(&fpi))
- sderr(E_FORMAL, (UNCH *)0, (UNCH *)0);
- else if (fpi.fpic != FPICHARS)
- sderr(E_BADCLASS, kcharset, (UNCH *)0);
- else {
- fpi.fpipubis[fpi.fpil + fpi.fpill] = '\0';
- baseset = (int *)pmaplookup(charset_map,
- (char *)fpi.fpipubis + fpi.fpil);
- if (!baseset)
- sderr(E_UNKNOWNSET, fpi.fpipubis + fpi.fpil, (UNCH *)0);
- }
- if (sdname(tbuf, kdescset) == FAIL) return FAIL;
- nchars = 0;
- for (;;) {
- long start, count;
- long basenum;
- if (sdparm(tbuf, 0) != NUM1)
- break;
- start = atol((char *)tbuf);
- if (sdparm(tbuf, 0) != NUM1) {
- sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- count = atol((char *)tbuf);
- switch (sdparm(tbuf, &pcblitv)) {
- case NUM1:
- basenum = atol((char *)tbuf);
- break;
- case LIT1:
- basenum = UNKNOWN;
- break;
- case NAS1:
- if (matches(tbuf, kunused)) {
- basenum = UNUSED;
- break;
- }
- /* fall through */
- default:
- sderr(E_CHARDESC, ltous(start), (UNCH *)0);
- return FAIL;
- }
- if (start + count > 256)
- sderr(E_CHARRANGE, (UNCH *)0, (UNCH *)0);
- else {
- int i;
- int lim = (int)start + count;
- for (i = (int)start; i < lim; i++) {
- if (status[i] != UNDESC)
- sderr(E_CHARDUP, ltous((long)i), (UNCH *)0);
- else if (basenum == UNUSED || basenum == UNKNOWN)
- status[i] = (int)basenum;
- else if (baseset == 0)
- status[i] = UNKNOWN_SET;
- else {
- int n = basenum + (i - start);
- if (n < 0 || n > 255)
- sderr(E_CHARRANGE, (UNCH *)0, (UNCH *)0);
- else if (baseset[n] == UNUSED)
- sderr(E_BADBASECHAR, ltous((long)n), (UNCH *)0);
- else
- status[i] = baseset[n];
- }
- }
- }
- nchars++;
- }
- if (nchars == 0) {
- sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- }
- return SUCCESS;
- }
-
- /* Parse the CAPACITY section. Uses one token lookahead. */
-
- static int sdcapacity(tbuf)
- UNCH *tbuf;
- {
- int ncap;
-
- if (sdckname(tbuf, kcapacity) == FAIL)
- return FAIL;
- if (sdparm(tbuf, 0) != NAS1) {
- sderr(120, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- if (matches(tbuf, kpublic))
- return sdpubcapacity(tbuf);
- if (!matches(tbuf, ksgmlref)) {
- sderr(E_CAPACITY, tbuf+1, (UNCH *)0);
- return FAIL;
- }
- memcpy((UNIV)sd.capacity, (UNIV)refcapset, sizeof(sd.capacity));
- ncap = 0;
- for (;;) {
- int capno = -1;
- int i;
-
- if (sdparm(tbuf, 0) != NAS1)
- break;
- for (i = 0; i < SIZEOF(captab); i++)
- if (matches(tbuf, captab[i])) {
- capno = i;
- break;
- }
- if (capno < 0)
- break;
- if (sdparm(tbuf, 0) != NUM1) {
- sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- sd.capacity[capno] = atol((char *)tbuf);
- ncap++;
- }
- if (ncap == 0) {
- sderr(E_CAPMISSING, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
-
- return SUCCESS;
- }
-
- /* Parse a CAPACITY section that started with PUBLIC. Must do one
- token lookahead, since sdcapacity() also does. */
-
- static int sdpubcapacity(tbuf)
- UNCH *tbuf;
- {
- UNIV ptr;
- if (sdparm(tbuf, &pcblitv) != LIT1) {
- sderr(123, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- sdfixstandard(tbuf);
- ptr = pmaplookup(capset_map, (char *)tbuf);
- if (!ptr)
- sderr(E_CAPSET, tbuf, (UNCH *)0);
- else
- memcpy((UNIV)sd.capacity, (UNIV)ptr, sizeof(sd.capacity));
- (void)sdparm(tbuf, 0);
- return SUCCESS;
- }
-
- /* Parse the SCOPE section. Uses no lookahead. */
-
- static int sdscope(tbuf)
- UNCH *tbuf;
- {
- if (sdckname(tbuf, kscope) == FAIL)
- return FAIL;
- if (sdparm(tbuf, 0) != NAS1) {
- sderr(120, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- if (matches(tbuf, kdocument))
- ;
- else if (matches(tbuf, kinstance))
- sderr(E_INSTANCE, (UNCH *)0, (UNCH *)0);
- else {
- sderr(E_SCOPE, tbuf+1, (UNCH *)0);
- return FAIL;
- }
- return SUCCESS;
- }
-
- /* Parse the SYNTAX section. Uses one token lookahead. */
-
- static int sdsyntax(tbuf)
- UNCH *tbuf;
- {
- if (sdname(tbuf, ksyntax) == FAIL) return FAIL;
- if (sdparm(tbuf, 0) != NAS1) {
- sderr(120, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- if (matches(tbuf, kpublic))
- return sdpubsyntax(tbuf);
- return sdxsyntax(tbuf);
- }
-
- /* Parse the SYNTAX section which starts with PUBLIC. Uses one token
- lookahead. */
-
- static int sdpubsyntax(tbuf)
- UNCH *tbuf;
- {
- int nswitches;
- if (sdparm(tbuf, &pcblitv) != LIT1)
- return FAIL;
- sdfixstandard(tbuf);
- if (ustrcmp(tbuf, CORE_SYNTAX) == 0)
- sd.shortref = 0;
- else if (ustrcmp(tbuf, REFERENCE_SYNTAX) == 0)
- sd.shortref = 1;
- else
- sderr(E_SYNTAX, tbuf, (UNCH *)0);
- if (sdparm(tbuf, 0) != NAS1)
- return SUCCESS;
- if (!matches(tbuf, kswitches))
- return SUCCESS;
- nswitches = 0;
- for (;;) {
- int errsw = 0;
-
- if (sdparm(tbuf, 0) != NUM1)
- break;
- if (atol((char *)tbuf) > 255) {
- sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
- errsw = 1;
- }
- if (sdparm(tbuf, 0) != NUM1) {
- sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- if (!errsw) {
- if (atol((char *)tbuf) > 255)
- sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
- }
- nswitches++;
- }
- if (nswitches == 0) {
- sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- sderr(E_SWITCHES, (UNCH *)0, (UNCH *)0);
- return SUCCESS;
- }
-
- /* Parse an explicit concrete syntax. Uses one token lookahead. */
-
- static
- int sdxsyntax(tbuf)
- UNCH *tbuf;
- {
- static int (*section[]) P((UNCH *)) = {
- sdshunchar,
- sdsynref,
- sdfunction,
- sdnaming,
- sddelim,
- sdnames,
- sdquantity,
- };
- int i;
-
- for (i = 0; i < SIZEOF(section); i++)
- if ((*section[i])(tbuf) == FAIL)
- return FAIL;
- return SUCCESS;
- }
-
- /* Parse the SHUNCHAR section. Uses one token lookahead. */
-
- static
- int sdshunchar(tbuf)
- UNCH *tbuf;
- {
- int i;
- for (i = 0; i < 256; i++)
- char_flags[i] &= ~CHAR_SHUNNED;
-
- if (sdckname(tbuf, kshunchar) == FAIL)
- return FAIL;
-
- if (sdparm(tbuf, 0) == NAS1) {
- if (matches(tbuf, knone)) {
- (void)sdparm(tbuf, 0);
- return SUCCESS;
- }
- if (matches(tbuf, kcontrols)) {
- for (i = 0; i < 256; i++)
- if (ISASCII(i) && iscntrl(i))
- char_flags[i] |= CHAR_SHUNNED;
- if (sdparm(tbuf, 0) != NUM1)
- return SUCCESS;
- }
- }
- if (pcbsd.action != NUM1) {
- sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- do {
- long n = atol((char *)tbuf);
- if (n > 255)
- sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
- else
- char_flags[(int)n] |= CHAR_SHUNNED;
- } while (sdparm(tbuf, 0) == NUM1);
- return SUCCESS;
- }
-
- /* Parse the syntax reference character set. Uses one token lookahead. */
-
- static
- int sdsynref(tbuf)
- UNCH *tbuf;
- {
- return sdcsdesc(tbuf, synrefcharset);
- }
-
- /* Translate a character number from the syntax reference character set
- to the system character set. If it can't be done, give an error message
- and return -1. */
-
- static
- int sdtranscharnum(tbuf)
- UNCH *tbuf;
- {
- long n = atol((char *)tbuf);
- if (n > 255) {
- sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
- return -1;
- }
- return sdtranschar((int)n);
- }
-
-
- static
- int sdtranschar(n)
- int n;
- {
- int ch = synrefcharset[n];
- if (ch >= 0)
- return ch;
- switch (ch) {
- case UNUSED:
- sderr(E_SYNREFUNUSED, ltous((long)n), (UNCH *)0);
- break;
- case UNDESC:
- sderr(E_SYNREFUNDESC, ltous((long)n), (UNCH *)0);
- break;
- case UNKNOWN:
- sderr(E_SYNREFUNKNOWN, ltous((long)n), (UNCH *)0);
- break;
- case UNKNOWN_SET:
- sderr(E_SYNREFUNKNOWNSET, ltous((long)n), (UNCH *)0);
- break;
- default:
- abort();
- }
- return -1;
- }
-
-
- /* Parse the function section. Uses two tokens lookahead. "NAMING"
- could be a function name. */
-
- static
- int sdfunction(tbuf)
- UNCH *tbuf;
- {
- static UNCH *fun[] = { kre, krs, kspace };
- static int funval[] = { RECHAR, RSCHAR, ' ' };
- int i;
- int had_tab = 0;
- int changed = 0; /* attempted to change reference syntax */
-
- if (sdckname(tbuf, kfunction) == FAIL)
- return FAIL;
- for (i = 0; i < SIZEOF(fun); i++) {
- int ch;
- if (sdname(tbuf, fun[i]) == FAIL)
- return FAIL;
- if (sdparm(tbuf, 0) != NUM1) {
- sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- ch = sdtranscharnum(tbuf);
- if (ch >= 0 && ch != funval[i])
- changed = 1;
- }
- for (;;) {
- int tabsw = 0;
- int namingsw = 0;
- if (sdparm(tbuf, 0) != NAS1) {
- sderr(120, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- if (matches(tbuf, (UNCH *)"TAB")) {
- tabsw = 1;
- if (had_tab)
- sderr(E_FUNDUP, (UNCH *)0, (UNCH *)0);
- }
- else {
- for (i = 0; i < SIZEOF(fun); i++)
- if (matches(tbuf, fun[i]))
- sderr(E_BADFUN, fun[i], (UNCH *)0);
- if (matches(tbuf, knaming))
- namingsw = 1;
- else
- changed = 1;
- }
- if (sdparm(tbuf, 0) != NAS1) {
- sderr(120, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- if (namingsw) {
- if (matches(tbuf, klcnmstrt))
- break;
- changed = 1;
- }
- if (sdparm(tbuf, 0) != NUM1) {
- sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- if (tabsw && !had_tab) {
- int ch = sdtranscharnum(tbuf);
- if (ch >= 0 && ch != TABCHAR)
- changed = 1;
- had_tab = 1;
- }
-
- }
- if (!had_tab)
- changed = 1;
- if (changed)
- sderr(E_FUNCHAR, (UNCH *)0, (UNCH *)0);
- return SUCCESS;
- }
-
- /* Parse the NAMING section. Uses no lookahead. */
-
- static
- int sdnaming(tbuf)
- UNCH *tbuf;
- {
- int i;
- int bad = 0;
- static UNCH *classes[] = { klcnmstrt, kucnmstrt, klcnmchar, kucnmchar };
- static UNCH *types[] = { kgeneral, kentity };
-
- #define NCLASSES SIZEOF(classes)
-
- int bufsize = 4; /* allocated size of buf */
- UNCH *buf = (UNCH *)rmalloc(bufsize); /* holds characters
- in naming classes */
- int bufi = 0; /* next index into buf */
- int start[NCLASSES]; /* index of first character for each class */
- int count[NCLASSES]; /* number of characters for each class */
-
- for (i = 0; i < NCLASSES; i++) {
- UNCH *s;
-
- if (sdckname(tbuf, classes[i]) == FAIL) {
- frem((UNIV)buf);
- return FAIL;
- }
- if (sdparm(tbuf, &pcblitp) != LIT1) {
- sderr(123, (UNCH *)0, (UNCH *)0);
- frem((UNIV)buf);
- return FAIL;
- }
- start[i] = bufi;
-
- for (s = tbuf; *s; s++) {
- int c = *s;
- if (c == DELNONCH) {
- c = UNSHIFTNON(*s);
- s++;
- }
- c = sdtranschar(c);
- if (c < 0)
- bad = 1;
- else if ((char_flags[c] & (CHAR_SIGNIFICANT | CHAR_MAGIC))
- && c != '.' && c != '-') {
- int class = lextoke[c];
- if (class == SEP || class == SP || class == NMC
- || class == NMS || class == NU)
- sderr(E_NMBAD, ltous((long)c), (UNCH *)0);
- else
- sderr(E_NMUNSUP, ltous((long)c), (UNCH *)0);
- bad = 1;
- }
- if (bufi >= bufsize)
- buf = (UNCH *)rrealloc((UNIV)buf, bufsize *= 2);
- buf[bufi++] = c;
- }
-
- count[i] = bufi - start[i];
- (void)sdparm(tbuf, 0);
- }
- if (!bad && count[0] != count[1]) {
- sderr(E_NMSTRTCNT, (UNCH *)0, (UNCH *)0);
- bad = 1;
- }
- if (!bad && count[2] != count[3]) {
- sderr(E_NMCHARCNT, (UNCH *)0, (UNCH *)0);
- bad = 1;
- }
- if (!bad) {
- nlextoke = (UNCH *)rmalloc(256);
- memcpy((UNIV)nlextoke, lextoke, 256);
- nlextoke['.'] = nlextoke['-'] = INV;
-
- nlextran = (UNCH *)rmalloc(256);
- memcpy((UNIV)nlextran, lextran, 256);
-
- for (i = 0; i < count[0]; i++) {
- UNCH lc = buf[start[0] + i];
- UNCH uc = buf[start[1] + i];
- nlextoke[lc] = NMS;
- nlextoke[uc] = NMS;
- nlextran[lc] = uc;
- }
-
- for (i = 0; i < count[2]; i++) {
- UNCH lc = buf[start[2] + i];
- UNCH uc = buf[start[3] + i];
- if (nlextoke[lc] == NMS) {
- sderr(E_NMDUP, ltous((long)lc), (UNCH *)0);
- bad = 1;
- }
- else if (nlextoke[uc] == NMS) {
- sderr(E_NMDUP, ltous((long)uc), (UNCH *)0);
- bad = 1;
- }
- else {
- nlextoke[lc] = NMC;
- nlextoke[uc] = NMC;
- nlextran[lc] = uc;
- }
- }
- if (nlextoke['-'] != NMC) {
- sderr(E_NMMINUS, (UNCH *)0, (UNCH *)0);
- bad = 1;
- }
- if (bad) {
- if (nlextoke) {
- frem((UNIV)nlextoke);
- nlextoke = 0;
- }
- if (nlextran) {
- frem((UNIV)nlextran);
- nlextran = 0;
- }
- }
- }
-
- frem((UNIV)buf);
-
- if (sdckname(tbuf, knamecase) == FAIL)
- return FAIL;
- for (i = 0; i < SIZEOF(types); ++i) {
- if (sdname(tbuf, types[i]) == FAIL)
- return FAIL;
- if (sdparm(tbuf, 0) != NAS1) {
- sderr(120, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- if (matches(tbuf, kyes))
- sd.namecase[i] = 1;
- else if (matches(tbuf, kno))
- sd.namecase[i] = 0;
- else {
- sderr(E_YESNO, tbuf+1, (UNCH *)0);
- return FAIL;
- }
- }
- return SUCCESS;
- }
-
- /* Parse the DELIM section. Uses one token lookahead. */
-
- static
- int sddelim(tbuf)
- UNCH *tbuf;
- {
- int changed = 0;
- if (sdname(tbuf, kdelim) == FAIL
- || sdname(tbuf, kgeneral) == FAIL
- || sdname(tbuf, ksgmlref) == FAIL)
- return FAIL;
- for (;;) {
- if (sdparm(tbuf, 0) != NAS1) {
- sderr(120, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- if (matches(tbuf, kshortref))
- break;
- if (sdparm(tbuf, &pcblitp) != LIT1) {
- sderr(123, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- changed = 1;
- }
- if (changed) {
- sderr(E_GENDELIM, (UNCH *)0,(UNCH *)0);
- changed = 0;
- }
- if (sdparm(tbuf, 0) != NAS1) {
- sderr(120, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- if (matches(tbuf, ksgmlref))
- sd.shortref = 1;
- else if (matches(tbuf, knone))
- sd.shortref = 0;
- else {
- sderr(118, tbuf+1, ksgmlref); /* probably they forgot SGMLREF */
- return FAIL;
- }
- while (sdparm(tbuf, &pcblitp) == LIT1)
- changed = 1;
- if (changed)
- sderr(E_SRDELIM, (UNCH *)0, (UNCH *)0);
- return SUCCESS;
- }
-
- /* Parse the NAMES section. Uses one token lookahead. */
-
- static
- int sdnames(tbuf)
- UNCH *tbuf;
- {
- int i;
- if (sdckname(tbuf, knames) == FAIL)
- return FAIL;
- if (sdname(tbuf, ksgmlref) == FAIL)
- return FAIL;
-
- while (sdparm(tbuf, 0) == NAS1) {
- int j;
- if (matches(tbuf, kquantity))
- break;
- for (i = 0; i < NKEYS; i++)
- if (matches(tbuf, key[i]))
- break;
- if (i >= NKEYS) {
- sderr(E_BADKEY, tbuf+1, (UNCH *)0);
- return FAIL;
- }
- if (sdparm(tbuf, &pcblitp) != NAS1) {
- sderr(120, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- if (!newkey) {
- newkey = (UNCH (*)[REFNAMELEN+1])rmalloc((REFNAMELEN+1)*NKEYS);
- MEMZERO((UNIV)newkey, (REFNAMELEN+1)*NKEYS);
- }
- for (j = 0; j < NKEYS; j++) {
- if (matches(tbuf, key[j])) {
- sderr(E_REFNAME, tbuf + 1, (UNCH *)0);
- break;
- }
- if (matches(tbuf, newkey[j])) {
- sderr(E_DUPNAME, tbuf + 1, (UNCH *)0);
- break;
- }
- }
- if (j >= NKEYS)
- ustrcpy(newkey[i], tbuf + 1);
- }
- /* Now install the new keys. */
- if (newkey) {
- for (i = 0; i < NKEYS; i++)
- if (newkey[i][0] != '\0') {
- UNCH temp[REFNAMELEN + 1];
-
- ustrcpy(temp, key[i]);
- ustrcpy(key[i], newkey[i]);
- ustrcpy(newkey[i], temp);
- }
- }
- return SUCCESS;
- }
-
- /* Parse the QUANTITY section. Uses one token lookahead. */
-
- static int sdquantity(tbuf)
- UNCH *tbuf;
- {
- int quantity[NQUANTITY];
- int i;
-
- for (i = 0; i < NQUANTITY; i++)
- quantity[i] = -1;
- if (sdckname(tbuf, kquantity) == FAIL)
- return FAIL;
- if (sdname(tbuf, ksgmlref) == FAIL)
- return FAIL;
- while (sdparm(tbuf, 0) == NAS1 && !matches(tbuf, kfeatures)) {
- long n;
- for (i = 0; i < SIZEOF(quantity_names); i++)
- if (matches(tbuf, quantity_names[i]))
- break;
- if (i >= SIZEOF(quantity_names)) {
- sderr(E_BADQUANTITY, tbuf + 1, (UNCH *)0);
- return FAIL;
- }
- if (sdparm(tbuf, 0) != NUM1) {
- sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- n = atol((char *)tbuf);
- if (n < sd.quantity[i])
- sderr(E_QUANTITY, (UNCH *)quantity_names[i],
- ltous((long)sd.quantity[i]));
- else if (n > max_quantity[i]) {
- sderr(E_QTOOBIG, (UNCH *)quantity_names[i],
- ltous((long)max_quantity[i]));
- quantity[i] = max_quantity[i];
- }
- else
- quantity[i] = (int)n;
- }
- for (i = 0; i < NQUANTITY; i++)
- if (quantity[i] > 0) {
- sd.quantity[i] = quantity[i];
- if (!quantity_changed)
- quantity_changed = (char *)rmalloc(NQUANTITY);
- quantity_changed[i] = 1;
- }
- return SUCCESS;
- }
-
- /* Parse the FEATURES section. Uses no lookahead. */
-
- static int sdfeatures(tbuf)
- UNCH *tbuf;
- {
- static struct {
- UNCH *name;
- UNCH argtype; /* 0 = no argument, 1 = boolean, 2 = numeric */
- UNIV valp; /* UNCH * if boolean, long * if numeric. */
- } features[] = {
- { kminimize, 0, 0 },
- { kdatatag, 1, 0 },
- { komittag, 1, (UNIV)&sd.omittag },
- { krank, 1, 0 },
- { kshorttag, 1, (UNIV)&sd.shorttag },
- { klink, 0, 0 },
- { ksimple, 2, 0 },
- { kimplicit, 1, 0 },
- { kexplicit, 2, 0 },
- { kother, 0, 0 },
- { kconcur, 2, 0 },
- { ksubdoc, 2, (UNIV)&sd.subdoc },
- { kformal, 1, (UNIV)&sd.formal },
- };
-
- int i;
-
- if (sdckname(tbuf, kfeatures) == FAIL)
- return FAIL;
- for (i = 0; i < SIZEOF(features); i++) {
- if (sdname(tbuf, features[i].name) == FAIL) return FAIL;
- if (features[i].argtype > 0) {
- long n;
- if (sdparm(tbuf, 0) != NAS1) {
- sderr(120, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- if (matches(tbuf, kyes)) {
- if (features[i].argtype > 1) {
- if (sdparm(tbuf, 0) != NUM1) {
- sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- n = atol((char *)tbuf);
- if (n == 0)
- sderr(E_ZEROFEATURE, features[i].name, (UNCH *)0);
- }
- else
- n = 1;
- }
- else if (matches(tbuf, kno))
- n = 0;
- else {
- sderr(E_YESNO, tbuf+1, (UNCH *)0);
- return FAIL;
- }
- if (features[i].valp == 0) {
- if (n > 0)
- sderr(E_NOTSUPPORTED, features[i].name,
- (UNCH *)0);
- }
- else if (features[i].argtype > 1)
- *(long *)features[i].valp = n;
- else
- *(UNCH *)features[i].valp = (UNCH)n;
- }
- }
- if (!sd.shorttag)
- noemptytag();
- return SUCCESS;
- }
-
- /* Parse the APPINFO section. Uses no lookahead. */
-
- static int sdappinfo(tbuf)
- UNCH *tbuf;
- {
- if (sdname(tbuf, kappinfo) == FAIL) return FAIL;
- switch (sdparm(tbuf, &pcblitv)) {
- case LIT1:
- appinfosw = 1;
- break;
- case NAS1:
- if (matches(tbuf, knone))
- break;
- sderr(118, tbuf+1, knone);
- return FAIL;
- default:
- sderr(E_XNMLIT, knone, (UNCH *)0);
- return FAIL;
- }
- return SUCCESS;
- }
-
- /* Change a prefix of ISO 8879-1986 to ISO 8879:1986. Amendment 1 to
- the standard requires the latter. */
-
- static VOID sdfixstandard(tbuf)
- UNCH *tbuf;
- {
- if (strncmp((char *)tbuf, "ISO 8879-1986", 13) == 0) {
- sderr(E_STANDARD, (UNCH *)0, (UNCH *)0);
- tbuf[8] = ':';
- }
- }
-
- static int sdname(tbuf, key)
- UNCH *tbuf;
- UNCH *key;
- {
- if (sdparm(tbuf, 0) != NAS1) {
- sderr(120, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- if (!matches(tbuf, key)) {
- sderr(118, tbuf+1, key);
- return FAIL;
- }
- return SUCCESS;
- }
-
- static int sdckname(tbuf, key)
- UNCH *tbuf;
- UNCH *key;
- {
- if (pcbsd.action != NAS1) {
- sderr(120, (UNCH *)0, (UNCH *)0);
- return FAIL;
- }
- if (!matches(tbuf, key)) {
- sderr(118, tbuf+1, key);
- return FAIL;
- }
- return SUCCESS;
- }
-
- /* Parse a SGML declaration parameter. If lpcb is NULL, pt must be
- REFNAMELEN+2 characters long, otherwise at least LITLEN+2 characters
- long. LPCB should be NULL if a literal is not allowed. */
-
- static int sdparm(pt, lpcb)
- UNCH *pt; /* Token buffer. */
- struct parse *lpcb; /* PCB for literal parse. */
- {
- for (;;) {
- parse(&pcbsd);
- if (pcbsd.action != ISIG)
- break;
- sderr(E_SIGNIFICANT, (UNCH *)0, (UNCH *)0);
- }
- ++parmno;
- switch (pcbsd.action) {
- case LIT1:
- if (!lpcb) {
- sderr(E_BADLIT, (UNCH *)0, (UNCH *)0);
- REPEATCC;
- return pcbsd.action = INV_;
- }
- parselit(pt, lpcb, REFLITLEN, lex.d.lit);
- return pcbsd.action;
- case LIT2:
- if (!lpcb) {
- sderr(E_BADLIT, (UNCH *)0, (UNCH *)0);
- REPEATCC;
- return pcbsd.action = INV_;
- }
- parselit(pt, lpcb, REFLITLEN, lex.d.lita);
- return pcbsd.action = LIT1;
- case NAS1:
- parsenm(pt, 1);
- return pcbsd.action;
- case NUM1:
- parsetkn(pt, NU, REFNAMELEN);
- return pcbsd.action;
- }
- return pcbsd.action;
- }
-
- VOID sdinit()
- {
- int i;
- /* Shunned character numbers in the reference concrete syntax. */
- static UNCH refshun[] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
- 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, 255
- };
- UNCH **p;
- /* A character is magic if it is a non-SGML character used for
- some internal purpose in the parser. */
- char_flags[EOS] |= CHAR_MAGIC;
- char_flags[EOBCHAR] |= CHAR_MAGIC;
- char_flags[EOFCHAR] |= CHAR_MAGIC;
- char_flags[GENRECHAR] |= CHAR_MAGIC;
- char_flags[DELNONCH] |= CHAR_MAGIC;
- char_flags[DELCDATA] |= CHAR_MAGIC;
- char_flags[DELSDATA] |= CHAR_MAGIC;
-
- /* Figure out the significant SGML characters. */
- for (p = lextabs; *p; p++) {
- UNCH datclass = (*p)[CANON_DATACHAR];
- UNCH nonclass = (*p)[CANON_NONSGML];
- for (i = 0; i < 256; i++)
- if (!(char_flags[i] & CHAR_MAGIC)
- && (*p)[i] != datclass && (*p)[i] != nonclass)
- char_flags[i] |= CHAR_SIGNIFICANT;
- }
- for (i = 0; i < SIZEOF(refshun); i++)
- char_flags[refshun[i]] |= CHAR_SHUNNED;
- for (i = 0; i < 256; i++)
- if (ISASCII(i) && iscntrl(i))
- char_flags[i] |= CHAR_SHUNNED;
- bufsalloc();
- }
-
-
- static
- VOID bufsalloc()
- {
- scbs = (struct source *)rmalloc((REFENTLVL+1)*sizeof(struct source));
- tbuf = (UNCH *)rmalloc(REFATTSPLEN+REFLITLEN+1);
- /* entbuf is used for parsing numeric character references */
- entbuf = (UNCH *)rmalloc(REFNAMELEN + 2);
- }
-
- static
- VOID bufsrealloc()
- {
- UNS size;
-
- if (ENTLVL != REFENTLVL)
- scbs = (struct source *)rrealloc((UNIV)scbs,
- (ENTLVL+1)*sizeof(struct source));
- /* Calculate the size for tbuf. */
- size = LITLEN + ATTSPLEN;
- if (PILEN > size)
- size = PILEN;
- if (BSEQLEN > size)
- size = BSEQLEN;
- if (size != REFATTSPLEN + REFLITLEN)
- tbuf = (UNCH *)rrealloc((UNIV)tbuf, size + 1);
- if (NAMELEN != REFNAMELEN)
- entbuf = (UNCH *)rrealloc((UNIV)entbuf, NAMELEN + 2);
- }
-
-
- /* Check that the non-SGML characters are compatible with the concrete
- syntax and munge the lexical tables accordingly. If IMPLIED is
- non-zero, then the SGML declaration was implied; in this case, don't
- give error messages about shunned characters not being declared
- non-SGML. Also make any changes that are required by the NAMING section.
- */
-
- static VOID setlexical()
- {
- int i;
- UNCH **p;
-
- if (nlextoke) {
- /* Handle characters that were made significant by the
- NAMING section. */
- for (i = 0; i < 256; i++)
- if (nlextoke[i] == NMC || nlextoke[i] == NMS)
- char_flags[i] |= CHAR_SIGNIFICANT;
- }
-
- for (i = 0; i < 256; i++)
- if (char_flags[i] & CHAR_SIGNIFICANT) {
- /* Significant SGML characters musn't be non-SGML. */
- if (char_flags[i] & CHAR_NONSGML) {
- UNCH buf[2];
- buf[0] = i;
- buf[1] = '\0';
- sderr(E_NONSGML, buf, (UNCH *)0);
- char_flags[i] &= ~CHAR_NONSGML;
- }
- }
- else {
- /* Shunned characters that are not significant SGML characters
- must be non-SGML. */
- if ((char_flags[i] & (CHAR_SHUNNED | CHAR_NONSGML))
- == CHAR_SHUNNED) {
- sderr(E_SHUNNED, ltous((long)i), (UNCH *)0);
- char_flags[i] |= CHAR_NONSGML;
- }
- }
-
-
- /* Now munge the lexical tables. */
- for (p = lextabs; *p; p++) {
- UNCH nonclass = (*p)[CANON_NONSGML];
- UNCH datclass = (*p)[CANON_DATACHAR];
- UNCH nmcclass = (*p)[CANON_NMC];
- UNCH nmsclass = (*p)[CANON_NMS];
- UNCH minclass = (*p)[CANON_MIN];
- for (i = 0; i < 256; i++) {
- if (char_flags[i] & CHAR_NONSGML) {
- /* We already know that it's not significant. */
- if (!(char_flags[i] & CHAR_MAGIC))
- (*p)[i] = nonclass;
- }
- else {
- if (char_flags[i] & CHAR_MAGIC) {
- sderr(E_MUSTBENON, ltous((long)i), (UNCH *)0);
- }
- else if (!(char_flags[i] & CHAR_SIGNIFICANT))
- (*p)[i] = datclass;
- else if (nlextoke
- /* This relies on the fact that lextoke
- occurs last in lextabs. */
- && lextoke[i] != nlextoke[i]) {
- switch (nlextoke[i]) {
- case NMC:
- (*p)[i] = nmcclass;
- break;
- case NMS:
- (*p)[i] = nmsclass;
- break;
- case INV:
- /* This will happen if period is not a
- name character. */
- (*p)[i] = minclass;
- break;
- default:
- abort();
- }
- }
- }
- }
- }
- if (nlextran) {
- memcpy((UNIV)lextran, (UNIV)nlextran, 256);
- frem((UNIV)nlextran);
- }
- if (nlextoke) {
- frem((UNIV)nlextoke);
- nlextoke = 0;
- }
-
- }
-
- /* Munge parse tables so that empty start and end tags are not recognized. */
-
- static VOID noemptytag()
- {
- static struct parse *pcbs[] = { &pcbconm, &pcbcone, &pcbconr, &pcbconc };
- int i;
-
- for (i = 0; i < SIZEOF(pcbs); i++) {
- int maxclass, maxstate;
- int j, k, act;
- UNCH *plex = pcbs[i]->plex;
- UNCH **ptab = pcbs[i]->ptab;
-
- /* Figure out the maximum lexical class. */
- maxclass = 0;
- for (j = 0; j < 256; j++)
- if (plex[j] > maxclass)
- maxclass = plex[j];
-
- /* Now figure out the maximum state number and at the same time
- change actions. */
-
- maxstate = 0;
-
- for (j = 0; j <= maxstate; j += 2) {
- for (k = 0; k <= maxclass; k++)
- if (ptab[j][k] > maxstate)
- maxstate = ptab[j][k];
- /* If the '>' class has an empty start or end tag action,
- change it to the action that the NMC class has. */
- act = ptab[j + 1][plex['>']];
- if (act == NET_ || act == NST_)
- ptab[j + 1][plex['>']] = ptab[j + 1][plex['_']];
- }
- }
- }
-
- /* Lookup the value of the entry in pmap PTR whose key is KEY. */
-
- static UNIV pmaplookup(ptr, key)
- struct pmap *ptr;
- char *key;
- {
- for (; ptr->name; ptr++)
- if (strcmp(key, ptr->name) == 0)
- return ptr->value;
- return 0;
- }
-
- /* Return an ASCII representation of N. */
-
- static UNCH *ltous(n)
- long n;
- {
- static char buf[sizeof(long)*3 + 2];
- sprintf(buf, "%ld", n);
- return (UNCH *)buf;
- }
-
- VOID sgmlwrsd(fp)
- FILE *fp;
- {
- int i;
- int changed;
- char *p;
- char uc[256]; /* upper case characters (with different lower
- case characters) */
- char lcletter[256]; /* LC letters: a-z */
-
- fprintf(fp, "<!SGML \"%s\"\n", standard);
- fprintf(fp, "CHARSET\nBASESET \"%s//CHARSET %s//%s\"\nDESCSET\n",
- SYSTEM_CHARSET_OWNER,
- SYSTEM_CHARSET_DESCRIPTION,
- SYSTEM_CHARSET_DESIGNATING_SEQUENCE);
-
- if (!done_nonsgml) {
- done_nonsgml = 1;
- for (i = 0; i < 256; i++)
- if ((char_flags[i] & (CHAR_SIGNIFICANT | CHAR_SHUNNED))
- == CHAR_SHUNNED)
- char_flags[i] |= CHAR_NONSGML;
- }
- i = 0;
- while (i < 256) {
- int j;
- for (j = i + 1; j < 256; j++)
- if ((char_flags[j] & CHAR_NONSGML)
- != (char_flags[i] & CHAR_NONSGML))
- break;
- if (char_flags[i] & CHAR_NONSGML)
- fprintf(fp, "%d %d UNUSED\n", i, j - i);
- else
- fprintf(fp, "%d %d %d\n", i, j - i, i);
- i = j;
- }
- fprintf(fp, "CAPACITY\n");
- changed = 0;
- for (i = 0; i < NCAPACITY; i++)
- if (refcapset[i] != sd.capacity[i]) {
- if (!changed) {
- fprintf(fp, "SGMLREF\n");
- changed = 1;
- }
- fprintf(fp, "%s %ld\n", captab[i], sd.capacity[i]);
- }
- if (!changed)
- fprintf(fp, "PUBLIC \"%s\"\n", capset_map[0].name);
- fprintf(fp, "SCOPE DOCUMENT\n");
-
- fprintf(fp, "SYNTAX\nSHUNCHAR");
- for (i = 0; i < 256; i++)
- if (char_flags[i] & CHAR_SHUNNED)
- fprintf(fp, " %d", i);
- fprintf(fp, "\n");
- fprintf(fp, "BASESET \"%s//CHARSET %s//%s\"\nDESCSET 0 256 0\n",
- SYSTEM_CHARSET_OWNER,
- SYSTEM_CHARSET_DESCRIPTION,
- SYSTEM_CHARSET_DESIGNATING_SEQUENCE);
-
- fprintf(fp, "FUNCTION\nRE 13\nRS 10\nSPACE 32\nTAB SEPCHAR 9\n");
-
- MEMZERO((UNIV)uc, 256);
- for (i = 0; i < 256; i++)
- if (lextran[i] != i)
- uc[lextran[i]] = 1;
-
- MEMZERO((UNIV)lcletter, 256);
- for (p = "abcdefghijklmnopqrstuvwxyz"; *p; p++)
- lcletter[(unsigned char)*p]= 1;
-
- fprintf(fp, "NAMING\n");
- fputs("LCNMSTRT \"", fp);
- for (i = 0; i < 256; i++)
- if (lextoke[i] == NMS && !uc[i] && !lcletter[i])
- fprintf(fp, "%d;", i);
- fputs("\"\n", fp);
- fputs("UCNMSTRT \"", fp);
- for (i = 0; i < 256; i++)
- if (lextoke[i] == NMS && !uc[i] && !lcletter[i])
- fprintf(fp, "%d;", lextran[i]);
- fputs("\"\n", fp);
- fputs("LCNMCHAR \"", fp);
- for (i = 0; i < 256; i++)
- if (lextoke[i] == NMC && !uc[i])
- fprintf(fp, "%d;", i);
- fputs("\"\n", fp);
- fputs("UCNMCHAR \"", fp);
- for (i = 0; i < 256; i++)
- if (lextoke[i] == NMC && !uc[i])
- fprintf(fp, "%d;", lextran[i]);
- fputs("\"\n", fp);
-
- fprintf(fp, "NAMECASE\nGENERAL %s\nENTITY %s\n",
- sd.namecase[0] ? "YES" : "NO",
- sd.namecase[1] ? "YES" : "NO");
- fprintf(fp, "DELIM\nGENERAL SGMLREF\nSHORTREF %s\n",
- sd.shortref ? "SGMLREF" : "NONE");
- fprintf(fp, "NAMES SGMLREF\n");
- if (newkey) {
- /* The reference key was saved in newkey. */
- for (i = 0; i < NKEYS; i++)
- if (newkey[i][0])
- fprintf(fp, "%s %s\n", newkey[i], key[i]);
- }
- fprintf(fp, "QUANTITY SGMLREF\n");
- if (quantity_changed)
- for (i = 0; i < NQUANTITY; i++)
- if (quantity_changed[i])
- fprintf(fp, "%s %d\n", quantity_names[i], sd.quantity[i]);
- fprintf(fp,
- "FEATURES\nMINIMIZE\nDATATAG NO OMITTAG %s RANK NO SHORTTAG %s\n",
- sd.omittag ? "YES" : "NO",
- sd.shorttag ? "YES" : "NO");
- fprintf(fp, "LINK SIMPLE NO IMPLICIT NO EXPLICIT NO\n");
- fprintf(fp, "OTHER CONCUR NO ");
- if (sd.subdoc > 0)
- fprintf(fp, "SUBDOC YES %ld ", sd.subdoc);
- else
- fprintf(fp, "SUBDOC NO ");
- fprintf(fp, "FORMAL %s\n", sd.formal ? "YES" : "NO");
- fprintf(fp, "APPINFO NONE");
- fprintf(fp, ">\n");
- }
-
- /*
- Local Variables:
- c-indent-level: 5
- c-continued-statement-offset: 5
- c-brace-offset: -5
- c-argdecl-indent: 0
- c-label-offset: -5
- End:
- */
-