home *** CD-ROM | disk | FTP | other *** search
- /*
- *******************************************************************************
- * *
- * COPYRIGHT: *
- * (C) Copyright International Business Machines Corporation, 1998, 1999 *
- * Licensed Material - Program-Property of IBM - All Rights Reserved. *
- * US Government Users Restricted Rights - Use, duplication, or disclosure *
- * restricted by GSA ADP Schedule Contract with IBM Corp. *
- * *
- *******************************************************************************
- *
- * File parse.c
- *
- * Modification History:
- *
- * Date Name Description
- * 05/26/99 stephen Creation.
- *******************************************************************************
- */
-
- #include "parse.h"
- #include "error.h"
- #include "uhash.h"
- #include "cmemory.h"
- #include "read.h"
- #include "ufile.h"
- #include "ustdio.h"
- #include "ustr.h"
- #include "list.h"
- #include "rblist.h"
- #include "ustring.h"
-
- /* Node IDs for the state transition table. */
- enum ENode {
- eError,
- eInitial, /* Next: Locale name */
- eGotLoc, /* Next: { */
- eIdle, /* Next: Tag name | } */
- eGotTag, /* Next: { */
- eNode5, /* Next: Data | Subtag */
- eNode6, /* Next: } | { | , */
- eList, /* Next: List data */
- eNode8, /* Next: , */
- eTagList, /* Next: Subtag data */
- eNode10, /* Next: } */
- eNode11, /* Next: Subtag */
- eNode12, /* Next: { */
- e2dArray, /* Next: Data | } */
- eNode14, /* Next: , | } */
- eNode15, /* Next: , | } */
- eNode16 /* Next: { | } */
- };
-
- /* Action codes for the state transtiion table. */
- enum EAction {
- /* Generic actions */
- eNOP = 0x0100, /* Do nothing */
- eOpen = 0x0200, /* Open a new locale data block with the data
- string as the locale name */
- eClose = 0x0300, /* Close a locale data block */
- eSetTag = 0x0400, /* Record the last string as the tag name */
-
- /* Comma-delimited lists */
- eBegList = 0x1100, /* Start a new string list with the last string
- as the first element */
- eEndList = 0x1200, /* Close a string list being built */
- eListStr = 0x1300, /* Record the last string as a data string and
- increment the index */
- eStr = 0x1400, /* Record the last string as a singleton string */
-
- /* 2-d lists */
- eBeg2dList = 0x2100, /* Start a new 2d string list with no elements as yet */
- eEnd2dList = 0x2200, /* Close a 2d string list being built */
- e2dStr = 0x2300, /* Record the last string as a 2d string */
- eNewRow = 0x2400, /* Start a new row */
-
- /* Tagged lists */
- eBegTagged = 0x3100, /* Start a new tagged list with the last
- string as the first subtag */
- eEndTagged = 0x3200, /* Close a tagged list being build */
- eSubtag = 0x3300, /* Record the last string as the subtag */
- eTaggedStr = 0x3400 /* Record the last string as a tagged string */
- };
-
- /* A struct which encapsulates a node ID and an action. */
- struct STransition {
- enum ENode fNext;
- enum EAction fAction;
- };
-
- /* This table describes an ATM (state machine) which parses resource
- bundle text files rather strictly. Each row represents a node. The
- columns of that row represent transitions into other nodes. Most
- transitions are "eError" because most transitions are
- disallowed. For example, if the parser has just seen a tag name, it
- enters node 4 ("eGotTag"). The state table then marks only one
- valid transition, which is into node 5, upon seeing an eOpenBrace
- token. We allow an extra comma after the last element in a
- comma-delimited list (transition from eList to eIdle on
- kCloseBrace). */
- static struct STransition gTransitionTable [] = {
- /* kString kOpenBrace kCloseBrace kComma*/
- {eError,eNOP}, {eError,eNOP}, {eError,eNOP}, {eError,eNOP},
-
- {eGotLoc,eOpen}, {eError,eNOP}, {eError,eNOP}, {eError,eNOP},
- {eError,eNOP}, {eIdle,eNOP}, {eError,eNOP}, {eError,eNOP},
-
- {eGotTag,eSetTag}, {eError,eNOP}, {eInitial,eClose}, {eError,eNOP},
- {eError,eNOP}, {eNode5,eNOP}, {eError,eNOP}, {eError,eNOP},
- {eNode6,eNOP}, {e2dArray,eBeg2dList},{eError,eNOP}, {eError,eNOP},
- {eError,eNOP}, {eTagList,eBegTagged},{eIdle,eStr}, {eList,eBegList},
-
- {eNode8,eListStr}, {eError,eNOP}, {eIdle,eEndList}, {eError,eNOP},
- {eError,eNOP}, {eError,eNOP}, {eIdle,eEndList}, {eList,eNOP},
-
- {eNode10,eTaggedStr},{eError,eNOP}, {eError,eNOP}, {eError,eNOP},
- {eError,eNOP}, {eError,eNOP}, {eNode11,eNOP}, {eError,eNOP},
- {eNode12,eNOP}, {eError,eNOP}, {eIdle,eEndTagged},{eError,eNOP},
- {eError,eNOP}, {eTagList,eSubtag}, {eError,eNOP}, {eError,eNOP},
-
- {eNode14,e2dStr}, {eError,eNOP}, {eNode15,eNOP}, {eError,eNOP},
- {eError,eNOP}, {eError,eNOP}, {eNode15,eNOP}, {e2dArray,eNOP},
- {eError,eNOP}, {e2dArray,eNewRow}, {eIdle,eEnd2dList},{eNode16,eNOP},
- {eError,eNOP}, {e2dArray,eNewRow}, {eIdle,eEnd2dList},{eError,eNOP}
- };
-
- /* Row length is 4 */
- #define GETTRANSITION(row,col) (gTransitionTable[col + (row<<2)])
-
- struct SRBItemList*
- parse(FileStream *f,
- UErrorCode *status)
- {
- struct UFILE *file;
- enum ETokenType type;
- enum ENode node;
- struct STransition t;
-
- struct UString token;
- struct UString tag;
- struct UString subtag;
- struct UString localeName;
- struct UString keyname;
-
- struct SRBItem *item;
- struct SRBItemList *list;
- struct SList *current;
-
- /* Hashtable for keeping track of seen tag names */
- struct UHashtable *data;
-
-
- if(U_FAILURE(*status)) return 0;
-
- /* setup */
-
- ustr_init(&token);
- ustr_init(&tag);
- ustr_init(&subtag);
- ustr_init(&localeName);
- ustr_init(&keyname);
-
- node = eInitial;
- data = 0;
- current = 0;
- item = 0;
-
- file = u_finit(f, status);
- list = rblist_open(status);
- if(U_FAILURE(*status)) goto finish;
-
- /* iterate through the stream */
- for(;;) {
-
- /* get next token from stream */
- type = getNextToken(file, &token, status);
- if(U_FAILURE(*status)) goto finish;
-
- switch(type) {
- case tok_EOF:
- *status = (node == eInitial) ? U_ZERO_ERROR : U_INVALID_FORMAT_ERROR;
- setErrorText("Unexpected EOF encountered");
- goto finish;
- /*break;*/
-
- case tok_error:
- *status = U_INVALID_FORMAT_ERROR;
- goto finish;
- /*break;*/
-
- default:
- break;
- }
-
- t = GETTRANSITION(node, type);
- node = t.fNext;
-
- if(node == eError) {
- *status = U_INVALID_FORMAT_ERROR;
- goto finish;
- }
-
- switch(t.fAction) {
- case eNOP:
- break;
-
- /* Record the last string as the tag name */
- case eSetTag:
- ustr_cpy(&tag, &token, status);
- if(U_FAILURE(*status)) goto finish;
- if(uhash_get(data, uhash_hashUString(tag.fChars)) != 0) {
- char *s;
- *status = U_INVALID_FORMAT_ERROR;
- s = icu_malloc(1024);
- strcpy(s, "Duplicate tag name detected: ");
- u_austrcpy(s+strlen(s), tag.fChars);
- setErrorText(s);
- goto finish;
- }
- break;
-
- /* Record a singleton string */
- case eStr:
- if(current != 0) {
- *status = U_INTERNAL_PROGRAM_ERROR;
- goto finish;
- }
- current = strlist_open(status);
- strlist_add(current, token.fChars, status);
- item = make_rbitem(tag.fChars, current, status);
- rblist_add(list, item, status);
- uhash_put(data, tag.fChars, status);
- if(U_FAILURE(*status)) goto finish;
- current = 0;
- item = 0;
- break;
-
- /* Begin a string list */
- case eBegList:
- if(current != 0) {
- *status = U_INTERNAL_PROGRAM_ERROR;
- goto finish;
- }
- current = strlist_open(status);
- strlist_add(current, token.fChars, status);
- if(U_FAILURE(*status)) goto finish;
- break;
-
- /* Record a comma-delimited list string */
- case eListStr:
- strlist_add(current, token.fChars, status);
- if(U_FAILURE(*status)) goto finish;
- break;
-
- /* End a string list */
- case eEndList:
- uhash_put(data, tag.fChars, status);
- item = make_rbitem(tag.fChars, current, status);
- rblist_add(list, item, status);
- if(U_FAILURE(*status)) goto finish;
- current = 0;
- item = 0;
- break;
-
- case eBeg2dList:
- if(current != 0) {
- *status = U_INTERNAL_PROGRAM_ERROR;
- goto finish;
- }
- current = strlist2d_open(status);
- if(U_FAILURE(*status)) goto finish;
- break;
-
- case eEnd2dList:
- uhash_put(data, tag.fChars, status);
- item = make_rbitem(tag.fChars, current, status);
- rblist_add(list, item, status);
- if(U_FAILURE(*status)) goto finish;
- current = 0;
- item = 0;
- break;
-
- case e2dStr:
- strlist2d_add(current, token.fChars, status);
- if(U_FAILURE(*status)) goto finish;
- break;
-
- case eNewRow:
- strlist2d_newRow(current, status);
- if(U_FAILURE(*status)) goto finish;
- break;
-
- case eBegTagged:
- if(current != 0) {
- *status = U_INTERNAL_PROGRAM_ERROR;
- goto finish;
- }
- current = taglist_open(status);
- ustr_cpy(&subtag, &token, status);
- if(U_FAILURE(*status)) goto finish;
- break;
-
- case eEndTagged:
- uhash_put(data, tag.fChars, status);
- item = make_rbitem(tag.fChars, current, status);
- rblist_add(list, item, status);
- if(U_FAILURE(*status)) goto finish;
- current = 0;
- item = 0;
- break;
-
- case eTaggedStr:
- taglist_add(current, subtag.fChars, token.fChars, status);
- if(U_FAILURE(*status)) goto finish;
- break;
-
- /* Record the last string as the subtag */
- case eSubtag:
- ustr_cpy(&subtag, &token, status);
- if(U_FAILURE(*status)) goto finish;
- if(taglist_get(current, subtag.fChars, status) != 0) {
- *status = U_INVALID_FORMAT_ERROR;
- setErrorText("Duplicate subtag found in tagged list");
- goto finish;
- }
- break;
-
- case eOpen:
- if(data != 0) {
- *status = U_INTERNAL_PROGRAM_ERROR;
- goto finish;
- }
- ustr_cpy(&localeName, &token, status);
- rblist_setlocale(list, localeName.fChars, status);
- if(U_FAILURE(*status)) goto finish;
- data = uhash_open(uhash_hashUString, status);
- break;
-
- case eClose:
- if(data == 0) {
- *status = U_INTERNAL_PROGRAM_ERROR;
- goto finish;
- }
- break;
- }
- }
-
- finish:
-
- /* clean up */
-
- if(data != 0)
- uhash_close(data);
-
- if(item != 0)
- icu_free(item);
-
- ustr_deinit(&token);
- ustr_deinit(&tag);
- ustr_deinit(&subtag);
- ustr_deinit(&localeName);
- ustr_deinit(&keyname);
-
- if(file != 0)
- u_fclose(file);
-
- return list;
- }
-