home *** CD-ROM | disk | FTP | other *** search
Text File | 1990-06-15 | 24.8 KB | 1,317 lines |
- Newsgroups: comp.sources.misc
- From: shankar@hpclscu.cup.hp.com (Shankar Unni)
- subject: v13i052: Skeleton Parser and Lexer for ANSI C
- Sender: allbery@uunet.UU.NET (Brandon S. Allbery - comp.sources.misc)
-
- Posting-number: Volume 13, Issue 52
- Submitted-by: shankar@hpclscu.cup.hp.com (Shankar Unni)
- Archive-name: ansi-c_su/part01
-
- The following shar file is a set of source files (and test cases) for a
- small, portable parser and lexer for ANSI C. This stuff originally came to
- me from Vick Khera (@CMU), and I have beefed it up to handle typedefs
- properly and to do some rudimentary line control.
-
- If you have any enhancements, bug fixes or requests, send them to me
- (shankar%hpclscu@hpda.hp.com).
-
-
- # This is a shell archive. Remove anything before this line,
- # then unpack it by saving it in a file and typing "sh file".
- #
- # Wrapped by Shankar Unni <shankar@hpclscu> on Wed Jun 13 19:35:25 1990
- #
- # This archive contains:
- # Makefile README TEST1.C TEST2.C
- # gram.y main.c misctypes.h scan.l
- # scanaux.c scanaux.h
- #
-
- LANG=""; export LANG # clear LANG for the commands that follow
- PATH=/bin:/usr/bin:$PATH; export PATH # search /bin and /usr/bin ahead of the caller's PATH
-
- echo x - Makefile
- cat >Makefile <<'@EOF'
# Makefile for the ANSI C skeleton parser.
#
# gram.o and scan.o come from gram.y and scan.l through make's built-in
# yacc/lex rules.  YFLAGS = -d makes yacc write y.tab.h, which scan.l includes.
YFLAGS = -d
CFLAGS = -g
LFLAGS =

SRC = gram.y scan.l main.c scanaux.c misctypes.h scanaux.h
OBJ = main.o gram.o scan.o scanaux.o
TESTS = TEST1.C TEST2.C
BIN = ansi_c

$(BIN) : $(OBJ)
	cc $(CFLAGS) $(OBJ) -o $(BIN)

# Header dependencies, taken from the #include lines of each source file.
scan.o : y.tab.h scanaux.h
gram.o : misctypes.h scanaux.h
main.o : scanaux.h
scanaux.o : misctypes.h

clean :
	rm -f y.tab.h y.output *.o

# Run the freshly built parser from this directory; a bare "ansi_c" only
# works when "." happens to be on PATH.
test: $(BIN) $(TESTS)
	for fn in $(TESTS); do echo " "; echo $$fn: ; ./$(BIN) < $$fn ; done

shar: CGRAM.SHAR

CGRAM.SHAR: README Makefile $(SRC) $(TESTS)
	shar -c README Makefile $(SRC) $(TESTS) > $@
- @EOF
-
- chmod 664 Makefile
-
- echo x - README
- cat >README <<'@EOF'
- This grammar implements the latest ANSI C grammar. I'm not sure if there
- are any omissions. This stuff came to me from outside HP (via Vick Khera
- of CMU), and I have sort of fixed it up to conform to the latest draft of
- the standard (Dec 88). I'm not sure if I missed out on anything..
-
- I added all the typedef-handling code (scanaux.c). It can handle nested
- re-declarations of typedefs and all that jazz.
-
- Notes:
-
- 1. To make the parser, type "make". This should produce a program file
- called "ansi_c".
-
- 2. The scanner recognizes the cpp line specifiers of the form
- "# <number> [ <filename> ]", and sets internal variables accordingly.
- Any other line starting with "#" is ignored (this includes pragmas).
- If you need to do something with these, change the function line_number.
-
- 3. By default, the parse is silent. However, if the "ansi_c" skeleton is
- run with the "-L" option, then the input is echoed to the output. In
- general, if you want the input echoed, set the global variable "input_echo".
-
- 4. There are a couple of test cases to make sure that ansi_c compiled
- correctly. After making the parser, try "make test".
-
- ----
- Shankar.
- shankar%hpclscu@hpda.hp.com
-
- P.S. If you do make any improvements to this, I'd appreciate a copy of the
- changes.
- @EOF
-
- chmod 664 README
-
- echo x - TEST1.C
- cat >TEST1.C <<'@EOF'
- extern int fum(const char *); /* prototype whose parameter has no declarator name */
- typedef int FOO; /* file scope: FOO declared as a typedef name */
- int BAR; /* file scope: BAR declared as an ordinary variable */
-
- FOO junk; /* FOO used in type-specifier position */
-
- func1()
- {
- char *FOO; /* block scope: FOO redeclared as a variable */
- typedef char *BAR; /* block scope: BAR redeclared as a typedef name */
- BAR junk; /* BAR used in type-specifier position */
-
- FOO = 0; /* FOO used as a variable in a statement */
- }
-
- struct {
- int FOO; /* member spelled like the file-scope typedef */
- int BAR; /* member spelled like the file-scope variable */
- struct {
- FOO junk; /* FOO in type-specifier position inside a nested member list */
- } junk;
- } xxx;
-
- struct {
- FOO BAR; /* FOO as type specifier, BAR as member name */
- } yyy;
-
- func2()
- {
- FOO junk; /* after func1's block: FOO in type-specifier position again */
-
- BAR = 0; /* after func1's block: BAR used as a variable again */
- }
- @EOF
-
- chmod 664 TEST1.C
-
- echo x - TEST2.C
- cat >TEST2.C <<'@EOF'
- struct foo {int a;}; /* foo used as a struct tag */
- typedef int foo; /* foo declared as a typedef name */
- struct foo bar; /* the tag is still used after the typedef of the same spelling */
- foo fum; /* foo in type-specifier position */
- int foo(); /* foo redeclared as a function at the same scope */
- foo bar; /* foo in type-specifier position after that redeclaration -- NOTE(review): presumably meant to draw a syntax error; confirm */
- @EOF
-
- chmod 664 TEST2.C
-
- echo x - gram.y
- cat >gram.y <<'@EOF'
- %{
- #include "misctypes.h"
- #include "scanaux.h"
- extern char yytext[]; /* passed with yyleng to enter_tdname() in direct_declarator */
- extern int yyleng;
- %}
- %token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF /* multi-character token codes returned by scan.l */
- %token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
- %token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
- %token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
- %token XOR_ASSIGN OR_ASSIGN TYPE_NAME /* TYPE_NAME is returned by check_type() in scan.l when lookup_tdname() succeeds */
-
- %token TYPEDEF EXTERN STATIC AUTO REGISTER
- %token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID
- %token STRUCT UNION ENUM ELLIPSIS
-
- %token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN
-
- %start translation_unit /* start symbol of the grammar */
- %%
-
- primary_expr /* expression atoms: identifier, constant, string literal, or a parenthesised expr */
- : identifier
- | CONSTANT
- | STRING_LITERAL
- | '(' expr ')'
- ;
-
- postfix_expr /* a primary_expr followed by any number of subscript, call, member (. and ->) or postfix ++/-- operators */
- : primary_expr
- | postfix_expr '[' expr ']'
- | postfix_expr '(' ')'
- | postfix_expr '(' argument_expr_list ')'
- | postfix_expr '.' identifier
- | postfix_expr PTR_OP identifier
- | postfix_expr INC_OP
- | postfix_expr DEC_OP
- ;
-
- argument_expr_list /* one or more assignment_exprs separated by commas */
- : assignment_expr
- | argument_expr_list ',' assignment_expr
- ;
-
- unary_expr /* prefix ++/--, a unary_operator applied to a cast_expr, and both forms of sizeof */
- : postfix_expr
- | INC_OP unary_expr
- | DEC_OP unary_expr
- | unary_operator cast_expr
- | SIZEOF unary_expr
- | SIZEOF '(' type_name ')'
- ;
-
- unary_operator /* the single-character unary operators */
- : '&'
- | '*'
- | '+'
- | '-'
- | '~'
- | '!'
- ;
-
- cast_expr /* a unary_expr preceded by zero or more parenthesised type_name casts */
- : unary_expr
- | '(' type_name ')' cast_expr
- ;
-
- multiplicative_expr /* first of ten left-recursive binary-operator levels; each level's operands are the level before it */
- : cast_expr
- | multiplicative_expr '*' cast_expr
- | multiplicative_expr '/' cast_expr
- | multiplicative_expr '%' cast_expr
- ;
-
- additive_expr /* + and - over multiplicative_expr */
- : multiplicative_expr
- | additive_expr '+' multiplicative_expr
- | additive_expr '-' multiplicative_expr
- ;
-
- shift_expr /* LEFT_OP and RIGHT_OP over additive_expr */
- : additive_expr
- | shift_expr LEFT_OP additive_expr
- | shift_expr RIGHT_OP additive_expr
- ;
-
- relational_expr /* <, >, LE_OP, GE_OP over shift_expr */
- : shift_expr
- | relational_expr '<' shift_expr
- | relational_expr '>' shift_expr
- | relational_expr LE_OP shift_expr
- | relational_expr GE_OP shift_expr
- ;
-
- equality_expr /* EQ_OP and NE_OP over relational_expr */
- : relational_expr
- | equality_expr EQ_OP relational_expr
- | equality_expr NE_OP relational_expr
- ;
-
- and_expr /* binary & over equality_expr */
- : equality_expr
- | and_expr '&' equality_expr
- ;
-
- exclusive_or_expr /* ^ over and_expr */
- : and_expr
- | exclusive_or_expr '^' and_expr
- ;
-
- inclusive_or_expr /* | over exclusive_or_expr */
- : exclusive_or_expr
- | inclusive_or_expr '|' exclusive_or_expr
- ;
-
- logical_and_expr /* AND_OP over inclusive_or_expr */
- : inclusive_or_expr
- | logical_and_expr AND_OP inclusive_or_expr
- ;
-
- logical_or_expr /* OR_OP over logical_and_expr */
- : logical_and_expr
- | logical_or_expr OR_OP logical_and_expr
- ;
-
- conditional_expr /* ?: is right-recursive: the third operand is again a conditional_expr */
- : logical_or_expr
- | logical_or_expr '?' expr ':' conditional_expr
- ;
-
- assignment_expr /* right-recursive; the left operand of an assignment is a unary_expr */
- : conditional_expr
- | unary_expr assignment_operator assignment_expr
- ;
-
- assignment_operator /* '=' and the ten compound-assignment tokens */
- : '='
- | MUL_ASSIGN
- | DIV_ASSIGN
- | MOD_ASSIGN
- | ADD_ASSIGN
- | SUB_ASSIGN
- | LEFT_ASSIGN
- | RIGHT_ASSIGN
- | AND_ASSIGN
- | XOR_ASSIGN
- | OR_ASSIGN
- ;
-
- expr /* one or more assignment_exprs separated by commas */
- : assignment_expr
- | expr ',' assignment_expr
- ;
-
- constant_expr /* syntactically just a conditional_expr; no constness check is made here */
- : conditional_expr
- ;
-
- declaration /* either alternative ends by clearing in_typedef and switching typedef-name recognition back on */
- : declaration_specifiers ';'
- {reset_in_typedef(); set_typedef_recognition(); }
- | declaration_specifiers init_declarator_list ';'
- {reset_in_typedef(); set_typedef_recognition(); }
- ;
-
- declaration_specifiers /* any non-empty sequence of storage-class specifiers, type specifiers and type qualifiers */
- : storage_class_specifier
- | storage_class_specifier declaration_specifiers
- | type_specifier
- | type_specifier declaration_specifiers
- | type_qualifier
- | type_qualifier declaration_specifiers
- ;
-
- init_declarator_list /* one or more init_declarators separated by commas */
- : init_declarator
- | init_declarator_list ',' init_declarator
- ;
-
- init_declarator /* a declarator with an optional '=' initializer */
- : declarator
- | declarator '=' initializer
- ;
-
- storage_class_specifier
- : TYPEDEF { set_in_typedef(); } /* sets in_typedef, which makes enter_tdname() record declarators as typedef names */
- | EXTERN
- | STATIC
- | AUTO
- | REGISTER
- ;
-
- type_specifier
- : { reset_typedef_recognition(); } type_specifier2 /* mid-rule action placed ahead of the specifier: switches typedef-name recognition off */
- ;
-
- type_specifier2 /* the specifier itself: a basic type keyword, struct/union/enum specifier, or typedef name */
- : VOID
- | CHAR
- | SHORT
- | INT
- | LONG
- | FLOAT
- | DOUBLE
- | SIGNED
- | UNSIGNED
- | struct_or_union_specifier
- | enum_specifier
- | typedef_name
- ;
-
- struct_or_union_specifier /* tagged with body, untagged with body, or tag alone */
- : struct_or_union identifier struct_body
- | struct_or_union struct_body
- | struct_or_union identifier
- ;
-
- struct_body /* NOTE(review): this rule has no terminating ';', unlike most rules in this file */
- : { push_in_memberlist(); /* save in_memberlist, then in_typedef, on TSS */
- push_in_typedef();
- set_in_memberlist(); /* inside the braces: in_memberlist on, in_typedef off, recognition on */
- reset_in_typedef();
- set_typedef_recognition(); }
- '{' struct_declaration_list '}'
- { reset_typedef_recognition();
- pop_in_typedef(); /* restore in the reverse order of the pushes above */
- pop_in_memberlist(); }
-
- struct_or_union /* both keywords switch typedef-name recognition off */
- : STRUCT { reset_typedef_recognition(); }
- | UNION { reset_typedef_recognition(); }
- ;
-
- struct_declaration_list /* one or more member declarations */
- : struct_declaration
- | struct_declaration_list struct_declaration
- ;
-
- struct_declaration
- : { set_typedef_recognition(); } /* mid-rule action ahead of each member declaration: recognition on */
- struct_declaration2
- ;
-
- struct_declaration2 /* the member declaration proper */
- : specifier_qualifier_list struct_declarator_list ';'
- ;
-
- specifier_qualifier_list /* non-empty sequence of type specifiers and type qualifiers (no storage class) */
- : type_specifier
- | type_specifier specifier_qualifier_list
- | type_qualifier
- | type_qualifier specifier_qualifier_list
- ;
-
- struct_declarator_list /* one or more struct_declarators separated by commas */
- : struct_declarator
- | struct_declarator_list ',' struct_declarator
- ;
-
- struct_declarator /* plain member, unnamed bit-field, or named bit-field */
- : declarator
- | ':' constant_expr
- | declarator ':' constant_expr
- ;
-
- enum_specifier /* untagged with body, tagged with body, or tag alone */
- : enum_head '{' enumerator_list '}'
- | enum_head identifier '{' enumerator_list '}'
- | enum_head identifier
- ;
-
- enum_head
- : ENUM { reset_typedef_recognition(); } /* the keyword switches typedef-name recognition off */
- ;
-
- enumerator_list /* one or more enumerators separated by commas */
- : enumerator
- | enumerator_list ',' enumerator
- ;
-
- enumerator /* a name with an optional '=' constant_expr */
- : identifier
- | identifier '=' constant_expr
- ;
-
- type_qualifier /* const or volatile */
- : CONST
- | VOLATILE
- ;
-
- declarator /* a direct_declarator with an optional pointer prefix */
- : direct_declarator
- | pointer direct_declarator
- ;
-
- direct_declarator
- : identifier { enter_tdname (yytext, yyleng); } /* hands the current token text to enter_tdname(), which records it according to in_typedef / in_memberlist */
- | '(' declarator ')'
- | direct_declarator '[' ']'
- | direct_declarator '[' constant_expr ']'
- | direct_declarator '(' ')'
- | direct_declarator '(' parameter_type_list ')'
- | direct_declarator '(' identifier_list ')'
- ;
-
- pointer /* one or more '*', each optionally followed by type qualifiers */
- : '*'
- | '*' type_qualifier_list
- | '*' pointer
- | '*' type_qualifier_list pointer
- ;
-
- type_qualifier_list /* one or more type qualifiers */
- : type_qualifier
- | type_qualifier_list type_qualifier
- ;
-
- identifier_list /* one or more identifiers separated by commas (third function form of direct_declarator) */
- : identifier
- | identifier_list ',' identifier
- ;
-
- parameter_type_list /* NOTE(review): this rule has no terminating ';', unlike most rules in this file */
- : { push_in_typedef(); /* save in_typedef on TSS; inside the list: recognition on, in_typedef off */
- set_typedef_recognition();
- reset_in_typedef(); }
- parameter_type_list2
- { pop_in_typedef(); /* restore in_typedef, then switch recognition off */
- reset_typedef_recognition(); }
-
- parameter_type_list2 /* a parameter_list, optionally ending in ", ..." */
- : parameter_list
- | parameter_list ',' ELLIPSIS
- ;
-
- parameter_list /* one or more parameter declarations separated by commas */
- : parameter_declaration
- | parameter_list ',' parameter_declaration
- ;
-
- parameter_declaration /* specifiers followed by a declarator, by nothing, or by an abstract declarator */
- : declaration_specifiers declarator
- | declaration_specifiers
- | declaration_specifiers abstract_declarator
- ;
-
- type_name /* as used by sizeof and casts: specifiers with an optional abstract declarator */
- : specifier_qualifier_list
- | specifier_qualifier_list abstract_declarator
- ;
-
- abstract_declarator /* a declarator without an identifier */
- : pointer
- | direct_abstract_declarator
- | pointer direct_abstract_declarator
- ;
-
- direct_abstract_declarator /* parenthesised, array and function forms, each with or without a leading direct_abstract_declarator */
- : '(' abstract_declarator ')'
- | '[' ']'
- | '[' constant_expr ']'
- | direct_abstract_declarator '[' ']'
- | direct_abstract_declarator '[' constant_expr ']'
- | '(' ')'
- | '(' parameter_type_list ')'
- | direct_abstract_declarator '(' ')'
- | direct_abstract_declarator '(' parameter_type_list ')'
- ;
-
- typedef_name /* the TYPE_NAME token from the scanner */
- : TYPE_NAME
- ;
-
- initializer /* an expression, or a braced list with an optional trailing comma */
- : assignment_expr
- | '{' initializer_list '}'
- | '{' initializer_list ',' '}'
- ;
-
- initializer_list /* one or more initializers separated by commas */
- : initializer
- | initializer_list ',' initializer
- ;
-
- statement /* the six statement kinds */
- : labeled_statement
- | compound_statement
- | expression_statement
- | selection_statement
- | iteration_statement
- | jump_statement
- ;
-
- labeled_statement /* label, case label, or default label in front of a statement */
- : identifier ':' statement
- | CASE constant_expr ':' statement
- | DEFAULT ':' statement
- ;
-
- compound_statement /* declarations first, then statements; either part may be empty */
- : '{' cs_decl_list cs_stmt_list '}'
- ;
-
- cs_decl_list
- : { enter_TD_scope(); } /* pushes the -1 scope marker on RDS */
- declaration_list
- { exit_TD_scope(); } /* runs once the declaration list is complete, i.e. ahead of cs_stmt_list */
- | /* empty: no scope marker is pushed */
- ;
-
- cs_stmt_list
- : { reset_typedef_recognition(); } /* statements are scanned with typedef-name recognition off */
- statement_list
- | /* empty */
- ;
-
- declaration_list /* one or more declarations */
- : declaration
- | declaration_list declaration
- ;
-
- statement_list /* one or more statements */
- : statement
- | statement_list statement
- ;
-
- expression_statement /* an optional expr followed by ';' */
- : ';'
- | expr ';'
- ;
-
- selection_statement /* if, if/else and switch */
- : IF '(' expr ')' statement
- | IF '(' expr ')' statement ELSE statement
- | SWITCH '(' expr ')' statement
- ;
-
- iteration_statement /* while, do-while, and for; the eight FOR alternatives cover every present/absent combination of the three exprs */
- : WHILE '(' expr ')' statement
- | DO statement WHILE '(' expr ')' ';'
- | FOR '(' ';' ';' ')' statement
- | FOR '(' ';' ';' expr ')' statement
- | FOR '(' ';' expr ';' ')' statement
- | FOR '(' ';' expr ';' expr ')' statement
- | FOR '(' expr ';' ';' ')' statement
- | FOR '(' expr ';' ';' expr ')' statement
- | FOR '(' expr ';' expr ';' ')' statement
- | FOR '(' expr ';' expr ';' expr ')' statement
- ;
-
- jump_statement /* goto, continue, break, and return with or without a value */
- : GOTO identifier ';'
- | CONTINUE ';'
- | BREAK ';'
- | RETURN ';'
- | RETURN expr ';'
- ;
-
- translation_unit /* start symbol: one or more external declarations */
- : external_declaration
- | translation_unit external_declaration
- ;
-
- external_declaration
- : { set_typedef_recognition(); reset_in_typedef(); } /* each top-level item starts with recognition on and in_typedef off */
- external_declaration2
- ;
-
- external_declaration2 /* a function definition or an ordinary declaration */
- : function_definition
- | declaration
- ;
-
- function_definition /* the declaration specifiers are optional */
- : declarator function_body
- | declaration_specifiers declarator function_body
- ;
-
- function_body /* a compound statement, optionally preceded by a list of parameter declarations */
- : compound_statement
- | declaration_list compound_statement
- ;
-
- identifier /* the IDENTIFIER token from the scanner */
- : IDENTIFIER
- ;
- %%
-
- #include <stdio.h>
-
- extern int yycolumn, yylineno;
- extern unsigned char yyfilename[];
-
- yyerror(s)
- char *s;
- {
- fflush(stdout);
- if (input_echo) {
- printf("\n%*s\n", yycolumn, "^");
- }
- printf ("%s, line %d: %s\n", yyfilename, yylineno, s);
- }
- @EOF
-
- chmod 664 gram.y
-
- echo x - main.c
- cat >main.c <<'@EOF'
- #include "scanaux.h"
-
/* Non-zero when the scanner should echo its input; set by the -L option. */
int input_echo = 0;

/*
 * main - entry point of the skeleton parser.
 *
 * If the first argument is exactly "-L", input echoing is switched on.
 * The scanner tables are then initialised and yyparse() is run; its
 * result becomes the exit status.
 */
int main(int argc, char **argv)
{
    int yyparse();
    int strcmp();   /* declared locally: this file includes no string header */

    if ((argc >= 2) && (strcmp (argv[1], "-L") == 0)) {
        input_echo = 1;
    }
    init_scanner();
    return (yyparse());
}
- @EOF
-
- chmod 664 main.c
-
- echo x - misctypes.h
- cat >misctypes.h <<'@EOF'
/* misctypes.h - boolean constants used by the parser sources. */
#ifndef MISCTYPES_H
#define MISCTYPES_H

/* Define TRUE/FALSE only if no earlier header has already done so. */
#ifndef TRUE
#define TRUE 1
#endif

#ifndef FALSE
#define FALSE 0
#endif

#endif /* MISCTYPES_H */
- @EOF
-
- chmod 664 misctypes.h
-
- echo x - scan.l
- cat >scan.l <<'@EOF'
- D [0-9]
- L [a-zA-Z_]
- H [a-fA-F0-9]
- E [Ee][+-]?{D}+
- FS (f|F|l|L)
- IS (u|U|l|L)*
-
- %{
- #include <stdio.h> /* names defined above: D digit, L letter or '_', H hex digit, E exponent part, FS float suffix, IS integer suffix letters */
- #include "scanaux.h"
- #include "y.tab.h" /* token codes; written by yacc because the Makefile sets YFLAGS = -d */
-
- #undef input /* remove any existing input macro so the input() function defined below is the one used */
- extern int input();
-
- unsigned char yyfilename[256] = "stdin"; /* file name printed by yyerror(); overwritten by line_number() */
- %}
-
- %%
-
- "#" { /* '#' line: line_number() consumes the rest of the line */ line_number(); }
- "/*" { /* comment opener: comment() consumes up to the closing delimiter */ comment(); }
-
- "auto" { /* keywords: each returns its own token code */ return(AUTO); }
- "break" { return(BREAK); }
- "case" { return(CASE); }
- "char" { return(CHAR); }
- "const" { return(CONST); }
- "continue" { return(CONTINUE); }
- "default" { return(DEFAULT); }
- "do" { return(DO); }
- "double" { return(DOUBLE); }
- "else" { return(ELSE); }
- "enum" { return(ENUM); }
- "extern" { return(EXTERN); }
- "float" { return(FLOAT); }
- "for" { return(FOR); }
- "goto" { return(GOTO); }
- "if" { return(IF); }
- "int" { return(INT); }
- "long" { return(LONG); }
- "register" { return(REGISTER); }
- "return" { return(RETURN); }
- "short" { return(SHORT); }
- "signed" { return(SIGNED); }
- "sizeof" { return(SIZEOF); }
- "static" { return(STATIC); }
- "struct" { return(STRUCT); }
- "switch" { return(SWITCH); }
- "typedef" { return(TYPEDEF); }
- "union" { return(UNION); }
- "unsigned" { return(UNSIGNED); }
- "void" { return(VOID); }
- "volatile" { return(VOLATILE); }
- "while" { return(WHILE); }
-
- {L}({L}|{D})* { /* any other name: check_type() picks TYPE_NAME or IDENTIFIER */ return(check_type()); }
-
0[xX]{H}+{IS}?		{ /* hexadecimal integer constant */ return(CONSTANT); }
0{D}+{IS}?		{ /* integer constant with a leading 0 */ return(CONSTANT); }
{D}+{IS}?		{ /* decimal integer constant */ return(CONSTANT); }
'(\\.|[^\\'])+'		{ /* character constant */ return(CONSTANT); }

{D}+{E}{FS}?		{ /* floating constant: digits and exponent */ return(CONSTANT); }
{D}*"."{D}+({E})?{FS}?	{ /* floating constant: digits after the point */ return(CONSTANT); }
{D}+"."{D}*({E})?{FS}?	{ /* floating constant: digits before the point */ return(CONSTANT); }

\"(\\.|[^\\"])*\"	{ return(STRING_LITERAL); }
-
- "..." { /* multi-character operators: each has a named token code */ return(ELLIPSIS); }
- ">>=" { return(RIGHT_ASSIGN); }
- "<<=" { return(LEFT_ASSIGN); }
- "+=" { return(ADD_ASSIGN); }
- "-=" { return(SUB_ASSIGN); }
- "*=" { return(MUL_ASSIGN); }
- "/=" { return(DIV_ASSIGN); }
- "%=" { return(MOD_ASSIGN); }
- "&=" { return(AND_ASSIGN); }
- "^=" { return(XOR_ASSIGN); }
- "|=" { return(OR_ASSIGN); }
- ">>" { return(RIGHT_OP); }
- "<<" { return(LEFT_OP); }
- "++" { return(INC_OP); }
- "--" { return(DEC_OP); }
- "->" { return(PTR_OP); }
- "&&" { return(AND_OP); }
- "||" { return(OR_OP); }
- "<=" { return(LE_OP); }
- ">=" { return(GE_OP); }
- "==" { return(EQ_OP); }
- "!=" { return(NE_OP); }
- ";" { /* single-character tokens are returned as their own character code */ return(';'); }
- "{" { return('{'); }
- "}" { return('}'); }
- "," { return(','); }
- ":" { return(':'); }
- "=" { return('='); }
- "(" { return('('); }
- ")" { return(')'); }
- "[" { return('['); }
- "]" { return(']'); }
- "." { return('.'); }
- "&" { return('&'); }
- "!" { return('!'); }
- "~" { return('~'); }
- "-" { return('-'); }
- "+" { return('+'); }
- "*" { return('*'); }
- "/" { return('/'); }
- "%" { return('%'); }
- "<" { return('<'); }
- ">" { return('>'); }
- "^" { return('^'); }
- "|" { return('|'); }
- "?" { return('?'); }
-
- [ \t\v\n\f] { /* white space produces no token */ }
- . { /* ignore bad characters */ }
-
- %%
-
/* Column counter for the current input line; maintained by input(). */
int yycolumn = 0;

/*
 * yywrap - end-of-input hook for the lex-generated scanner.
 *
 * Always returns 1.  The return type is spelled out instead of relying on
 * implicit int.
 */
int yywrap()
{
    return 1;
}
-
- int input()
- {
- if (yysptr > yysbuf) {
- /* retrieve pushed-back character */
- yytchar = *--yysptr;
- } else {
- yytchar = getc(yyin);
- if (yytchar == EOF) {
- return 0;
- } else if (input_echo) {
- output(yytchar);
- }
- }
-
- /* count yycolumn and yylineno */
- if (yytchar == '\n') {
- yylineno++;
- yycolumn = 0;
- } else if (yytchar == '\t') {
- yycolumn += 8 - (yycolumn % 8);
- } else {
- yycolumn++;
- }
-
- return yytchar;
- }
-
/*
 * comment - consume the remainder of a comment.
 *
 * Called after the opening slash-star has been matched.  Characters are
 * read with input() until a '*' immediately followed by '/' has been
 * consumed, or until input() reports end of input (0).
 *
 * Returns 0.  The return type and value are explicit rather than implicit
 * int, and end of input stops the scan directly instead of pushing a NUL
 * back first.
 */
int comment()
{
    int c;

    for (;;) {
        /* run forward to the next '*' */
        do {
            c = input();
        } while (c != '*' && c != 0);
        if (c == 0) {
            break;              /* input ended inside the comment */
        }

        c = input();
        if (c == '/' || c == 0) {
            break;              /* comment closed, or input ended */
        }
        /* not the closer: this character may itself be a '*', so rescan it */
        unput(c);
    }
    return 0;
}
-
- #define READWHILE(cond) while(cond) c = input();
- line_number()
- {
- char c;
- /* skip spaces */
- c = input();
- READWHILE ((c == ' ' || c == '\t'));
-
- if (c >= '0' && c <= '9') {
- /* line number specification */
- int line_num = 0;
- while (c >= '0' && c <= '9') {
- line_num = line_num * 10 + c - '0';
- c = input();
- }
- if (line_num > 0)
- yylineno = line_num - 1;
- READWHILE ((c == ' ' || c == '\t'));
- if (c == '"') {
- unsigned char *yf = yyfilename;
- do {
- *yf++ = c;
- c = input();
- } while (c != '"');
- *yf++ = c;
- *yf = '\0';
- }
- }
-
- /* flush rest of line */
- READWHILE ((c != '\n'));
- }
-
- int check_type()
- {
-
- if (lookup_tdname(yytext, yyleng))
- return (TYPE_NAME);
- else
- return (IDENTIFIER);
- }
- @EOF
-
- chmod 664 scan.l
-
- echo x - scanaux.c
- cat >scanaux.c <<'@EOF'
- #include <assert.h>
- #include <stdio.h>
- #include "misctypes.h"
-
- #define TYPEDEF_UNKNOWN -1 /* no typedef status has been recorded for the identifier */
- #define TYPEDEF_FALSE 0 /* identifier is recorded as not being a typedef name */
- #define TYPEDEF_TRUE 1 /* identifier is recorded as a typedef name */
-
- static int in_typedef = FALSE; /* TRUE makes enter_tdname() record names as typedef names */
- static int typedef_recognition = TRUE; /* FALSE makes lookup_tdname() answer FALSE without looking the name up */
- static int in_memberlist = FALSE; /* TRUE (with in_typedef FALSE) makes enter_tdname() leave the name's status alone */
-
- extern char *malloc();
- extern char *realloc();
- extern char *calloc();
-
- /* TSS types */
-
- #define TSS_INCR 16 /* initial MAX of both stacks and the step by which MAX grows */
- static struct typedef_state_stack {
- int TOS; /* index of the top element; -1 when the stack is empty */
- int MAX; /* capacity bookkeeping; raised by TSS_INCR when a push needs room */
- int *values; /* heap array holding the stacked ints */
- } TSS, RDS;
- /*
- * TSS is used to push values of state variables like in_typedef and
- * typedef_recognition
- *
- * RDS is used to keep track of identifiers re-defined in inner scopes.
- */
-
- /* Typedef Table types */
- static struct TypedefTable { /* typedef status per identifier, addressed by char-pool index */
- char **tab; /* tab[index / CHARBLOCKSIZE][index % CHARBLOCKSIZE] holds a TYPEDEF_* value; rows are allocated on first use in set_typedef() */
- int cur; /* highest row index set_typedef() has reached; -1 initially */
- int max; /* number of row slots allocated in tab */
- } TDT;
-
- /* ID Hash Tbl types */
-
- #define IDHASH_INCR 32 /* initial max_entry of a bucket and the step by which it grows */
- #define HASHSIZE 509 /* number of buckets; IDhash() reduces its sum modulo this */
-
- struct hashbucket {
- int nxt_entry; /* number of entries in use */
- int max_entry; /* capacity bookkeeping for entry[] */
- int entry[1]; /* char-pool indices; buckets are allocated larger than the struct, so this array extends past one element */
- };
-
- static struct hashbucket *HTBL [HASHSIZE]; /* hopefully zeros? */
-
- /* Char pool */
-
- #define CHARBLOCKSIZE 1024 /* bytes of identifier text per pool block; also the divisor that splits an index into block and offset */
- struct charpool_block {
- int next_ch; /* offset of the first unused byte in chars */
- char chars[CHARBLOCKSIZE]; /* identifier spellings, each followed by a NUL */
- };
-
- #define CHARPOOL_INCR 128 /* initial size and growth step of the block-pointer array (and of TDT.tab) */
- static struct charpool {
- struct charpool_block **char_pool; /* array of pointers to blocks */
- int maxind; /* number of slots allocated in char_pool */
- int curind; /* index of the block currently being filled */
- } CP;
-
/*
 * recalloc - replace an array by a larger, zero-filled copy.
 *
 * ptr:         existing array (released before returning); may be NULL
 *              when oldnumelems is 0
 * oldnumelems: number of elements to carry over from ptr
 * newnumelems: number of elements in the new array
 * elemsize:    size of one element in bytes
 *
 * Returns the new array: the first oldnumelems elements are copied from
 * ptr and the rest is zero, as delivered by calloc.  Allocation failure
 * is fatal (message on stderr, exit status 1) instead of leading to a
 * copy through a null pointer.
 */
static char *recalloc (void *ptr, int oldnumelems, int newnumelems, int elemsize)
{
    char *t = (char *) calloc (newnumelems, elemsize);

    if (! t) {
        fprintf (stderr, "calloc failed in recalloc\n");
        exit(1);
    }
    if (ptr && oldnumelems > 0) {
        memcpy (t, ptr, oldnumelems * elemsize);
    }
    free (ptr);
    return t;
}
-
- void init_scanner()
- {
- TSS.TOS = -1;
- TSS.MAX = TSS_INCR;
- TSS.values = (int *)malloc (TSS_INCR * sizeof(int *));
-
- RDS.TOS = -1;
- RDS.MAX = TSS_INCR;
- RDS.values = (int *)malloc (TSS_INCR * sizeof(int *));
-
- CP.char_pool = (struct charpool_block **)
- malloc (CHARPOOL_INCR * sizeof (struct charpool_block *));
- CP.char_pool[0] = (struct charpool_block *)
- malloc (sizeof (struct charpool_block));
- CP.char_pool[0]->next_ch = 0;
- CP.maxind = CHARPOOL_INCR;
- CP.curind = 0;
-
- TDT.tab = (char **) calloc (CHARPOOL_INCR, sizeof (char *));
- TDT.cur = -1;
- TDT.max = CHARPOOL_INCR;
- }
-
- static void push_TSS (val)
- int val;
- {
- if (++TSS.TOS > TSS.MAX) {
- TSS.MAX += TSS_INCR;
- TSS.values = (int *) realloc (TSS.values, TSS.MAX * sizeof(int *));
- if (! TSS.values) {
- fprintf (stderr, "realloc failed in push_TSS\n");
- exit(1);
- }
- }
- TSS.values[TSS.TOS] = val;
- }
-
- static int pop_TSS ()
- {
- if (TSS.TOS < 0) {
- fprintf (stderr, "TSS underflow\n");
- exit(1);
- }
- return (TSS.values[TSS.TOS--]);
- }
-
- static void push_RDS (val)
- int val;
- {
- if (++RDS.TOS > RDS.MAX) {
- RDS.MAX += TSS_INCR;
- RDS.values = (int *) realloc (RDS.values, RDS.MAX * sizeof(int *));
- if (! RDS.values) {
- fprintf (stderr, "realloc failed in push_RDS\n");
- exit(1);
- }
- }
- RDS.values[RDS.TOS] = val;
- }
-
- static int pop_RDS ()
- {
- if (RDS.TOS < 0) {
- fprintf (stderr, "RDS underflow\n");
- exit(1);
- }
- return (RDS.values[RDS.TOS--]);
- }
-
/*
 * IDhash - hash an identifier to a bucket number.
 *
 * text: identifier characters (need not be NUL-terminated)
 * leng: number of characters to hash
 *
 * Characters are taken in pairs, each pair forming a 16-bit value (first
 * character in the high byte), and the pairs are XORed together.  A final
 * unpaired character is combined with a zero low byte, so nothing past
 * text[leng - 1] is read.  The arithmetic is unsigned, so the result is
 * always in the range 0 .. HASHSIZE-1, whatever the character values.
 */
int IDhash (char *text, int leng)
{
    unsigned int sum = 0;
    int i;

    for (i = 0; i < leng; i += 2) {
        unsigned int pair = ((unsigned int) (unsigned char) text[i]) << 8;

        if (i + 1 < leng) {
            pair |= (unsigned char) text[i + 1];
        }
        sum ^= pair;
    }
    return (int) (sum % HASHSIZE);
}
-
- static int add_charpool (text, leng)
- {
- int start;
- struct charpool_block *tcpb;
-
- if ((CHARBLOCKSIZE - CP.char_pool[CP.curind]->next_ch) < (leng+1)) {
- if (CP.curind++ > CP.maxind) {
- CP.maxind += CHARPOOL_INCR;
- CP.char_pool = (struct charpool_block **) realloc (CP.char_pool,
- CP.maxind * sizeof (struct charpool_block *));
- }
- CP.char_pool[CP.curind] = (struct charpool_block *)
- malloc (sizeof (struct charpool_block));
- CP.char_pool[CP.curind]->next_ch = 0;
- }
-
- tcpb = CP.char_pool[CP.curind];
- start = CP.curind * CHARBLOCKSIZE + tcpb->next_ch;
- memcpy (tcpb->chars + tcpb->next_ch, text, leng);
- tcpb->next_ch += (leng + 1);
- tcpb->chars[tcpb->next_ch - 1] = '\0';
- return start;
- }
-
- static int cpcmp (text, leng, index)
- char *text;
- int leng;
- int index;
- {
- int high = index / CHARBLOCKSIZE;
- int low = index % CHARBLOCKSIZE;
- char *start = CP.char_pool[high]->chars + low;
-
- return (memcmp (text, start, leng));
- }
-
- static int enterIDhash (text, leng)
- char *text;
- int leng;
- {
- int i;
- int hval = IDhash (text, leng);
- struct hashbucket *htmp;
-
- /* search in hash tbl */
- if (!HTBL[hval]) {
- HTBL[hval] = (struct hashbucket *)
- malloc ((IDHASH_INCR + 2) * sizeof(int));
- HTBL[hval]->nxt_entry = 0;
- HTBL[hval]->max_entry = IDHASH_INCR;
- }
-
- htmp = HTBL[hval];
- for (i = 0; i < htmp->nxt_entry; i++) {
- if (!cpcmp (text, leng, htmp->entry[i])) {
- return htmp->entry[i];
- }
- }
-
- if (htmp->nxt_entry > htmp->max_entry) {
- htmp->max_entry += IDHASH_INCR;
- HTBL[hval] = (struct hashbucket *)
- realloc (htmp, (htmp->max_entry + 2) * sizeof(int));
- htmp = HTBL[hval];
- }
- htmp->entry[htmp->nxt_entry++] = add_charpool (text, leng);
- }
-
- static void set_typedef (index, val)
- int index;
- int val;
- {
- int low, high;
-
- high = index / CHARBLOCKSIZE;
- low = index % CHARBLOCKSIZE;
-
- if (high > TDT.cur) {
- assert (high == (TDT.cur + 1));
- if (++TDT.cur > TDT.max) {
- TDT.tab = (char **) recalloc (TDT.tab, TDT.max,
- TDT.max+CHARPOOL_INCR, sizeof (char *));
- TDT.max += CHARPOOL_INCR;
- }
- }
- if (! TDT.tab[high]) {
- TDT.tab[high] = (char *) malloc (CHARBLOCKSIZE * sizeof(char));
- memset (TDT.tab[high], TYPEDEF_UNKNOWN, CHARBLOCKSIZE);
- }
-
- TDT.tab[high][low] = val;
- }
-
- static int lookup_typedef (index)
- int index;
- {
- int low, high;
-
- high = index / CHARBLOCKSIZE;
- low = index % CHARBLOCKSIZE;
- return (TDT.tab[high] ? TDT.tab[high][low] : TYPEDEF_UNKNOWN);
- }
-
- int lookup_tdname(text, leng)
- char *text;
- int leng;
- {
- int IDindex;
-
- if (typedef_recognition) {
- IDindex = enterIDhash (text, leng);
- return (lookup_typedef(IDindex) == TYPEDEF_TRUE);
- } else {
- return FALSE;
- }
- }
-
- void enter_tdname(text, leng)
- char *text;
- int leng;
- {
- int IDindex, oldval;
-
- IDindex = enterIDhash (text, leng);
- oldval = lookup_typedef (IDindex);
- if (in_typedef) {
- if (RDS.TOS >= 0 && oldval == TYPEDEF_FALSE)
- push_RDS (IDindex);
- set_typedef (IDindex, TYPEDEF_TRUE);
- } else if (! in_memberlist) {
- if (RDS.TOS >= 0 && oldval == TYPEDEF_TRUE)
- push_RDS (IDindex);
- set_typedef (IDindex, TYPEDEF_FALSE);
- }
- }
-
/*
 * enter_TD_scope - open a scope for typedef redefinitions by pushing the
 * marker -1 on RDS.
 */
void enter_TD_scope()
{
    int scope_marker = -1;

    push_RDS (scope_marker);
}
-
- void exit_TD_scope()
- {
- int k;
-
- while ((k = pop_RDS()) != -1) {
- if (lookup_typedef(k)) {
- set_typedef (k, FALSE);
- } else {
- set_typedef (k, TRUE);
- }
- }
- }
-
- void set_in_memberlist()
- {
- in_memberlist = TRUE;
- }
-
- void reset_in_memberlist()
- {
- in_memberlist = FALSE;
- }
-
- void push_in_memberlist()
- {
- push_TSS(in_memberlist);
- }
-
- void pop_in_memberlist()
- {
- in_memberlist = pop_TSS();
- }
-
- void set_in_typedef()
- {
- in_typedef = TRUE;
- }
-
- void reset_in_typedef()
- {
- in_typedef = FALSE;
- }
-
- void push_in_typedef()
- {
- push_TSS(in_typedef);
- }
-
- void pop_in_typedef()
- {
- in_typedef = pop_TSS();
- }
-
- void set_typedef_recognition()
- {
- typedef_recognition = TRUE;
- }
-
- void reset_typedef_recognition()
- {
- typedef_recognition = FALSE;
- }
- @EOF
-
- chmod 664 scanaux.c
-
- echo x - scanaux.h
- cat >scanaux.h <<'@EOF'
/*
 * scanaux.h - declarations shared by main.c, gram.y and scan.l for the
 * typedef-tracking helpers in scanaux.c.
 */
#ifndef SCANAUX_H
#define SCANAUX_H

void init_scanner();

/* NOTE(review): both are defined static in scanaux.c, so other files
 * cannot actually call them; the declarations are kept as they were. */
void push_TSS ();
int pop_TSS ();

/* state flags consulted while recording and recognising typedef names */
extern void set_in_memberlist(),
            reset_in_memberlist(),
            push_in_memberlist(),
            pop_in_memberlist(),
            set_in_typedef(),
            reset_in_typedef(),
            push_in_typedef(),
            pop_in_typedef(),
            set_typedef_recognition(),
            reset_typedef_recognition();

extern int lookup_tdname();

/* called from the grammar actions in gram.y; without these declarations
 * the callers would assume an int return type */
extern void enter_tdname(),
            enter_TD_scope(),
            exit_TD_scope();

extern int input_echo;

#endif /* SCANAUX_H */
- @EOF
-
- chmod 664 scanaux.h
-
- exit 0
-
- -----
- Shankar Unni E-Mail:
- Hewlett-Packard California Language Lab. Internet: shankar@hpda.hp.com
- Phone : (408) 447-5797 UUCP: ...!hplabs!hpda!shankar
-
- DISCLAIMER:
- This response does not represent the official position of, or statement by,
- the Hewlett-Packard Company. The above data is provided for informational
- purposes only. It is supplied without warranty of any kind.
-
-