home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Source Code 1994 March
/
Source_Code_CD-ROM_Walnut_Creek_March_1994.iso
/
netsrcs
/
cplspls.txt
< prev
next >
Wrap
Internet Message Format
|
1987-01-15
|
32KB
From nmyers@mntgfx.MENTOR.COM Wed Jan 14 17:35:26 1987
Path: beno!seismo!husc6!panda!genrad!decvax!tektronix!sequent!mntgfx!nmyers
From: nmyers@mntgfx.MENTOR.COM (Nathan Myers)
Newsgroups: net.sources
Subject: C++ Lexical Scanner (in C++)
Keywords: object parse compiler cfront
Message-ID: <431@mntgfx.MENTOR.COM>
Date: 14 Jan 87 22:35:26 GMT
Organization: Mentor Graphics, Beaverton OR
Lines: 1054
C++ Lexical Scanner (in C++)
Following this note is a shar-format file containing a
lexical scanner for C++. Use it however you like. It
should be adequate to drop into a full compiler.
For such a use, error reporting would probably need
some refinement. Also, if the compiler was to have
a built-in preprocessor (i.e. not using cpp) the handler
for "#" lines would need a bit of work.
I will be interested in bug reports, significant improvements.
Nathan Myers tektronix!sequent!mntgfx!nmyers
Share and enjoy!
-------------------- cut here -----------------------------
#! /bin/sh
# This is a shar format file. Extract with sh, not csh.
#
echo "x - read.me"
sed -e 's/^_ //' >read.me <<'%%%EOF%%%'
_
_ The following source files are included:
_
_ clex.c
_ clex.h
_ clex_sym.h
_ clex_test.c
_ kwhash.c
_ Makefile
_
_ They implement a self-contained lexical scanner class for C++.
_ It is extensible by derivation primarily in the area of
_ processing "#" compiler directives (currently, it only
_ interprets the "#line" construct).
_
_ It has one other degree of flexibility, in its handling
_ of bracket-enclosed expressions "[]". These may be treated
_ as a normal sequence of tokens or as delimited strings;
_ the former is of greater use in a traditional parser, while
_ the latter is favored for extraction of declarations by
_ a code browser.
_
_ To allay some confusion, I should point out here that
_ clex_sym.h is used in an unusual way: it is included
_ twice in the module clex.c; once for declaration
_ part, once for the (static) definition part. It is
_ built this way to keep all knowledge of keywords
_ in a single place.
_
_ The file kwhash.c is a standard C standalone program used
_ to arrive at the collision-free hash function used to
_ recognize C++ keywords. Any new keyword stands about
_ one chance in 3 of colliding with an existing keyword,
_ thus requiring that a new hash function be generated.
_
_ The file clex_test.c compiles to a program which reads
_ C or C++ code from standard input and emits token names
_ on the standard output. Try it with different values
_ in the constructor's second argument.
%%%EOF%%%
echo "x - Makefile"
sed -e 's/^_ //' >Makefile <<'%%%EOF%%%'
_ # Makefile for clex/cparse
_ INCLUDE=/usr/local/include/CC
_ CCC=/user/local/CC
_ CCOPTS= -g -O
_ # -g: include debug info -O: optimize
_
_ all: kwhash clex_test
_
_ kwhash: kwhash.c /usr/include/stdio.h
_ /bin/cc ${CCOPTS} -o kwhash kwhash.c
_
_ clex_test: clex_test.o clex.o
_ ${CCC} -g -o clex_test clex_test.o clex.o
_
_ .c.o:
_ /usr/lib/cpp -I${INCLUDE} -Dc_plusplus=1 $*.c >$*.cpp
_ /user/mentor/local/cfront +L +f$*.c <$*.cpp >$*..c && /bin/rm $*.cpp
_ /bin/cc -c ${CCOPTS} $*..c && /bin/mv $*..o $*.o
_
_ clex.o: ${INCLUDE}/stdio.h \
_ ${INCLUDE}/stream.h \
_ ${INCLUDE}/string.h \
_ ${INCLUDE}/stdlib.h \
_ ${INCLUDE}/ctype.h \
_ ${INCLUDE}/assert.h \
_ clex.h clex_sym.h
_
_ clex_test.o: ${INCLUDE}/stdio.h \
_ clex.h clex_sym.h
_
%%%EOF%%%
echo "x - clex.h"
sed -e 's/^_ //' >clex.h <<'%%%EOF%%%'
_
_
_ #ifndef INCLUDED_CLEX
_ #define INCLUDED_CLEX 1
_
_ #ifndef INCLUDED_STDIO
_ #include <stdio.h>
_ #endif
_
_ enum Boolean { FALSE, TRUE };
_
_ #include "clex_sym.h"
_
_ class Clex
_ {
_ friend class Cparse;
_ enum Clex_mode
_ { CL_NONE=0, CL_COMMENT=1, CL_QUOTE=2, CL_POUND=4, CL_BRACK=8 };
_
_ protected:
_ short look; // a one-char lookahead
_ FILE* fp;
_ Boolean block_brack; // if TRUE, treat contents of "[]" as a string
_ long line_num; // line number in original source file
_ char filename[256]; // name of original source file
_ short bufsiz; // number of chars currently in buf
_ char buf[256];
_
_ void eat_one() { look = short(getc(fp)); }
_ void put_in_buf(char c) { if (bufsiz < sizeof(buf)-1) buf[bufsiz++] = c; }
_ void buf_one() { put_in_buf(look); eat_one(); }
_ Clex_sym terminate(Clex_sym s) { buf[bufsiz] = '\0'; return s; }
_ Clex_sym eat_return(Clex_sym);
_ Clex_sym num(char);
_ Clex_sym ident(char);
_ Clex_sym lbrack(Clex_mode);
_ Clex_sym quote(char, Clex_sym, Clex_mode);
_ void block_comment(Clex_mode);
_ void line_comment();
_ void eoln(Clex_mode);
_
_ virtual Boolean pound(Clex_mode, char*, short len);
_
_ public:
_ Clex_sym next();
_ const char* str() { return buf; }
_ short strlen() { return bufsiz; }
_ long line_no() { return line_num; }
_ const char* fname() { return filename; }
_ const char* debug(Clex_sym);
_
_ Clex(FILE*, Boolean block_brack);
_ };
_
_ #endif
_
%%%EOF%%%
echo "x - clex_sym.h"
sed -e 's/^_ //' >clex_sym.h <<'%%%EOF%%%'
_
_ /* clex_sym.h:
_ // This file defines both an enum {} type named "symbol", and
_ // a variable sym_str[] with string representations of the
_ // symbols. It is intended to maintain an exact
_ // correspondence between array entries and symbol values.
_ */
_
_ /*
_ This file is #include'd twice: once for the enum
_ (with CLEX_IMPLEMENTATION turned off) and once for
_ the array initialization (with it turned on). The
_ lower-numbered symbols have uppercase name strings,
_ but the keyword symbol strings are stored separately.
_
_ If a keyword is to be added, add it first to the
_ standalone program kwhash.c and generate a new
_ perfect hash function for the new set. Then add
_ it to both places below and modify the hash function
_ and table size in clex.c.
_ */
_
_ #ifndef CLEX_IMPLEMENTATION
_
_ #define CLEX_S(sym) sym
_ #define CLEX_S2(sym1,sym2) sym1
_ enum Clex_sym
_ {
_
_ #else /* CLEX_IMPLEMENTATION */
_
_ #undef CLEX_S
_ #undef CLEX_S2
_ #define CLEX_S(sym) "sym"
_ #define CLEX_S2(sym1,sym2) sym2
_ static char* sym_str[] =
_ {
_
_ #endif /* CLEX_IMPLEMENTATION */
_
_ CLEX_S(NONE_S = 0), /* should never get this */
_
_ CLEX_S(ERROR_S),
_ CLEX_S( ERROR_EOLN_S),
_ CLEX_S( ERROR_EOF_S),
_ CLEX_S( ERROR_UNKN_S),
_ #ifndef CLEX_IMPLEMENTATION
_ CLEX_S(ERROR_MAX = ERROR_UNKN_S),
_ #endif
_
_ CLEX_S(EOF_S),
_ CLEX_S(EOLN_S), // \n
_
_ CLEX_S(BANG_S), // !
_ CLEX_S( NE_S), // !=
_ CLEX_S(QUOTE_S), // "
_ CLEX_S(POUND_S), // #
_ CLEX_S(MOD_S), // %
_ CLEX_S( MODAS_S), // %=
_ CLEX_S(AMPER_S), // &
_ CLEX_S( LAND_S), // &&
_ CLEX_S( ANDAS_S), // &=
_ CLEX_S(APOS_S), // '
_ CLEX_S(LPAR_S), // (
_ CLEX_S(RPAR_S), // )
_ CLEX_S(STAR_S), // *
_ CLEX_S( MULAS_S), // *=
_ CLEX_S(PLUS_S), // +
_ CLEX_S( INCRE_S), // ++
_ CLEX_S( ADDAS_S), // +=
_ CLEX_S(COMMA_S), // ),
_ CLEX_S(MINUS_S), // -
_ CLEX_S( DECRE_S), // --
_ CLEX_S( SUBAS_S), // -=
_ CLEX_S( DEREF_S), // ->
_ CLEX_S(DOT_S), // .
_ CLEX_S( ELLIP_S), // ...
_ CLEX_S(SLASH_S), // /
_ CLEX_S( DIVAS_S), // /=
_ CLEX_S(COLON_S), // :
_ CLEX_S( SCOPE_S), // ::
_ CLEX_S(SEMI_S), // ;
_ CLEX_S(LT_S), // <
_ CLEX_S( LE_S), // <=
_ CLEX_S( SHL_S), // <<
_ CLEX_S( SHLAS_S), // <<=
_ CLEX_S(AS_S), // =
_ CLEX_S( EQ_S), // ==
_ CLEX_S(GT_S), // >
_ CLEX_S( GE_S), // >=
_ CLEX_S( SHR_S), // >>
_ CLEX_S( SHRAS_S), // >>=
_ CLEX_S(QUEST_S), // ?
_ CLEX_S(AT_S), // @ (undefined)
_ CLEX_S(LBRACK_S), // [
_ CLEX_S(BSLASH_S), // \
_ CLEX_S(RBRACK_S), // ]
_ CLEX_S(CARET_S), // ^
_ CLEX_S( XORAS_S), // ^=
_ CLEX_S(GRAVE_S), // ` (undefined)
_ CLEX_S(LBRACE_S), // {
_ CLEX_S(VBAR_S), // |
_ CLEX_S( LOR_S), // ||
_ CLEX_S( ORAS_S), // |=
_ CLEX_S(RBRACE_S), // }
_ CLEX_S(TILDE_S), // ~
_
_ CLEX_S(IDENT_S), // a name, or string that could be a name
_ CLEX_S(NUM_S), // a numeric string
_ CLEX_S(FLOATNUM_S) // a recognizably floating-point num
_
_ #ifndef CLEX_IMPLEMENTATION
_ , CLEX_S(KEYWORD_S),
_
_ #else
_
_ };
_ static char *keywords[] =
_ {
_
_ #endif
_
_ CLEX_S2(ASM_S = KEYWORD_S, "asm"),
_ CLEX_S2(AUTO_S, "auto"),
_ CLEX_S2(BREAK_S, "break"),
_ CLEX_S2(CASE_S, "case"),
_ CLEX_S2(CHAR_S, "char"),
_ CLEX_S2(CLASS_S, "class"),
_ CLEX_S2(CONST_S, "const"),
_ CLEX_S2(CONTINUE_S, "continue"),
_ CLEX_S2(DEFAULT_S, "default"),
_ CLEX_S2(DELETE_S, "delete"),
_ CLEX_S2(DO_S, "do"),
_ CLEX_S2(DOUBLE_S, "double"),
_ CLEX_S2(ELSE_S, "else"),
_ CLEX_S2(ENUM_S, "enum"),
_ CLEX_S2(EXTERN_S, "extern"),
_ CLEX_S2(FLOAT_S, "float"),
_ CLEX_S2(FOR_S, "for"),
_ CLEX_S2(FRIEND_S, "friend"),
_ CLEX_S2(GOTO_S, "goto"),
_ CLEX_S2(IF_S, "if"),
_ CLEX_S2(INLINE_S, "inline"),
_ CLEX_S2(INT_S, "int"),
_ CLEX_S2(LONG_S, "long"),
_ CLEX_S2(NEW_S, "new"),
_ CLEX_S2(OPERATOR_S, "operator"),
_ CLEX_S2(OVERLOAD_S, "overload"),
_ CLEX_S2(PRIVATE_S, "private"),
_ CLEX_S2(PROTECTED_S,"protected"),
_ CLEX_S2(PUBLIC_S, "public"),
_ CLEX_S2(REGISTER_S, "register"),
_ CLEX_S2(RETURN_S, "return"),
_ CLEX_S2(SHORT_S, "short"),
_ CLEX_S2(SIGNED_S, "signed"),
_ CLEX_S2(SIZEOF_S, "sizeof"),
_ CLEX_S2(STATIC_S, "static"),
_ CLEX_S2(STRUCT_S, "struct"),
_ CLEX_S2(SWITCH_S, "switch"),
_ CLEX_S2(THIS_S, "this"),
_ CLEX_S2(TYPEDEF_S, "typedef"),
_ CLEX_S2(UNION_S, "union"),
_ CLEX_S2(UNSIGNED_S, "unsigned"),
_ CLEX_S2(VIRTUAL_S, "virtual"),
_ CLEX_S2(VOLATILE_S, "volatile"),
_ CLEX_S2(VOID_S, "void"),
_ CLEX_S2(WHILE_S, "while"),
_
_ CLEX_S2(END_OF_SYMBOLS_S, NULL)
_ };
_
_ #ifndef CLEX_IMPLEMENTATION
_ const CLEX_NUMKEYS = (END_OF_SYMBOLS_S - KEYWORD_S);
_ #endif
_
%%%EOF%%%
echo "x - clex.c"
sed -e 's/^_ //' >clex.c <<'%%%EOF%%%'
_
_ #ifndef INCLUDED_STREAM
_ #include <stream.h>
_ #endif
_ #ifndef INCLUDED_STRING
_ #include <string.h>
_ #endif
_ #ifndef INCLUDED_STDLIB
_ #include <stdlib.h>
_ #endif
_ #ifndef INCLUDED_ASSERT
_ #include <assert.h>
_ #endif
_ #ifndef INCLUDED_CTYPE
_ #include <ctype.h>
_ #endif
_
_ #include "clex.h"
_
_ // get string value tables, sym_str[] and keyword[] :
_ #define CLEX_IMPLEMENTATION 1
_ #include "clex_sym.h"
_
_ /******************************************************************************
_ * *
_ * KWTABLE -- keyword hash table (internal use only) *
_ * KWtable implements a collision-free hash table of C++ keywords. The *
_ * table size and hash function are computed by use of a standalone C *
_ * program, kwhash.c, included in this directory. *
_ * *
_ ******************************************************************************/
_
_ #define U_short unsigned short
_ #define U_char unsigned char
_
_ struct KWtable
_ {
_ enum { HASHSIZE = 131 }; // as computed by kwhash.c, for a=9,b=2,c=2
_
_ struct {
_ char* kwp;
_ Clex_sym sym;
_ } kwhash[HASHSIZE];
_
_ KWtable(char**);
_ U_short hash(const U_char*, U_short len);
_ void insert(char*, Clex_sym);
_ Clex_sym lookup(char*, short len);
_ };
_
_ static KWtable kwt = KWtable(keywords); // keywords[] defined in Clex_sym.h
_
_ KWtable::
_ KWtable (char** kwl)
_ {
_ short int i;
_ for (i = 0; i < HASHSIZE; ++i)
_ kwhash[i].kwp = NULL;
_ for (i = 0; i < CLEX_NUMKEYS; ++i)
_ insert(kwl[i], KEYWORD_S + i);
_ // rely on assert() to prevent hash collisions -- may need
_ // a new hash function or table size when keyword added.
_ }
_
_ // the values used in the following hash function, and HASHSIZE, were
_ // determined by use of the standalone C program kwhash.c, to
_ // ensure that no collisions occur.
_
_ inline
_ U_short KWtable::
_ hash (const U_char* cp, U_short len)
_ {
_ return (((U_short)cp[0] ) ^
_ ((U_short)cp[1] << 9) ^
_ ((U_short)cp[len-1] << 2) ^
_ (len << 2) ) % HASHSIZE;
_ }
_
_ void KWtable::
_ insert (char* cp, Clex_sym s)
_ {
_ U_short h = hash(cp, strlen(cp));
_ assert(kwt.kwhash[h].kwp == NULL); // collisions not permitted.
_ kwt.kwhash[h].kwp = cp;
_ kwt.kwhash[h].sym = s;
_ }
_
_ Clex_sym KWtable::
_ lookup (char* cp, short len)
_ {
_ if (len < 2 || len > 9) return (IDENT_S);
_ short h = hash(cp, len);
_ if (kwt.kwhash[h].kwp == NULL) return (IDENT_S);
_ if (strcmp(kwt.kwhash[h].kwp, cp)) return (IDENT_S);
_ return (kwt.kwhash[h].sym);
_ }
_
_ /******************************************************************************
_ * *
_ * CLEX -- c++ lexical scanner *
_ * *
_ ******************************************************************************/
_
_ // CONSTRUCTOR Clex:
_ // The argument block_brack, if TRUE, dictates that the contents
_ // of square brackets "[]" be returned as a string in the string
_ // buffer. If false, square brackets are treated as simple tokens.
_
_ Clex::
_ Clex (FILE* f, Boolean b)
_ {
_ fp = f;
_ block_brack = b;
_ filename[0] = '\0';
_ bufsiz = 0; buf[0] = '\0';
_ // prime the pipeline:
_ line_num = 0;
_ look = '\n'; // be prepared to handle '#' as first char
_ }
_
_ Clex_sym Clex::
_ num (char c)
_ {
_ Clex_sym s = NUM_S;
_
_ bufsiz = 0;
_ put_in_buf(c);
_ while (isdigit(look))
_ buf_one();
_
_ // hexadecimal
_ if (bufsiz == 1 && *buf == '0' && (look == 'x' || look == 'X'))
_ {
_ do { buf_one(); }
_ while (isxdigit(look));
_ if (look == 'L' || look == 'l' || look == 'U' || look == 'u')
_ buf_one();
_ return terminate(s);
_ }
_
_ // long or unsigned
_ if (look == 'L' || look == 'l' || look == 'U' || look == 'u')
_ { buf_one(); return terminate(NUM_S); }
_
_ // floating point
_ else if (look == '.')
_ {
_ s = FLOATNUM_S;
_ do { buf_one(); }
_ while (isdigit(look));
_ }
_
_ // scientific notation
_ if (look == 'e' || look == 'E')
_ {
_ s = FLOATNUM_S;
_ do { buf_one(); }
_ while (isdigit(look));
_ }
_ else
_ return terminate(s);
_
_ if (look == '+' || look == '-')
_ do { buf_one(); }
_ while (isdigit(look));
_ return terminate(s);
_ }
_
_ Clex_sym Clex::
_ ident (char first)
_ {
_ register Boolean maybe_kw = TRUE;
_ register short bs = 0;
_ buf[bs++] = first;
_ while (isalnum(look) || look == '_' || look == '$')
_ {
_ // note: this function accounts for 30% of the total scan time
_ if (maybe_kw && (isupper(look) || look == '_' ))
_ maybe_kw = FALSE;
_ buf[bs++] = look; // don't worry about overflow
_ eat_one();
_ }
_ buf[bs] = '\0';
_ bufsiz = bs;
_
_ if (maybe_kw)
_ return kwt.lookup(buf, bufsiz);
_ return IDENT_S;
_ }
_
_ Clex_sym Clex::
_ quote (char c, Clex_sym s, Clex_mode m)
_ {
_ if (m == CL_NONE)
_ bufsiz = 0;
_ while (look != c)
_ {
_ if (look == EOF)
_ { return terminate(ERROR_EOF_S); }
_ else if (look == '\n')
_ { return terminate(ERROR_EOLN_S); }
_ else if (look == '\\')
_ {
_ eat_one();
_ if (look == '\n')
_ { eat_one(); eoln(m|CL_QUOTE); continue; }
_ else if (look == EOF)
_ { return terminate(ERROR_EOF_S); }
_ else
_ put_in_buf('\\'); // this handles \' and \" too.
_ }
_ buf_one();
_ }
_ eat_one(); // eat the closing quote
_ return terminate(s);
_ }
_
_
_ // lbrack() accumulates the contents between "[" and "]" into
_ // the string buffer, handling syntactically quoted strings,
_ // comments, and nested brackets. Note that lbrack() is
_ // called recursively in the case of nested brackets.
_
_ Clex_sym Clex::
_ lbrack (Clex_mode m)
_ {
_ if (m == CL_NONE)
_ bufsiz = 0;
_ while (look != ']')
_ {
_ if (look == EOF)
_ return terminate(ERROR_EOF_S);
_
_ else if (look == '\n')
_ { eat_one(); eoln(m|CL_BRACK); }
_ else if (look == '[')
_ {
_ buf_one();
_ if (lbrack(m|CL_BRACK) == ERROR_EOF_S)
_ return ERROR_EOF_S; // already cleaned up.
_ else put_in_buf(']');
_ }
_ else if (look == '\'' || look == '"')
_ {
_ char c = look;
_ buf_one();
_ (void) quote(c, NONE_S, m|CL_BRACK);
_ put_in_buf(c);
_ }
_ else if (look == '/') // maybe a comment
_ {
_ eat_one();
_ if (look == '/')
_ line_comment();
_ else if (look == '*')
_ {
_ block_comment(m|CL_BRACK);
_ if (look == EOF) return terminate(ERROR_EOF_S);
_ }
_ else // stash the '/' and the char after
_ { put_in_buf('/'); buf_one(); }
_ }
_ else // just a character to save
_ buf_one();
_ }
_
_ eat_one(); // eat the ']'.
_ return terminate(LBRACK_S);
_ }
_
_
_ void Clex::
_ block_comment(Clex_mode m)
_ {
_ eat_one(); // eat the '*'
_ while (! (look == '*' && (eat_one(), look == '/')) )
_ {
_ if (look == EOF) return;
_ if (look == '\n') { eat_one(); eoln(m|CL_COMMENT); }
_ else if (look != '*') eat_one();
_ }
_ eat_one(); // eat the '/'
_ }
_
_ void Clex::
_ line_comment()
_ {
_ do { eat_one(); }
_ while (look != '\n' && look != EOF);
_ }
_
_ // eat_return() is intended to save space in Clex::next() -- the
_ // inline function eat_one() produces quite a lot of code.
_ Clex_sym Clex::
_ eat_return(Clex_sym s)
_ { eat_one(); return s; }
_
_ Clex_sym Clex::
_ next()
_ {
_ short val;
_ while (val = look, eat_one(), val != EOF)
_ {
_ char ch = char(val);
_ switch (ch)
_ {
_ case ' ' : continue;
_
_ case '_' :
_ case '$' : return ident(ch);
_
_ case '0' : case '1' : case '2' : case '3' : case '4' :
_ case '5' : case '6' : case '7' : case '8' : case '9' :
_ return num(ch);
_
_ case ',' : return COMMA_S;
_ case ';' : return SEMI_S;
_ case '[' : if (block_brack) return lbrack(CL_NONE);
_ else return LBRACK_S;
_ case ']' : return RBRACK_S;
_ case '{' : return LBRACE_S;
_ case '}' : return RBRACE_S;
_ case '(' : return LPAR_S;
_ case ')' : return RPAR_S;
_ case '~' : return TILDE_S;
_ case '?' : return QUEST_S;
_ case '"' : return quote(ch, QUOTE_S, CL_NONE);
_ case '\'': return quote(ch, APOS_S, CL_NONE);
_
_ case '=' : // '=', '=='
_ if (look != '=') return AS_S;
_ else return eat_return(EQ_S);
_
_ case ':' : // ":", "::"
_ if (look != ':') return COLON_S;
_ else return eat_return(SCOPE_S);
_
_ case '!' : // "!", "!="
_ if (look != '=') return BANG_S;
_ else return eat_return(NE_S);
_
_ case '^' : // "^", "^="
_ if (look != '=') return CARET_S;
_ else return eat_return(XORAS_S);
_
_ case '*' : // '*', '*='
_ if (look != '=') return STAR_S;
_ else return eat_return(MULAS_S);
_
_ case '%' : // '%', '%='
_ if (look != '=') return MOD_S;
_ else return eat_return(MODAS_S);
_
_ case '|' : // "|=", "||", "|"
_ if (look == '|') return eat_return(LOR_S);
_ else if (look == '=') return eat_return(ORAS_S);
_ else return VBAR_S;
_
_ case '&' : // "&", "&=", "&&"
_ if (look == '&') return eat_return(LAND_S);
_ else if (look == '=') return eat_return(ANDAS_S);
_ else return AMPER_S;
_
_ case '+' : // '+', '++', '+='
_ if (look == '+') return eat_return(INCRE_S);
_ else if (look == '=') return eat_return(ADDAS_S);
_ else return PLUS_S;
_
_ case '-' : // '--', '-=', '->', '-',
_ if (look == '-') return eat_return(DECRE_S);
_ else if (look == '=') return eat_return(SUBAS_S);
_ else if (look == '>') return eat_return(DEREF_S);
_ else return MINUS_S;
_
_ case '/' : // '/*', '//', '/=', '/'
_ if (look == '*')
_ {
_ block_comment(CL_NONE);
_ if (look == EOF) // almost certainly a mistake:
_ return ERROR_EOF_S;
_ else continue;
_ }
_ else if (look == '/')
_ { line_comment(); continue; }
_ else if (look == '=') return eat_return(DIVAS_S);
_ else return SLASH_S;
_
_ case '.' : // ".", "..."
_ if (isdigit(look)) return num(ch);
_ else if (look == '.')
_ {
_ eat_one(); // check for "..", undefined.
_ if (look != '.') return ERROR_UNKN_S;
_ else return eat_return(ELLIP_S);
_ }
_ else return DOT_S;
_
_ case '<' : // '<=', '<', '<<', '<<='
_ if (look == '=') return eat_return(LE_S);
_ else if (look == '<')
_ {
_ eat_one();
_ if (look != '=') return SHL_S;
_ else return eat_return(SHLAS_S);
_ }
_ else return LT_S;
_
_ case '>' : // '>=', '>', '>>', '>>='
_ if (look == '=') return eat_return(GE_S);
_ else if (look == '>')
_ {
_ eat_one();
_ if (look != '=') return SHR_S;
_ else return eat_return(SHRAS_S);
_ }
_ else return GT_S;
_
_ default:
_ if (isalpha(ch))
_ return ident(ch);
_ if (ch == '\n')
_ eoln(CL_NONE);
_ else if (iscntrl(ch))
_ continue;
_ else
_ return ERROR_UNKN_S;
_ }
_ }
_
_ return EOF_S;
_ }
_
_ struct Quickbuf
_ {
_ short len;
_ char line[10240];
_ void put_in(char c) { if (len < sizeof(line)-1) line[len++] = c; }
_ void terminate() { line[len] = '\0'; }
_ Quickbuf() { len = 0; }
_ };
_
_ void Clex::
_ eoln(Clex_mode m)
_ {
_ // assume NL character already eaten.
_ ++line_num;
_ // don't process '#' lines in quotes, comments, or '#' continuations.
_ if (m & (CL_QUOTE|CL_POUND|CL_COMMENT))
_ return;
_
_ // eat whitespace
_ while (look != EOF && look != '\n')
_ {
_ if (look == ' ' || iscntrl(char(look))) eat_one();
_ else break;
_ }
_ if (look != '#')
_ return;
_
_ // eat the '#' and subsequent whitespace
_ do { eat_one(); if (look == EOF || look == '\n') break; }
_ while (look == ' ' || iscntrl(char(look)));
_
_ // collect the '#' line
_ Quickbuf b;
_ do { // record line
_ if (look == '\\') // check for continuation line
_ {
_ eat_one();
_ if (look == '\n') { eat_one(); eoln(m|CL_POUND); }
_ else { b.put_in('\\'); }
_ }
_ else if (look == '/') // check for comment in '#' line
_ {
_ eat_one();
_ if (look == '*')
_ {
_ block_comment(m|CL_POUND);
_ if (look == EOF) break;
_ }
_ else if (look == '/') line_comment();
_ else { b.put_in('/'); }
_ }
_ else
_ {
_ if (iscntrl(char(look))) look = ' ';
_ b.put_in(look);
_ eat_one();
_ }
_
_ } while (look != '\n' && look != EOF);
_ b.terminate();
_
_ (void) pound(m, b.line, b.len); // call virtual handler
_ }
_
_ Boolean Clex::
_ pound (Clex_mode m, char* line, short len)
_ {
_ void(m); // to keep cfront blissful
_ char* cp = line;
_ if (!isdigit(*cp))
_ {
_ if (len < 5) return FALSE;
_ if (strncmp(cp, "line ", 5) != 0)
_ return FALSE; // don't know what it is
_ cp += 4;
_ while (*cp == ' ') ++cp;
_ if (!isdigit(*cp))
_ return FALSE;
_ }
_
_ // # <line> "<filename>" or #line <line> "<filename>"
_ line_num = atoi(cp) - 1; // will be incremented by eoln() later
_ while (isdigit(*cp)) ++cp;
_ while (*cp == ' ') ++cp;
_ if (*cp == '"')
_ {
_ char* cpq = cp;
_ do { ++cpq; }
_ while (*cpq != '"' && *cpq != '\0');
_ strncpy(filename, cp+1, cpq - cp - 1);
_ filename[cpq - cp - 1] = '\0';
_ }
_
_ return TRUE;
_ }
_
_ const char* Clex::
_ debug (Clex_sym s)
_ {
_ return (s >= KEYWORD_S) ? keywords[s - KEYWORD_S] : sym_str[s] ;
_ }
%%%EOF%%%
echo "x - kwhash.c"
sed -e 's/^_ //' >kwhash.c <<'%%%EOF%%%'
_
_ /* this is a C program */
_
_ #include <stdio.h>
_
_ static char *keywords[] =
_ {
_ "asm",
_ "auto",
_ "break",
_ "case",
_ "char",
_ "class",
_ "const",
_ "continue",
_ "default",
_ "delete",
_ "do",
_ "double",
_ "else",
_ "enum",
_ "extern",
_ "float",
_ "for",
_ "friend",
_ "goto",
_ "if",
_ "inline",
_ "int",
_ "long",
_ "new",
_ "operator",
_ "overload",
_ "private",
_ "protected",
_ "public",
_ "register",
_ "return",
_ "short",
_ "signed",
_ "sizeof",
_ "static",
_ "struct",
_ "switch",
_ "this",
_ "typedef",
_ "union",
_ "unsigned",
_ "virtual",
_ "volatile",
_ "void",
_ "while"
_ };
_
_ #define KW_NUMKEYS (sizeof(keywords)/sizeof(*keywords))
_
_ unsigned int hashsize = 137;
_ char** kwhash;
_ typedef unsigned short u_short;
_
_ u_short
_ hash(cp, len, a, b, c)
_ unsigned char* cp;
_ u_short len;
_ u_short a, b, c;
_ {
_ return (((u_short)cp[0] ) ^
_ ((u_short)cp[1] << a) ^
_ ((u_short)cp[len-1] << b) ^
_ (len << c) ) % hashsize;
_ }
_
_ int
_ insert(cp, a, b, c)
_ char *cp;
_ u_short a, b, c;
_ {
_ short h;
_
_ h = hash(cp, strlen(cp), a, b, c);
_ if (kwhash[h] != NULL)
_ {
_ /*
_ printf("Keyword hash collision: %s, %s\n", kwhash[h], cp);
_ */
_ return 0;
_ }
_ else
_ kwhash[h] = cp;
_ return 1;
_ }
_
_ int
_ try(a, b, c)
_ short a, b, c;
_ {
_ short int i;
_ int collisions;
_
_ collisions = 0;
_ for (i = 0; i < hashsize; ++i)
_ kwhash[i] = NULL;
_ for (i = 0; i < KW_NUMKEYS; ++i)
_ if (!insert(keywords[i], a, b, c))
_ ++collisions;
_ return collisions;
_ }
_
_ main(argc, argv)
_ int argc;
_ char **argv;
_ {
_ int min_collisions;
_ int min_abc = 0;
_ short a, b, c;
_
_ if (argc > 1) hashsize = atoi(argv[1]);
_ else
_ {
_ printf("usage: %s <hash_size>\n\t<hash_size> should be prime.\n",
_ argv[0]);
_ exit(-1);
_ }
_
_ if (hashsize < KW_NUMKEYS)
_ {
_ printf("Hash table is too small.\n");
_ exit(-1);
_ }
_
_ kwhash = (char**) malloc(hashsize * sizeof(char*));
_ min_collisions = hashsize + 1;
_ for (a = 0; a <= 10; ++a)
_ {
_ for (b = 0; b <= 10; ++b)
_ {
_ for (c = 0; c <= 10; ++c)
_ {
_ int collisions;
_
_ collisions = try(a, b, c);
_ if (collisions <= min_collisions)
_ {
_ printf("abc: %03x Collisions: %2d ",
_ ((a<<8)|(b<<4)|c), collisions);
_ min_collisions = collisions;
_ if (collisions == 0) putchar('*');
_ putchar('\n');
_ }
_ }
_ }
_ }
_ }
%%%EOF%%%
echo "x - clex_test.c"
sed -e 's/^_ //' >clex_test.c <<'%%%EOF%%%'
_
_ // clex_test -- test code for clex.o
_
_ #include "clex.h"
_
_ main()
_ {
_ Clex cl = Clex(stdin, TRUE);
_ Clex_sym s;
_ do {
_ s = cl.next();
_ printf("%5D ", cl.line_no());
_ if (s >= KEYWORD_S)
_ printf(" %s\n", cl.debug(s));
_ else if (s == IDENT_S ||
_ s == NUM_S ||
_ s == FLOATNUM_S ||
_ s == LBRACK_S ||
_ s == APOS_S ||
_ s == QUOTE_S )
_ printf( " %s \"%s\"\n", cl.debug(s), cl.str());
_ else
_ printf( " %s\n", cl.debug(s));
_ } while (s > EOF_S);
_
_ exit(0);
_ }
%%%EOF%%%
echo "Done."
exit 0