home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 5 Edit
/
05-Edit.zip
/
browser2.zip
/
lex.c
< prev
next >
Wrap
C/C++ Source or Header
|
1995-02-17
|
31KB
|
1,228 lines
/* EMACS CLASS BROWSER FOR C++.
Copyright (C) 1993-95
Gerd Moellmann (mmann@ibm.net, CIS:100025,3303)
$Id: lex.c,v 3.1 1995/02/17 18:20:24 mmann Exp $
This file may be made part of GNU Emacs at the option of the FSF.
This code is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY. No author or distributor
accepts responsibility to anyone for the consequences of using it
or for whether it serves any particular purpose or works at all,
unless he says so in writing. Refer to the GNU Emacs General Public
License for full details.
Everyone is granted permission to copy, modify and redistribute
this code, but only under the conditions described in the
GNU Emacs General Public License. A copy of this license is
supposed to have been given to you along with GNU Emacs so you
can know your rights and responsibilities. It should be in a
file named COPYING. Among other things, the copyright notice
and this notice must be preserved on all copies. */
#ifdef __IBMC__
#include <io.h>
#endif
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
#include "br-rev.el"
#include "lex.h"
/* Usage message printed by usage ().  It is a printf format with two
   %s conversions: the revision string and the copyright string. */
#define USAGE "\
EBROWSE v%s\n\
%s\n\
Usage: ebrowse [options] {files}\n\
Options:\n\
-a append output\n\
-e<file> call from Emacs\n\
-f do not do expensive friends processing\n\
-i<file> read file names from file\n\
-m# set minimum regexp length\n\
-M# set maximum regexp length\n\
-n include nested classes\n\
-o<file> set output file name\n\
-r<file> write regular expressions to file\n\
-s don't record structs and unions\n\
-v be verbose\n\
-V be very verbose\n\
-x record regular expressions\n\
"
/* Size of the token text buffer yytext. */
#define MAX_STRING_LENGTH 1024
/* Number of buckets in the keyword hash table. */
#define HASH_SIZE 1001
/* Text of the string, character constant or identifier scanned by
   yylex, and a pointer one past the end of that buffer. */
char yytext[MAX_STRING_LENGTH];
char* yytext_end = &yytext[MAX_STRING_LENGTH];
/* File descriptor the current input is read from. */
int yyin;
/* Output stream; main sets it to stdout or to the opened output file. */
FILE* yyout;
/* File descriptor of the regular expression output file, if any. */
int regexp_file;
/* Current line number; reset to 1 for each input and advanced by yylex. */
unsigned yyline;
/* Nonzero means yylex skips `//' comments. */
int allow_cpp_comments = 1;
/* Value of the integer constant scanned by yylex. */
long long_val;
/* Name printed by yyerror in front of each message. */
char* filename;
/* Name of the regular expression output file (option -r). */
char* regexp_filename;
/* Three character class vectors, indexed by character code and filled
   in by init_lex.
   NOTE(review): with 255 entries there is no slot for character code
   255; consider 256 entries after checking any extern declarations
   of these vectors elsewhere. */
char is_ident[255];
char is_digit[255];
char is_white[255];
/* Command line flags, set by main from the options of the same
   letter shown in the usage text. */
int f_costly_friends = 1;
int f_append = 0;
int f_verbose = 0;
int f_very_verbose = 0;
int f_structs = 1;
int f_regexps = 0;
int f_emacs = 0;
int f_nested_classes = 0;
int min_regexp = MIN_REGEXP;
int max_regexp = MAX_REGEXP;
/* Number of carriage returns seen by yylex (counted only when MSDOS or
   OS2 is defined); buffer_pos subtracts it from the buffer offset. */
unsigned carriage_returns;
/* Start of the input buffer allocated by init_lex, and the current
   scan position within it. */
char* inbuffer;
char* in;
/* One keyword table entry: the keyword text, the next entry in the
   same hash bucket, and the token code yylex returns for it.
   kw_hash_table holds the HASH_SIZE bucket heads. */
struct kw
{
char* name;
struct kw* next;
int tk;
}
* kw_hash_table[HASH_SIZE];
/* Print an error message on standard output in the form
   `FILENAME:LINE: message', followed by a newline.  FORMAT and the
   remaining arguments are interpreted as by printf. */
void
yyerror (char* format, ...)
{
  va_list args;

  va_start (args, format);
  /* yyline is unsigned, so it must be printed with %u. */
  printf ("%s:%u: ", filename, yyline);
  vprintf (format, args);
  putchar ('\n');
  va_end (args);
}
/* Forward declarations.  Full prototypes are only given when the
   compiler supports them (PROTOTYPES defined); glob_filename is
   declared for GNU C in either case. */
#ifdef PROTOTYPES
static void kw_insert (char* name, int tk);
static struct kw* kw_lookup (char* name);
static void re_init_lex (void);
static void init_lex (void);
static void usage (void);
static void process_file (char* file);
static void process_stdin (void);
#ifdef __GNUC__
extern char** glob_filename (char* path);
#endif
#else
#ifdef __GNUC__
extern char** glob_filename ();
#endif
#endif
/* Enter keyword NAME with token code TK into the keyword hash table.
   NAME is not copied; the new entry is put at the front of its
   bucket's chain. */
static void
kw_insert (name, tk)
     char* name;
     int tk;
{
  unsigned bucket = 0;
  struct kw* entry = (struct kw*) xmalloc (sizeof *entry);
  char* cp = name;

  /* Shift-and-xor hash over all characters of the name. */
  while (*cp)
    {
      bucket = (bucket << 1) ^ *cp;
      ++cp;
    }
  bucket = bucket % HASH_SIZE;

  entry->tk = tk;
  entry->name = name;
  entry->next = kw_hash_table[bucket];
  kw_hash_table[bucket] = entry;
}
/* Return the keyword table entry whose name equals NAME, or a null
   pointer if there is none. */
static struct kw*
kw_lookup (name)
     char* name;
{
  unsigned bucket = 0;
  struct kw* entry;
  char* cp = name;

  /* Same shift-and-xor hash as used when inserting. */
  while (*cp)
    {
      bucket = (bucket << 1) ^ *cp;
      ++cp;
    }
  bucket = bucket % HASH_SIZE;

  /* Walk the bucket's chain until the name matches or the chain ends. */
  entry = kw_hash_table[bucket];
  while (entry && strcmp (entry->name, name) != 0)
    entry = entry->next;

  return entry;
}
/* Reset the scanner state for a new input: line number back to 1,
   carriage return count cleared (MSDOS/OS2 only), and the scan
   position back at the start of the input buffer. */
static void
re_init_lex ()
{
  yyline = 1;
#if MSDOS || OS2
  carriage_returns = 0;
#endif
  in = inbuffer;
}
/* One-time scanner initialization: allocate the input buffer, fill in
   the character class vectors and enter all keywords into the keyword
   hash table.  Exits with status 1 if the input buffer cannot be
   allocated. */
void
init_lex ()
{
  unsigned i;

  /* One extra byte for the NUL that terminates the input. */
  inbuffer = in = (char*) malloc (INBUFFER_SIZE+1);
  if (inbuffer == NULL)
    {
      fprintf (stderr, "ebrowse: cannot allocate input buffer\n");
      exit (1);
    }
  yyline = 1;

  /* The three vectors have the same size, so one loop fills them all. */
  for (i = 0; i < sizeof is_ident; ++i)
    {
      if (i == '_' || isalnum ((int) i))
        is_ident[i] = 1;
      if (i >= '0' && i <= '9')
        is_digit[i] = 1;
      if (i == ' ' || i == '\t' || i == '\f' || i == '\v')
        is_white[i] = 1;
    }

  kw_insert ("asm", ASM);
  kw_insert ("auto", AUTO);
  kw_insert ("break", BREAK);
  kw_insert ("case", CASE);
  kw_insert ("catch", CATCH);
  kw_insert ("char", CHAR);
  kw_insert ("class", CLASS);
  kw_insert ("const", CONST);
  kw_insert ("continue", CONTINUE);
  kw_insert ("default", DEFAULT);
  kw_insert ("delete", DELETE);
  kw_insert ("do", DO);
  kw_insert ("double", DOUBLE);
  kw_insert ("else", ELSE);
  kw_insert ("enum", ENUM);
  kw_insert ("extern", EXTERN);
  kw_insert ("float", FLOAT);
  kw_insert ("for", FOR);
  kw_insert ("friend", FRIEND);
  kw_insert ("goto", GOTO);
  kw_insert ("if", IF);
  kw_insert ("inline", INLINE);
  kw_insert ("int", INT);
  kw_insert ("long", LONG);
  kw_insert ("new", NEW);
  kw_insert ("operator", OPERATOR);
  kw_insert ("private", PRIVATE);
  kw_insert ("protected", PROTECTED);
  kw_insert ("public", PUBLIC);
  kw_insert ("register", REGISTER);
  kw_insert ("return", RETURN);
  kw_insert ("short", SHORT);
  kw_insert ("signed", SIGNED);
  kw_insert ("sizeof", SIZEOF);
  kw_insert ("static", STATIC);
  kw_insert ("struct", STRUCT);
  kw_insert ("switch", SWITCH);
  kw_insert ("template", TEMPLATE);
  kw_insert ("this", THIS);
  kw_insert ("throw", THROW);
  kw_insert ("try", TRY);
  kw_insert ("typedef", TYPEDEF);
  kw_insert ("union", UNION);
  kw_insert ("unsigned", UNSIGNED);
  kw_insert ("virtual", VIRTUAL);
  kw_insert ("void", VOID);
  kw_insert ("volatile", VOLATILE);
  kw_insert ("while", WHILE);
}
#define get(c) (c = *in++)
#define unget(c) --in
#undef EOF
#define EOF 0
int
yylex ()
{
int c;
char end_char;
char* p;
for (;;)
{
while (is_white[get (c)])
;
switch (c)
{
case '\n':
/* Newlines */
++yyline;
break;
case '\r':
#if MSDOS || OS2
++carriage_returns;
#endif
break;
case EOF:
/* End of file */
return 0;
case '\\':
get (c);
#if MSDOS || OS2
if (c == '\r')
{
get (c);
++carriage_returns;
}
#endif
break;
case '"':
case '\'':
/* String and character constants */
p = yytext;
end_char = c;
while (get (c) != EOF && c != end_char)
{
switch (c)
{
case '\\':
/* Escape sequences. */
if (get (c) == EOF)
{
if (end_char == '\'')
yyerror ("EOF in character constant");
else
yyerror ("EOF in string constant");
goto end_string;
}
else switch (c)
{
#if MSDOS || OS2
case '\r':
get (c);
++carriage_returns;
break;
#endif
case '\n':
break;
case 'a':
*p++ = '\a';
break;
case 'b':
*p++ = '\b';
break;
case 'f':
*p++ = '\f';
break;
case 'n':
*p++ = '\n';
break;
case 'r':
*p++ = '\r';
break;
case 't':
*p++ = '\t';
break;
case 'v':
*p++ = '\v';
break;
case 'x':
{
/* Hexadecimal escape sequence. */
int i, v;
for (i = v = 0; i < 2; ++i)
{
get (c);
if (c >= '0' && c <= '7')
v = 16 * v + c - '0';
else if (c >= 'a' && c <= 'f')
v = 16 * v + c - 'a' + 10;
else if (c >= 'A' && c <= 'F')
v = 16 * v + c - 'A' + 10;
else
{
unget (c);
break;
}
}
*p = v;
}
break;
case '0':
{
/* Octal escape sequence. */
int i, v;
for (i = v = 0; i < 3; ++i)
{
get (c);
if (c >= '0' && c <= '7')
v = 8 * v + c - '0';
else
{
unget (c);
break;
}
}
*p = v;
}
break;
default:
*p++ = c;
break;
}
break;
case '\n':
if (end_char == '\'')
yyerror ("newline in character constant");
else
yyerror ("newline in string constant");
++yyline;
goto end_string;
default:
*p++ = c;
break;
}
}
end_string:
if (p > yytext_end)
{
yyerror ("string constant overflow");
exit (1);
}
return end_char == '\'' ? CCHAR : CSTRING;
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
case 'v': case 'w': case 'x': case 'y': case 'z':
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_':
{
/* Identifier and keywords. */
unsigned hash;
struct kw* k;
p = yytext;
*p++ = hash = c;
while (is_ident[get (*p)])
hash = (hash << 1) ^ *p++;
unget (*p);
*p = 0;
for (k = kw_hash_table[hash % HASH_SIZE]; k; k = k->next)
if (!strcmp (k->name, yytext))
return k->tk;
return IDENT;
}
case '/':
/* C and C++ comments, '/' and '/=' */
switch (get (c))
{
case '*':
while (get (c) != EOF)
{
switch (c)
{
case '*':
if (get (c) == '/')
goto comment_end;
unget (c);
break;
case '\\':
get (c);
break;
case '\n':
++yyline;
break;
#if MSDOS || OS2
case '\r':
++carriage_returns;
break;
#endif
}
}
comment_end:;
break;
case '=':
return DIVASGN;
case '/':
if (allow_cpp_comments)
{
while (get (c) != EOF && c != '\n')
;
++yyline;
break;
}
default:
unget (c);
return '/';
}
break;
case '+':
if (get (c) == '+')
return INC;
else if (c == '=')
return ADDASGN;
unget (c);
return '+';
case '-':
switch (get (c))
{
case '-':
return DEC;
case '>':
if (get (c) == '*')
return ARROWSTAR;
unget (c);
return ARROW;
case '=':
return SUBASGN;
}
unget (c);
return '-';
case '*':
if (get (c) == '=')
return MULASGN;
unget (c);
return '*';
case '%':
if (get (c) == '=')
return MODASGN;
unget (c);
return '%';
case '|':
if (get (c) == '|')
return LOR;
else if (c == '=')
return ORASGN;
unget (c);
return '|';
case '&':
if (get (c) == '&')
return LAND;
else if (c == '=')
return ANDASGN;
unget (c);
return '&';
case '^':
if (get (c) == '=')
return XORASGN;
unget (c);
return '^';
case '.':
if (get (c) == '*')
return POINTSTAR;
else if (c == '.')
{
if (get (c) != '.')
yyerror ("invalid token '..' ('...' assumed)");
unget (c);
return ELLIPSIS;
}
else if (! is_digit[c])
{
unget (c);
return '.';
}
goto mantissa;
case ':':
if (get (c) == ':')
return DCOLON;
unget (c);
return ':';
case '=':
if (get (c) == '=')
return EQ;
unget (c);
return '=';
case '!':
if (get (c) == '=')
return NE;
unget (c);
return '!';
case '<':
switch (get (c))
{
case '=':
return LE;
case '<':
if (get (c) == '=')
return LSHIFTASGN;
unget (c);
return LSHIFT;
}
unget (c);
return '<';
case '>':
switch (get (c))
{
case '=':
return GE;
case '>':
if (get (c) == '=')
return RSHIFTASGN;
unget (c);
return RSHIFT;
}
unget (c);
return '>';
case '#':
while (get (c) != EOF && c != '\n')
if (c == '\\')
{
#if MSDOS || OS2
get (c); /* for \r */
if (c == '\r')
get (c);
#else
get (c);
#endif
}
if (c == EOF)
return 0;
++yyline;
break;
case '(': case ')': case '[': case ']': case '{': case '}':
case ';': case ',': case '?': case '~':
return c;
case '0':
long_val = 0;
if (get (c) == 'x' || c == 'X')
{
while (get (c) != EOF)
{
if (is_digit[c])
long_val = long_val * 16 + c - '0';
else if (c >= 'a' && c <= 'f')
long_val = long_val * 16 + c - 'a' + 10;
else if (c >= 'A' && c <= 'F')
long_val = long_val * 16 + c - 'A' + 10;
else
break;
}
goto int_suffixes;
}
else if (c == '.')
goto mantissa;
while (c >= '0' && c <= '7')
{
long_val = long_val << 3 + c - '0';
get (c);
}
int_suffixes:
/* Integer suffixes */
while (c == 'l' || c == 'L' || c == 'u' || c == 'U')
get (c);
unget (c);
return CINT;
case '1': case '2': case '3': case '4': case '5': case '6':
case '7': case '8': case '9':
/* Integer or floating constant, part before '.' */
long_val = c - '0';
while (get (c) != EOF && is_digit[c])
long_val = 10 * long_val + c - '0';
if (c != '.')
goto int_suffixes;
mantissa:
/* Digits following '.' */
while (is_digit[c])
get (c);
/* Optional exponent */
if (c == 'E' || c == 'e')
{
if (get (c) == '-' || c == '+')
get (c);
while (is_digit[c])
get (c);
}
/* Optional type suffixes */
while (c == 'f' || c == 'F' || c == 'l' || c == 'L')
get (c);
return CFLOAT;
default:
#if 0
yyerror ("illegal character '%c'", c);
#endif
break;
}
}
}
/* Return the text from the start of the current line (or of an
   earlier line, if the current one is shorter than MIN_REGEXP) up to
   the scan position, for use as a regular expression.  Double quotes
   are escaped with a backslash and, under OS2, carriage returns are
   dropped.  The result lives in a static buffer that is overwritten
   by the next call.  Returns NULL if regular expressions are not
   being recorded.
   NOTE(review): the limits used here are the compile-time MIN_REGEXP
   and MAX_REGEXP, not the min_regexp/max_regexp variables set by the
   -m and -M options -- confirm whether that is intended. */
char*
line_start ()
{
  char* p;
  char* s;
  char* t;
  static char buffer[MAX_REGEXP];
  static char* end_buf = &buffer[MAX_REGEXP] - 1;

  if (!f_regexps)
    return NULL;

  /* Scan back to the previous newline or to the buffer start, never
     looking in front of the buffer. */
  p = in > inbuffer ? in - 1 : inbuffer;
  while (p > inbuffer && *p != '\n')
    --p;

  if (*p == '\n')
    {
      while (in - p < MIN_REGEXP && p > inbuffer)
        {
          /* Line probably not significant enough */
          for (--p; p > inbuffer && *p != '\n'; --p)
            ;
        }
      if (*p == '\n')
        ++p;
    }

  /* Copy from end to make sure significant portions are included.
     This implies that in the browser a regular expression of the
     form `^.*{regexp}' has to be used. */
  for (s = end_buf-1, t = in; s > buffer && t > p;)
    {
      *--s = *--t;
      if (*s == '"')
        {
          if (s == buffer)
            {
              /* No room left for the escaping backslash; drop the
                 quote rather than write in front of the buffer. */
              ++s;
              break;
            }
          *--s = '\\';
        }
#ifdef OS2
      else if (*s == '\r')
        ++s;
#endif
    }

  *(end_buf-1) = 0;
  return s;
}
/* Return the offset of the scan position from the start of the input
   buffer.  When MSDOS or OS2 is defined, the carriage returns counted
   so far are not included in the offset. */
unsigned
buffer_pos ()
{
  unsigned offset = in - inbuffer;
#if MSDOS || OS2
  offset -= carriage_returns;
#endif
  return offset;
}
char*
token_string (t)
unsigned t;
{
static char b[3];
switch (t)
{
case CSTRING: return "string constant";
case CCHAR: return "char constant";
case CINT: return "int constant";
case CFLOAT: return "floating constant";
case ELLIPSIS: return "...";
case LSHIFTASGN: return "<<=";
case RSHIFTASGN: return ">>=";
case ARROWSTAR: return "->*";
case IDENT: return "identifier";
case DIVASGN: return "/=";
case INC: return "++";
case ADDASGN: return "+=";
case DEC: return "--";
case ARROW: return "->";
case SUBASGN: return "-=";
case MULASGN: return "*=";
case MODASGN: return "%=";
case LOR: return "||";
case ORASGN: return "|=";
case LAND: return "&&";
case ANDASGN: return "&=";
case XORASGN: return "^=";
case POINTSTAR: return ".*";
case DCOLON: return "::";
case EQ: return "==";
case NE: return "!=";
case LE: return "<=";
case LSHIFT: return "<<";
case GE: return ">=";
case RSHIFT: return ">>";
case ASM: return "asm";
case AUTO: return "auto";
case BREAK: return "break";
case CASE: return "case";
case CATCH: return "catch";
case CHAR: return "char";
case CLASS: return "class";
case CONST: return "const";
case CONTINUE: return "continue";
case DEFAULT: return "default";
case DELETE: return "delete";
case DO: return "do";
case DOUBLE: return "double";
case ELSE: return "else";
case ENUM: return "enum";
case EXTERN: return "extern";
case FLOAT: return "float";
case FOR: return "for";
case FRIEND: return "friend";
case GOTO: return "goto";
case IF: return "if";
case INLINE: return "inline";
case INT: return "int";
case LONG: return "long";
case NEW: return "new";
case OPERATOR: return "operator";
case PRIVATE: return "private";
case PROTECTED: return "protected";
case PUBLIC: return "public";
case REGISTER: return "register";
case RETURN: return "return";
case SHORT: return "short";
case SIGNED: return "signed";
case SIZEOF: return "sizeof";
case STATIC: return "static";
case STRUCT: return "struct";
case SWITCH: return "switch";
case TEMPLATE: return "template";
case THIS: return "this";
case THROW: return "throw";
case TRY: return "try";
case TYPEDEF: return "typedef";
case UNION: return "union";
case UNSIGNED: return "unsigned";
case VIRTUAL: return "virtual";
case VOID: return "void";
case VOLATILE: return "volatile";
case WHILE: return "while";
case YYEOF: return "EOF";
}
assert (t < 255);
b[0] = t;
b[1] = 0;
return b;
}
/* Print the usage message and exit with status 1.  The version shown
   is the text that follows the last colon (and any blanks after it)
   in the revision string, cut off at the next dollar sign. */
static void
usage ()
{
  char buffer[50];
  char* rev = buffer;
  char* s;

  /* Bounded copy: the revision string may be longer than the buffer. */
  strncpy (buffer, revision, sizeof buffer - 1);
  buffer[sizeof buffer - 1] = '\0';

  s = buffer;
  while (*s)
    {
      if (*s == '$')
        *s++ = '\0';
      else if (*s == ':')
        {
          do ++s; while (*s == ' ');
          /* S may now be at the terminator; the loop test sees that. */
          rev = s;
        }
      else
        ++s;
    }

  printf (USAGE, rev, copyright);
  exit (1);
}
/* Read FILE into the input buffer and parse it.  A copy of the file
   name is stored in the global `filename' for error messages.  Errors
   opening or reading the file are reported through yyerror and the
   file is skipped.  At most INBUFFER_SIZE bytes are processed. */
void
process_file (file)
     char* file;
{
  long nread;

  /* Record in FILENAME the current file name.  The copy is not freed
     here. */
  filename = dupstr (file);

  /* Convert backslashes in paths to forward slashes.
     This is only needed if the Emacs used is not prepared
     to handle backslashes properly (e.g. DJ Emacs under
     MSDOS). */
#ifdef CONVERT_BACKSLASH
  {
    char* s;
#ifdef MSDOS
    strlwr (filename);
#endif
    for (s = filename; *s; ++s)
      if (*s == '\\')
        *s = '/';
  }
#endif

  /* Give a progress indication if needed. */
  if (f_very_verbose)
    {
      puts (filename);
      fflush (stdout);
    }
  else if (f_verbose)
    {
      putchar ('.');
      fflush (stdout);
    }

  /* Reinitialize scanner and parser. */
  re_init_lex ();
  re_init_parse ();

  /* Read input file into input buffer and parse it. */
  if ((yyin = open (filename, O_RDONLY | O_BINARY)) == -1)
    {
      yyerror ("cannot open");
      return;
    }

  nread = read (yyin, inbuffer, INBUFFER_SIZE);
  if (nread < 0)
    {
      /* A failed read must not be used as an index into the buffer. */
      yyerror ("cannot read");
      close (yyin);
      return;
    }
  if (nread == INBUFFER_SIZE)
    yyerror ("file too big, only %u bytes will be processed.",
             (unsigned) nread);

  /* The scanner relies on the terminating NUL to detect end of input. */
  inbuffer[nread] = 0;
  yyparse ();
  close (yyin);
}
/* Read standard input into the input buffer and parse it.  A read
   error is reported through yyerror and nothing is parsed.  At most
   INBUFFER_SIZE bytes are processed. */
void
process_stdin ()
{
  long nread;

  yyin = fileno (stdin);
  nread = read (yyin, inbuffer, INBUFFER_SIZE);
  if (nread < 0)
    {
      /* A failed read must not be used as an index into the buffer. */
      yyerror ("cannot read");
      return;
    }
  if (nread == INBUFFER_SIZE)
    yyerror ("file too big, only %u bytes will be processed.",
             (unsigned) nread);

  /* The scanner relies on the terminating NUL to detect end of input. */
  inbuffer[nread] = 0;
  re_init_lex ();
  re_init_parse ();
  yyparse ();
}
/* Permission bits for a newly created regular expression file. */
#if defined S_IREAD && defined S_IWRITE
#define REGEXP_FILE_MODE (S_IREAD | S_IWRITE)
#else
#define REGEXP_FILE_MODE 0666
#endif

/* Return the argument of the option letter at ARGV[*I][J].  The
   argument is either the rest of that word or, if the word ends after
   the letter, the next word; in the latter case ARGV[*I] is cleared
   and *I is advanced to the word holding the argument.  Prints the
   usage message and exits if there is no argument. */
static char*
option_arg (argc, argv, i, j)
     int argc;
     char** argv;
     int* i;
     int j;
{
  if (argv[*i][j+1])
    return &argv[*i][j+1];
  if (*i >= argc - 1)
    usage ();
  argv[*i] = NULL;
  ++*i;
  return argv[*i];
}

/* Call process_file for every non-empty line read from FP.  Each line
   holds one file name; the line terminator, if any, is removed. */
static void
process_file_list (fp)
     FILE* fp;
{
  static char b[255];

  while (fgets (b, sizeof b - 1, fp))
    {
      /* Cut at the line end only if there is one; the last line of
         the input may come without a newline. */
      b[strcspn (b, "\r\n")] = 0;
      if (*b)
        process_file (b);
    }
}

/* Program entry point.  Parses the command line options, initializes
   scanner and symbol tables, processes all input files (named on the
   command line, listed in files given with -i, or listed on standard
   input) and writes the result to the output file.  With -e, standard
   input is parsed instead and the result is written to stdout.
   Returns 0 on success; fatal errors exit with status 1. */
int
main (argc, argv)
     int argc;
     char** argv;
{
  int i;
  int any_inputfiles = 0;
  static char out_filename[255] = DEFAULT_OUTFILE;
  static char* input_filenames[MAX_INPUTFILES];
  static int n_input_files;

#if defined __GNUC__ && OS2
  _wildcard (&argc, &argv);
#endif

  filename = "command line";

  /* stdout is not a constant under IBM C Set++ 2.1 so it
     cannot be an initializer in global scope (would you believe
     that?) */
  yyout = stdout;

  if (argc < 2)
    usage ();

  /* Get command line switches.  Every word that is an option or an
     option argument is replaced by NULL in argv, so that only input
     file names remain. */
  for (i = 1; i < argc; ++i)
    {
      if (*argv[i] == '-')
        {
          int j;

          for (j = 1; argv[i][j]; ++j)
            {
              switch (argv[i][j])
                {
                case 'n':
                  f_nested_classes = 1;
                  break;

                case 'e':
                  f_emacs = 1;
                  filename = option_arg (argc, argv, &i, j);
                  goto next_argv;

                case 'x':
                  f_regexps = 1;
                  break;

                  /* Do not record regular expressions/ write regexps
                     to file (and insert positions instead of strings
                     into Lisp structures). */
                case 'r':
                  regexp_filename = option_arg (argc, argv, &i, j);
                  goto next_argv;

                case 'i':
                  /* read file names from input file. */
                  input_filenames[n_input_files++]
                    = dupstr (option_arg (argc, argv, &i, j));
                  goto next_argv;

                case 'a':
                  /* Append to output file */
                  f_append = 1;
                  break;

                case 'f':
                  f_costly_friends = 0;
                  break;

                case 's':
                  f_structs = 0;
                  break;

                case 'v':
                  f_verbose = 1;
                  break;

                case 'V':
                  f_verbose = 1;
                  f_very_verbose = 1;
                  break;

                case 'o':
                  {
                    char* name = option_arg (argc, argv, &i, j);

                    if (strlen (name) >= sizeof out_filename)
                      {
                        yyerror ("output file name too long");
                        exit (1);
                      }
                    strcpy (out_filename, name);
                    goto next_argv;
                  }

                case 'm':
                case 'M':
                  {
                    /* Remember the letter first; option_arg may
                       clear argv[i]. */
                    char option = argv[i][j];
                    int v = atoi (option_arg (argc, argv, &i, j));

                    if (option == 'm')
                      min_regexp = v;
                    else
                      max_regexp = v;
                    goto next_argv;
                  }

                default:
                  usage ();
                  break;
                }
            }

        next_argv:;
          /* The last slot of input_filenames must stay NULL, since it
             terminates the list. */
          if (n_input_files == MAX_INPUTFILES)
            {
              yyerror ("too many input files specified");
              exit (1);
            }
          argv[i] = NULL;
        }
    }

  init_lex ();
  init_sym ();

  if (f_emacs)
    {
      process_stdin ();
      dump_roots (stdout);
      exit (0);
    }

  /* Open output file */
  if (*out_filename)
    if (0 == (yyout = fopen (out_filename, f_append ? "a" : "w")))
      {
        yyerror ("cannot open output file %s", out_filename);
        exit (1);
      }

  /* Open regular expression output file, if any.  O_CREAT requires
     the permission argument. */
  if (regexp_filename)
    if (-1 == (regexp_file = open (regexp_filename,
                                   O_WRONLY | O_BINARY | O_CREAT,
                                   REGEXP_FILE_MODE)))
      {
        yyerror ("cannot open regexp output file `%s'", regexp_filename);
        exit (1);
      }

  /* Process input files given on command line. For GNU use globbing
     on the remaining files. */
  for (i = 1; i < argc; ++i)
    if (argv[i])
      {
        process_file (argv[i]);
        any_inputfiles = 1;
      }

  /* Process files given on stdin if no files specified */
  if (!any_inputfiles && !input_filenames[0])
    process_file_list (stdin);
  else
    {
      /* Process files from input files. */
      for (i = 0; input_filenames[i]; ++i)
        {
          FILE* fp = fopen (input_filenames[i], "r");

          if (!fp)
            yyerror ("cannot open input file `%s'", input_filenames[i]);
          else
            {
              process_file_list (fp);
              fclose (fp);
            }
        }
    }

  /* Print results */
  dump_roots (yyout);

  if (f_verbose || f_very_verbose)
    printf ("\ntotal allocated = %ld bytes\n", total_allocated);

  /* fclose flushes buffered output, so a failure here means the
     output file is incomplete. */
  if (yyout != stdout && fclose (yyout) != 0)
    {
      yyerror ("error writing output file %s", out_filename);
      return 1;
    }

  if (regexp_file > 0)
    close (regexp_file);

  return 0;
}