home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The C Users' Group Library 1994 August
/
wc-cdrom-cusersgrouplibrary-1994-08.iso
/
listings
/
v_10_04
/
1004071a
< prev
next >
Wrap
Text File
|
1992-02-19
|
23KB
|
952 lines
/******************************************************************************
* Module : Lexical Analyzer --- Process the input text file into tokens
* that the parser can understand.
*
* Copyright (C) 1990 John W. M. Stevens, All Rights Reserved
*
* Routines : Lex - Return the next token from the file.
* OpenPrg - Open the source file.
* ParsErr - Report a parsing error.
*
* Author : John W. M. Stevens
******************************************************************************/
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "lex.h"
/* Structure of trie branch.
 *
 * Every level of the trie is an array of NODEs.  Element 0 of each array is
 * a header rather than a real entry: its 'token' field holds the number of
 * elements in the array (header included), which the search routine uses as
 * the upper bound of its binary search.  The remaining elements are kept in
 * ascending order of 'c'.
 */
typedef struct key_st {
char c;                 /* Character matched at this level. */
TKNS token;             /* Token for a word ending here; 0 in the tables
                         * below for entries that only lead to a child.
                         * In element 0: the array length. */
struct key_st *child;   /* Next level of the trie, or NULL if none. */
} NODE;
/* Constants local to this file.
 *
 * MAX_STR - Number of characters (excluding the NUL) that the word buffers
 *           can hold; also the limit Lex applies to quoted strings.
 * NOT_FND - Value returned by the trie search when no entry matches.
 */
#define MAX_STR 256
#define NOT_FND -2
/* Object Data. */
static char word[MAX_STR + 1]; /* Last string analyzed. */
static char PrvWd[MAX_STR + 1]; /* Previous word. */
/* Zero based; error messages print LnNo + 1. */
static int LnNo = 0; /* The current line number in the file. */
/* Set by OpenPrg(); every read in this file goes through it. */
static FILE *PrgFl; /* File pointer. */
/* Trie data structure containing all the keywords and punctuation marks for
 * the language being tokenized.
 *
 * Each table is one level of the trie.  Element 0 is a header whose token
 * field holds the table length; the real entries follow in ascending
 * character order.  Tables are defined leaf first because each parent entry
 * points at its child table.
 *
 * The tables up to and including T1 form the sub-trie reached after an
 * initial 'a'.  It spells:
 *      "action"      (ACTION)      T2  -> T3  -> T4  -> T5
 *      "after"       (AFTER)       T6  -> T7  -> T8
 *      "and"         (AND)         T9
 *      "archive"     (ARCHIVE)     Ta  -> Tb  -> Tc  -> Td  -> Te
 *      "attributes"  (ATTRIBUTES)  Tf  -> T10 -> ... -> T16
 */
static
NODE T5[2] = {
{ ' ', 2, NULL },
{ 'n', ACTION, NULL }
};
static
NODE T4[2] = {
{ ' ', 2, NULL },
{ 'o', 0, T5 }
};
static
NODE T3[2] = {
{ ' ', 2, NULL },
{ 'i', 0, T4 }
};
static
NODE T2[2] = {
{ ' ', 2, NULL },
{ 't', 0, T3 }
};
static
NODE T8[2] = {
{ ' ', 2, NULL },
{ 'r', AFTER, NULL }
};
static
NODE T7[2] = {
{ ' ', 2, NULL },
{ 'e', 0, T8 }
};
static
NODE T6[2] = {
{ ' ', 2, NULL },
{ 't', 0, T7 }
};
static
NODE T9[2] = {
{ ' ', 2, NULL },
{ 'd', AND, NULL }
};
static
NODE Te[2] = {
{ ' ', 2, NULL },
{ 'e', ARCHIVE, NULL }
};
static
NODE Td[2] = {
{ ' ', 2, NULL },
{ 'v', 0, Te }
};
static
NODE Tc[2] = {
{ ' ', 2, NULL },
{ 'i', 0, Td }
};
static
NODE Tb[2] = {
{ ' ', 2, NULL },
{ 'h', 0, Tc }
};
static
NODE Ta[2] = {
{ ' ', 2, NULL },
{ 'c', 0, Tb }
};
static
NODE T16[2] = {
{ ' ', 2, NULL },
{ 's', ATTRIBUTES, NULL }
};
static
NODE T15[2] = {
{ ' ', 2, NULL },
{ 'e', 0, T16 }
};
static
NODE T14[2] = {
{ ' ', 2, NULL },
{ 't', 0, T15 }
};
static
NODE T13[2] = {
{ ' ', 2, NULL },
{ 'u', 0, T14 }
};
static
NODE T12[2] = {
{ ' ', 2, NULL },
{ 'b', 0, T13 }
};
static
NODE T11[2] = {
{ ' ', 2, NULL },
{ 'i', 0, T12 }
};
static
NODE T10[2] = {
{ ' ', 2, NULL },
{ 'r', 0, T11 }
};
static
NODE Tf[2] = {
{ ' ', 2, NULL },
{ 't', 0, T10 }
};
/* Second letter after 'a': c, f, n, r or t. */
static
NODE T1[6] = {
{ ' ', 6, NULL },
{ 'c', 0, T2 },
{ 'f', 0, T6 },
{ 'n', 0, T9 },
{ 'r', 0, Ta },
{ 't', 0, Tf }
};
/* Sub-trie reached after an initial 'b'.  It spells "before" (BEFORE):
 * T17 -> T18 -> T19 -> T1a -> T1b.
 */
static
NODE T1b[2] = {
{ ' ', 2, NULL },
{ 'e', BEFORE, NULL }
};
static
NODE T1a[2] = {
{ ' ', 2, NULL },
{ 'r', 0, T1b }
};
static
NODE T19[2] = {
{ ' ', 2, NULL },
{ 'o', 0, T1a }
};
static
NODE T18[2] = {
{ ' ', 2, NULL },
{ 'f', 0, T19 }
};
static
NODE T17[2] = {
{ ' ', 2, NULL },
{ 'e', 0, T18 }
};
/* Sub-trie reached after an initial 'd'.  It spells "directory"
 * (DIRECTORY_T): T1c -> T1d -> T1e -> T1f -> T20 -> T21 -> T22 -> T23.
 */
static
NODE T23[2] = {
{ ' ', 2, NULL },
{ 'y', DIRECTORY_T, NULL }
};
static
NODE T22[2] = {
{ ' ', 2, NULL },
{ 'r', 0, T23 }
};
static
NODE T21[2] = {
{ ' ', 2, NULL },
{ 'o', 0, T22 }
};
static
NODE T20[2] = {
{ ' ', 2, NULL },
{ 't', 0, T21 }
};
static
NODE T1f[2] = {
{ ' ', 2, NULL },
{ 'c', 0, T20 }
};
static
NODE T1e[2] = {
{ ' ', 2, NULL },
{ 'e', 0, T1f }
};
static
NODE T1d[2] = {
{ ' ', 2, NULL },
{ 'r', 0, T1e }
};
static
NODE T1c[2] = {
{ ' ', 2, NULL },
{ 'i', 0, T1d }
};
/* Sub-trie reached after an initial 'e'.  It spells "exec" (EXEC):
 * T24 -> T25 -> T26.
 */
static
NODE T26[2] = {
{ ' ', 2, NULL },
{ 'c', EXEC, NULL }
};
static
NODE T25[2] = {
{ ' ', 2, NULL },
{ 'e', 0, T26 }
};
static
NODE T24[2] = {
{ ' ', 2, NULL },
{ 'x', 0, T25 }
};
/* Sub-trie reached after an initial 'f'.  It spells "files" (FILES):
 * T27 -> T28 -> T29 -> T2a.
 */
static
NODE T2a[2] = {
{ ' ', 2, NULL },
{ 's', FILES, NULL }
};
static
NODE T29[2] = {
{ ' ', 2, NULL },
{ 'e', 0, T2a }
};
static
NODE T28[2] = {
{ ' ', 2, NULL },
{ 'l', 0, T29 }
};
static
NODE T27[2] = {
{ ' ', 2, NULL },
{ 'i', 0, T28 }
};
/* Sub-trie reached after an initial 'h'.  It spells "hidden" (HIDDEN):
 * T2b -> T2c -> T2d -> T2e -> T2f.
 */
static
NODE T2f[2] = {
{ ' ', 2, NULL },
{ 'n', HIDDEN, NULL }
};
static
NODE T2e[2] = {
{ ' ', 2, NULL },
{ 'e', 0, T2f }
};
static
NODE T2d[2] = {
{ ' ', 2, NULL },
{ 'd', 0, T2e }
};
static
NODE T2c[2] = {
{ ' ', 2, NULL },
{ 'd', 0, T2d }
};
static
NODE T2b[2] = {
{ ' ', 2, NULL },
{ 'i', 0, T2c }
};
/* Sub-trie reached after an initial 'l'.  It spells "label" (LABEL):
 * T30 -> T31 -> T32 -> T33.
 */
static
NODE T33[2] = {
{ ' ', 2, NULL },
{ 'l', LABEL, NULL }
};
static
NODE T32[2] = {
{ ' ', 2, NULL },
{ 'e', 0, T33 }
};
static
NODE T31[2] = {
{ ' ', 2, NULL },
{ 'b', 0, T32 }
};
static
NODE T30[2] = {
{ ' ', 2, NULL },
{ 'a', 0, T31 }
};
/* Sub-trie reached after an initial 'm'.  It spells "modified" (MODIFIED):
 * T34 -> T35 -> T36 -> T37 -> T38 -> T39 -> T3a.
 */
static
NODE T3a[2] = {
{ ' ', 2, NULL },
{ 'd', MODIFIED, NULL }
};
static
NODE T39[2] = {
{ ' ', 2, NULL },
{ 'e', 0, T3a }
};
static
NODE T38[2] = {
{ ' ', 2, NULL },
{ 'i', 0, T39 }
};
static
NODE T37[2] = {
{ ' ', 2, NULL },
{ 'f', 0, T38 }
};
static
NODE T36[2] = {
{ ' ', 2, NULL },
{ 'i', 0, T37 }
};
static
NODE T35[2] = {
{ ' ', 2, NULL },
{ 'd', 0, T36 }
};
static
NODE T34[2] = {
{ ' ', 2, NULL },
{ 'o', 0, T35 }
};
/* Sub-trie reached after an initial 'n'.  It spells:
 *      "name"  (NAME)  T3c -> T3d
 *      "not"   (NOT)   T3e
 */
static
NODE T3d[2] = {
{ ' ', 2, NULL },
{ 'e', NAME, NULL }
};
static
NODE T3c[2] = {
{ ' ', 2, NULL },
{ 'm', 0, T3d }
};
static
NODE T3e[2] = {
{ ' ', 2, NULL },
{ 't', NOT, NULL }
};
/* Second letter after 'n': a or o. */
static
NODE T3b[3] = {
{ ' ', 3, NULL },
{ 'a', 0, T3c },
{ 'o', 0, T3e }
};
/* Sub-trie reached after an initial 'o'.  It spells "or" (OR). */
static
NODE T3f[2] = {
{ ' ', 2, NULL },
{ 'r', OR, NULL }
};
/* Sub-trie reached after an initial 'p'.  It spells "print" (PRINT):
 * T40 -> T41 -> T42 -> T43.
 */
static
NODE T43[2] = {
{ ' ', 2, NULL },
{ 't', PRINT, NULL }
};
static
NODE T42[2] = {
{ ' ', 2, NULL },
{ 'n', 0, T43 }
};
static
NODE T41[2] = {
{ ' ', 2, NULL },
{ 'i', 0, T42 }
};
static
NODE T40[2] = {
{ ' ', 2, NULL },
{ 'r', 0, T41 }
};
/* Sub-trie reached after an initial 'r'.  Both words share the prefix "re"
 * (T44 -> T45) and then split:
 *      "readonly"  (READONLY)  T46 -> T47 -> T48 -> T49 -> T4a
 *      "recurs"    (RECURS)    T4b -> T4c -> T4d
 */
static
NODE T4a[2] = {
{ ' ', 2, NULL },
{ 'y', READONLY, NULL }
};
static
NODE T49[2] = {
{ ' ', 2, NULL },
{ 'l', 0, T4a }
};
static
NODE T48[2] = {
{ ' ', 2, NULL },
{ 'n', 0, T49 }
};
static
NODE T47[2] = {
{ ' ', 2, NULL },
{ 'o', 0, T48 }
};
static
NODE T46[2] = {
{ ' ', 2, NULL },
{ 'd', 0, T47 }
};
static
NODE T4d[2] = {
{ ' ', 2, NULL },
{ 's', RECURS, NULL }
};
static
NODE T4c[2] = {
{ ' ', 2, NULL },
{ 'r', 0, T4d }
};
static
NODE T4b[2] = {
{ ' ', 2, NULL },
{ 'u', 0, T4c }
};
/* Third letter after "re": a or c. */
static
NODE T45[3] = {
{ ' ', 3, NULL },
{ 'a', 0, T46 },
{ 'c', 0, T4b }
};
static
NODE T44[2] = {
{ ' ', 2, NULL },
{ 'e', 0, T45 }
};
/* Sub-trie reached after an initial 's'.  It spells:
 *      "search"  (SEARCH)  T4f -> T50 -> T51 -> T52
 *      "select"  (SELECT)  T4f -> T53 -> T54 -> T55
 *      "system"  (SYSTEM)  T56 -> T57 -> T58 -> T59
 */
static
NODE T52[2] = {
{ ' ', 2, NULL },
{ 'h', SEARCH, NULL }
};
static
NODE T51[2] = {
{ ' ', 2, NULL },
{ 'c', 0, T52 }
};
static
NODE T50[2] = {
{ ' ', 2, NULL },
{ 'r', 0, T51 }
};
static
NODE T55[2] = {
{ ' ', 2, NULL },
{ 't', SELECT, NULL }
};
static
NODE T54[2] = {
{ ' ', 2, NULL },
{ 'c', 0, T55 }
};
static
NODE T53[2] = {
{ ' ', 2, NULL },
{ 'e', 0, T54 }
};
/* Third letter after "se": a or l. */
static
NODE T4f[3] = {
{ ' ', 3, NULL },
{ 'a', 0, T50 },
{ 'l', 0, T53 }
};
static
NODE T59[2] = {
{ ' ', 2, NULL },
{ 'm', SYSTEM, NULL }
};
static
NODE T58[2] = {
{ ' ', 2, NULL },
{ 'e', 0, T59 }
};
static
NODE T57[2] = {
{ ' ', 2, NULL },
{ 't', 0, T58 }
};
static
NODE T56[2] = {
{ ' ', 2, NULL },
{ 's', 0, T57 }
};
/* Second letter after 's': e or y. */
static
NODE T4e[3] = {
{ ' ', 3, NULL },
{ 'e', 0, T4f },
{ 'y', 0, T56 }
};
/* Root level of the trie: the first character of every token.  This is the
 * table Lex hands to the trie search.  Punctuation entries carry their
 * token directly and have no child; letter entries carry token 0 and point
 * at the sub-trie for the keywords starting with that letter.  Entries are
 * in ascending character order, as the binary search requires.
 */
static
NODE T0[21] = {
{ ' ', 21, NULL },
{ '(', L_PAREN, NULL },
{ ')', R_PAREN, NULL },
{ ',', COMMA, NULL },
{ '/', F_SLASH, NULL },
{ 'a', 0, T1 },
{ 'b', 0, T17 },
{ 'd', 0, T1c },
{ 'e', 0, T24 },
{ 'f', 0, T27 },
{ 'h', 0, T2b },
{ 'l', 0, T30 },
{ 'm', 0, T34 },
{ 'n', 0, T3b },
{ 'o', 0, T3f },
{ 'p', 0, T40 },
{ 'r', 0, T44 },
{ 's', 0, T4e },
{ '{', L_BRACE, NULL },
{ '|', BAR, NULL },
{ '}', R_BRACE, NULL }
};
/*-----------------------------------------------------------------------------
| Routine   :   OpenPrg() --- Open the text file holding the back up program
|               and make it the lexer's input.
|
| Inputs    :   FileNm  - Name of the source file to read.
|
| Notes     :   Does not return if the file cannot be opened: a message is
|               written to stderr and the process exits with status -1.
-----------------------------------------------------------------------------*/
void    OpenPrg(char    *FileNm)
{
    FILE    *fp = fopen(FileNm, "rt");

    /* Without a readable script there is nothing to do. */
    if (fp == NULL)
    {
        fprintf(stderr,
                "OpenPrg (fopen) : Could not open file '%s' for reading.\n",
                FileNm);
        exit( -1 );
    }
    PrgFl = fp;

    /* Start with both word buffers empty. */
    PrvWd[0] = '\0';
    word[0] = '\0';
}
/*-----------------------------------------------------------------------------
| Routine   :   ParsErr() --- Report a parse error and terminate.
|
| Inputs    :   Err     - Text describing the error.
|
| Notes     :   Never returns; the process exits with status -1.
-----------------------------------------------------------------------------*/
void    ParsErr(char    *Err)
{
    /* LnNo counts from zero, people count from one. */
    int     line = LnNo + 1;

    fprintf(stderr, "Error in Line: %d, %s.\n", line, Err);

    /* Give the most recently scanned word as context, when there is one. */
    if (word[0] != '\0')
    {
        fprintf(stderr, "\tOn or after word '%s'\n", word);
    }

    exit( -1 );
}
/*-----------------------------------------------------------------------------
| Routine   :   TrieSrch() --- Search the trie for a key word.
|
| Inputs    :   Trie    - The trie level to search.  Element 0 is a header
|                         whose token field holds the number of elements.
|               ch      - The current character to search for.
|               WordPtr - The pointer to the current byte of the word buffer.
|
| Returns   :   Returns the token value of the deepest entry that matched
|               (which is 0 when the input stopped part way through a key
|               word), or
|               NOT_FND - When ch matches nothing at this level.
|
| Notes     :   The word buffer is NUL terminated on every return path.
|               Hitting end of file part way through a key word is handled
|               exactly like any other character that fails to continue the
|               word: the token of the current entry is returned, so a
|               truncated key word at the end of the file is reported by the
|               caller instead of being silently dropped.
-----------------------------------------------------------------------------*/
static
int     TrieSrch(NODE   *Trie,
                 int    ch,
                 char   *WordPtr)
{
    int     lo;             /* Limits of current array piece for    */
    int     hi;             /* binary search.                       */
    int     mid = 0;        /* Mid point of array piece.            */
    int     cmp = -1;       /* Result of character comparison.      */
    int     next;           /* Look ahead character.                */
    int     sub;            /* Result from the next trie level.     */

    /*  Key words are matched without regard to case.  */
    ch = tolower( ch );

    /*  Binary search of entries 1 .. length - 1.  The comparison result is
    *   kept in a plain int so that the sign test does not depend on how
    *   the token type is represented.
    */
    lo = 1;
    hi = Trie[0].token - 1;
    while (lo <= hi)
    {
        mid = (lo + hi) / 2;
        cmp = ch - Trie[mid].c;
        if (cmp == 0)
            break;
        if (cmp < 0)
            hi = mid - 1;
        else
            lo = mid + 1;
    }

    /*  No entry for this character at this level.  */
    if (cmp != 0)
    {
        *WordPtr = '\0';
        return( NOT_FND );
    }

    /*  Save the current character in the buffer for error reporting.  */
    *WordPtr++ = (char) ch;

    /*  An entry without a child ends the word.  */
    if (Trie[mid].child == NULL)
    {
        *WordPtr = '\0';
        return( Trie[mid].token );
    }

    /*  The word may continue: look at the next character.  At end of file
    *   there is nothing to continue with, so the word stops here.
    */
    if ((next = fgetc( PrgFl )) == EOF)
    {
        *WordPtr = '\0';
        return( Trie[mid].token );
    }

    /*  Search the next level.  If the look ahead character does not
    *   continue the word, give it back to the stream and return the token
    *   associated with the match at this level.
    */
    if ((sub = TrieSrch(Trie[mid].child, next, WordPtr)) == NOT_FND)
    {
        ungetc(next, PrgFl);
        return( Trie[mid].token );
    }
    return( sub );
}
/*-----------------------------------------------------------------------------
| Routine   :   SaveCh() --- Append one character to the word buffer kept
|               for error reporting.  Characters that do not fit are dropped
|               so that over-long input can never overrun the buffer.
|
| Inputs    :   WordPtr - Address of the current write position.
|               WordEnd - Last usable position (reserved for the NUL).
|               c       - Character to store.
-----------------------------------------------------------------------------*/
static
void    SaveCh(char **WordPtr,
               char *WordEnd,
               int  c)
{
    if (*WordPtr < WordEnd)
        *(*WordPtr)++ = (char) c;
}

/*-----------------------------------------------------------------------------
| Routine   :   ChkRange() --- Report a parse error unless lo <= no <= hi.
|
| Inputs    :   no      - Value to check.
|               lo, hi  - Inclusive limits.
|               WordPtr - Current write position in the word buffer; the
|                         buffer is NUL terminated here before reporting.
|               Err     - Error text passed to ParsErr().
-----------------------------------------------------------------------------*/
static
void    ChkRange(long   no,
                 long   lo,
                 long   hi,
                 char   *WordPtr,
                 char   *Err)
{
    if (no < lo || no > hi)
    {
        *WordPtr = '\0';
        ParsErr( Err );
    }
}

/*-----------------------------------------------------------------------------
| Routine   :   GetNo --- Get a number from the file.
|
| Inputs    :   WordPtr - Address of the current write position in the word
|                         buffer (for error reporting); advanced past the
|                         digits stored.
|               WordEnd - Last usable position in the word buffer.
| Outputs   :   RetNo   - Returns the number read from the file (0 when no
|                         digit was present).
|
| Returns   :   Returns the first non digit character read from the file,
|               or EOF.
|
| Notes     :   A value too large for a long is reported through ParsErr()
|               rather than being allowed to overflow.
-----------------------------------------------------------------------------*/
static
int     GetNo(char  **WordPtr,
              char  *WordEnd,
              long  *RetNo)
{
    int     c;
    long    digit;

    *RetNo = 0L;
    while ((c = fgetc( PrgFl )) >= '0' && c <= '9')
    {
        /*  Save character in word buffer.  */
        SaveCh(WordPtr, WordEnd, c);

        /*  Calculate value of number, refusing to overflow.  */
        digit = (long) (c - '0');
        if (*RetNo > (LONG_MAX - digit) / 10L)
        {
            **WordPtr = '\0';
            ParsErr( "Numeric constant too large" );
        }
        *RetNo = *RetNo * 10L + digit;
    }
    return( c );
}

/*-----------------------------------------------------------------------------
| Routine   :   Lex() --- Get the next token from the input file.
|
| Outputs   :   sym     - The symbolic data read from the file: the text of
|                         a string, the value of a number, or the fields of
|                         a time or date.
|
| Returns   :   Returns the token read or EOF.
|
| Notes     :   White space and '#' comments (which run to the end of the
|               line) are skipped.  Lexical errors are reported on stderr
|               and terminate the program.
|               Strings are limited to MAX_STR characters; this assumes
|               sym->str can hold MAX_STR + 1 bytes (declared in lex.h ---
|               TODO confirm).
|               Years are stored as an offset: 1980 or later has 1980
|               subtracted, and two digit years 80 to 99 have 80 subtracted.
-----------------------------------------------------------------------------*/
TKNS    Lex(TOKEN   *sym)
{
    int     i;
    int     tkn;
    int     ch;
    int     StartLn;
    char    *bf;
    char    *WordEnd = word + MAX_STR;  /* Last byte is kept for the NUL. */

    /*  Strip comments and white space.  A comment on the last line of a
    *   file need not end with a new line, so the comment skipper stops at
    *   end of file as well.
    */
    for (ch = fgetc( PrgFl ); ; ch = fgetc( PrgFl ))
    {
        if (ch == '#')
        {
            do
                ch = fgetc( PrgFl );
            while (ch != '\n' && ch != EOF);
        }

        if (ch == '\n')
            LnNo++;
        else if (ch != ' ' && ch != '\t')
            break;
    }

    /*  Quoted string.  */
    if (ch == '"')
    {
        StartLn = LnNo;
        bf = sym->str;
        i = 0;
        while ((ch = fgetc( PrgFl )) != '"' && ch != EOF)
        {
            /*  The closing quote must arrive within MAX_STR characters;
            *   otherwise it would be left in the stream and taken for the
            *   start of another string.
            */
            if (i >= MAX_STR)
            {
                *bf = '\0';
                strcpy(word, sym->str);
                ParsErr( "String constant too long" );
            }

            /*  Keep the line count right across multi line strings.  */
            if (ch == '\n')
                LnNo++;

            *bf++ = (char) ch;
            i++;
        }
        *bf = '\0';
        strcpy(word, sym->str);

        if (ch == EOF)
        {
            /*  Point the message at the line holding the opening quote.  */
            LnNo = StartLn;
            ParsErr( "Unterminated string constant" );
        }

        /*  Return string token.  */
        return( STRING );
    }

    /*  Number, time (h:m or h:m:s) or date (m/d/y).  */
    if (ch >= '0' && ch <= '9')
    {
        long    no;

        /*  Establish a pointer to the word buffer and unget the
        *   numeric character for re-reading.
        */
        bf = word;
        ungetc(ch, PrgFl);
        ch = GetNo(&bf, WordEnd, &no);

        if (ch == ':')
        {
            /*  Getting time, not number.  */
            SaveCh(&bf, WordEnd, ch);
            ChkRange(no, 0L, 23L, bf,
                     "Hour out of range in time expression");
            sym->ftime.ti_hour = (unsigned char) no;
            sym->ftime.ti_hund = (unsigned char) 0;

            /*  Get minutes; seconds default to zero.  */
            ch = GetNo(&bf, WordEnd, &no);
            ChkRange(no, 0L, 59L, bf,
                     "Minutes out of range in time expression");
            sym->ftime.ti_min = (unsigned char) no;
            sym->ftime.ti_sec = (unsigned char) 0;

            if (ch == ':')
            {
                /*  Get seconds.  */
                SaveCh(&bf, WordEnd, ch);
                if ((ch = GetNo(&bf, WordEnd, &no)) == '.')
                {
                    *bf = '\0';
                    ParsErr( "Hundredths of seconds not allowed in "
                             "time expressions" );
                }
                ChkRange(no, 0L, 59L, bf,
                         "Seconds out of range in time expression");
                sym->ftime.ti_sec = (unsigned char) no;
            }

            /*  This is a time.  */
            tkn = TIME;
        }
        else if (ch == '/')
        {
            /*  Getting date, not number.  */
            SaveCh(&bf, WordEnd, ch);
            ChkRange(no, 1L, 12L, bf,
                     "Month out of range in date expression");
            sym->fdate.da_mon = (char) no;

            /*  Get day, which must be followed by the year.  */
            if ((ch = GetNo(&bf, WordEnd, &no)) != '/')
            {
                *bf = '\0';
                ParsErr( "Missing year in date expression" );
            }
            SaveCh(&bf, WordEnd, ch);
            ChkRange(no, 1L, 31L, bf,
                     "Day out of range in date expression");
            sym->fdate.da_day = (char) no;

            /*  Get year.  1980 and 80 are themselves valid: they are the
            *   base of the offset and yield zero.
            */
            ch = GetNo(&bf, WordEnd, &no);
            if (no >= 1980L)
                no -= 1980L;
            else if (no >= 80L && no < 100L)
                no -= 80L;
            else
            {
                *bf = '\0';
                ParsErr( "Error, bad year value in date expression." );
            }
            sym->fdate.da_year = (int) no;

            /*  This is a date.  */
            tkn = DATE;
        }
        else
        {
            /*  Just an integer constant.  */
            sym->no = no;
            tkn = NUMBER;
        }

        /*  Return the unused character.  */
        *bf = '\0';
        if (ch != EOF)
            ungetc(ch, PrgFl);
        return( tkn );
    }

    if (ch == EOF)
        return( EOF );

    /*  Call the trie search routine to return the next token, EOF
    *   or NOT_FND.  If not found, print an error and quit.
    */
    tkn = TrieSrch(T0, ch, word);
    if (tkn == NOT_FND)
    {
        /*  Illegal first character in word.  */
        if ( *PrvWd )
            fprintf(stderr, "Parse - Error in Line: %d, cannot identify "
                            "word after '%s'.\n", LnNo + 1, PrvWd);
        else
            fprintf(stderr, "Parse - Error in Line: %d, cannot identify "
                            "first word in file.\n", LnNo + 1);
        exit( -1 );
    }
    if (tkn == 0)
    {
        /*  Illegal word.  */
        fprintf(stderr, "Parse - Error in Line: %d, cannot identify "
                        "word '%s'.\n", LnNo + 1, word);
        exit( -1 );
    }

    /*  Remember the word for the next error message.  */
    strcpy(PrvWd, word);

    /*  Return the token found.  */
    return( tkn );
}
/* End of File */