home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Monster Media 1994 #1
/
monster.zip
/
monster
/
OS2
/
CPOSTSRC.ZIP
/
CTOK.C
< prev
next >
Wrap
Text File
|
1992-02-24
|
17KB
|
550 lines
/*------------------------------------------------------------------
* ctok : C language tokenizer
*------------------------------------------------------------------
* 10-01-91 Patrick J. Mueller
*------------------------------------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "ctok.h"
/*------------------------------------------------------------------
 * is a character valid inside a C identifier (letter, digit or '_')?
 *
 * c is handed to isalnum(), so it must be EOF or a value that fits
 * in an unsigned char.  The argument is evaluated twice - do not
 * pass an expression with side effects.
 *------------------------------------------------------------------*/
#define isCsymbol(c) (isalnum(c) || ('_' == (c)))
/*------------------------------------------------------------------
* typedefs
*------------------------------------------------------------------*/
/* per-tokenizer state; allocated by CTokInit and released by CTokTerm */
typedef struct
{
int eof;                      /* set to 1 once readFunc reports a zero length buffer */
char *buffer;                 /* current input buffer, filled in by readFunc         */
long bufferLen;               /* length of buffer as returned by readFunc            */
long bufferInd;               /* index of the next character to take from buffer     */
long fileOffs;                /* bumped by GetNextChar, dropped by UnGetNextChar;    */
                              /* starts at -1                                        */
long line;                    /* line counter: starts at 1, +1 for each '\n' read,   */
                              /* -1 for each '\n' put back                           */
int unGetChar;                /* character saved by UnGetNextChar                    */
int unGetReady;               /* non-zero while unGetChar is waiting to be re-read   */
long tokOffs;                 /* starting offset of the last token (set by GetToken) */
long tokLen;                  /* length of the last token (set by GetToken)          */
CTokRead readFunc;            /* callback used to obtain the next input buffer       */
void *readInfo;               /* passed as first argument to readFunc                */
char ident[MAX_IDENT_LEN+1];  /* NUL terminated text of the last identifier, or an   */
                              /* operator character stored by GetToken               */
} CTokInfo;
/*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
/*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
/*------------------------------------------------------------------
 * get next char from file
 *
 * c   - receives the next character as a non-negative value, or
 *       EOF once the input is exhausted
 * cti - tokenizer state; offset, line count, unget holder and
 *       buffer position are updated
 *------------------------------------------------------------------*/
void GetNextChar(
   int      *c,
   CTokInfo *cti
   )
{
   /*---------------------------------------------------------------
    * the offset is bumped on every call (even at end of file);
    * GetToken derives token offsets and lengths from it
    *---------------------------------------------------------------*/
   cti->fileOffs++;

   /*---------------------------------------------------------------
    * check for end of file
    *---------------------------------------------------------------*/
   if (cti->eof)
   {
      *c = EOF;
      return;
   }

   /*---------------------------------------------------------------
    * check for a char in the unget holder
    *---------------------------------------------------------------*/
   if (cti->unGetReady)
   {
      cti->unGetReady = 0;
      *c = cti->unGetChar;

      if ('\n' == *c)
         cti->line++;

      return;
   }

   /*---------------------------------------------------------------
    * see if we need to read another buffer
    *---------------------------------------------------------------*/
   if (cti->bufferInd >= cti->bufferLen)
   {
      cti->bufferLen = cti->readFunc(cti->readInfo,&(cti->buffer));
      cti->bufferInd = 0L;

      /*------------------------------------------------------------
       * a zero length marks end of input; a negative length (not
       * expected - presumably an error return, confirm against the
       * CTokRead contract) is treated the same way instead of being
       * used to index the buffer
       *------------------------------------------------------------*/
      if (cti->bufferLen <= 0L)
      {
         *c       = EOF;
         cti->eof = 1;
         return;
      }
   }

   /*---------------------------------------------------------------
    * read character from buffer - go through unsigned char so that
    * bytes >= 0x80 are never negative: a 0xFF byte must not compare
    * equal to EOF, and the <ctype.h> functions used by the callers
    * require EOF or an unsigned char value
    *---------------------------------------------------------------*/
   *c = (unsigned char) cti->buffer[cti->bufferInd++];

   if ('\n' == *c)
      cti->line++;

   return;
}
/*------------------------------------------------------------------
 * push the character just read back onto the input
 *
 * c   - the character to hand out again on the next GetNextChar
 * cti - tokenizer state; only a single character can be held
 *------------------------------------------------------------------*/
void UnGetNextChar(
   int       c,
   CTokInfo *cti
   )
{
   /*---------------------------------------------------------------
    * park the character in the unget holder
    *---------------------------------------------------------------*/
   cti->unGetReady = 1;
   cti->unGetChar  = c;

   /*---------------------------------------------------------------
    * undo the offset / line bookkeeping GetNextChar performs
    *---------------------------------------------------------------*/
   cti->fileOffs -= 1;

   if (c == '\n')
      cti->line -= 1;
}
/*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
/*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
/*------------------------------------------------------------------
 * consume the rest of a character constant or string literal
 *
 * cti - tokenizer state
 * c   - the opening ' or " already read; the same character
 *       closes the literal
 *
 * stops after the closing quote, or at end of file
 *------------------------------------------------------------------*/
static void ReadString(
   CTokInfo *cti,
   int       c
   )
{
   const int quote = c;

   for (GetNextChar(&c,cti); c != quote; GetNextChar(&c,cti))
   {
      /*------------------------------------------------------------
       * a backslash swallows the character that follows it, so an
       * escaped quote does not end the literal
       *------------------------------------------------------------*/
      if (c == '\\')
         GetNextChar(&c,cti);

      /*------------------------------------------------------------
       * unterminated literal - give up at end of file
       *------------------------------------------------------------*/
      if (c == EOF)
         break;
   }
}
/*------------------------------------------------------------------
 * consume the rest of a C comment (called after the opening
 * slash-star has been read)
 *
 * cti - tokenizer state
 *
 * stops after the closing star-slash, or at end of file
 *------------------------------------------------------------------*/
static void ReadComment(
   CTokInfo *cti
   )
{
   int ch;
   int sawStar;

   GetNextChar(&ch,cti);

   while (ch != EOF)
   {
      /*------------------------------------------------------------
       * remember whether the current char is a star, then look at
       * the one after it - a slash right after a star ends the
       * comment; anything else is examined again on the next pass
       *------------------------------------------------------------*/
      sawStar = (ch == '*');

      GetNextChar(&ch,cti);

      if (sawStar && (ch == '/'))
         return;
   }
}
/*------------------------------------------------------------------
 * consume the rest of a C++ style (double slash) comment
 *
 * cti - tokenizer state
 *
 * reads up to the end of the line or end of file; the terminating
 * character is put back so it is not part of the comment
 *------------------------------------------------------------------*/
static void ReadCppComment(
   CTokInfo *cti
   )
{
   int ch;

   do
   {
      GetNextChar(&ch,cti);
   }
   while ((ch != EOF) && (ch != '\n'));

   UnGetNextChar(ch,cti);
}
/*------------------------------------------------------------------
 * collect an identifier into cti->ident
 *
 * cti - tokenizer state
 * c   - first character of the identifier (already read)
 *
 * characters beyond MAX_IDENT_LEN are consumed but not stored; the
 * first character that is not part of the identifier is put back
 *------------------------------------------------------------------*/
static void ReadIdent(
   CTokInfo *cti,
   int       c
   )
{
   int len = 0;

   /*---------------------------------------------------------------
    * the first character is always stored
    *---------------------------------------------------------------*/
   cti->ident[len++] = (char) c;

   /*---------------------------------------------------------------
    * keep reading while the characters belong to the identifier
    *---------------------------------------------------------------*/
   for (GetNextChar(&c,cti); isCsymbol(c); GetNextChar(&c,cti))
   {
      /*------------------------------------------------------------
       * out of room - drop the character but keep consuming
       *------------------------------------------------------------*/
      if (len >= MAX_IDENT_LEN)
         continue;

      cti->ident[len++] = (char) c;
   }

   /*---------------------------------------------------------------
    * terminate the text and return the look-ahead character
    *---------------------------------------------------------------*/
   cti->ident[len] = '\0';
   UnGetNextChar(c,cti);
}
/*------------------------------------------------------------------
 * consume the rest of a number
 *
 * cti - tokenizer state
 * c   - first digit (already read); only used as scratch storage
 *
 * every following letter or digit is taken as part of the number;
 * the first other character is put back
 *------------------------------------------------------------------*/
static void ReadNumber(
   CTokInfo *cti,
   int       c
   )
{
   do
   {
      GetNextChar(&c,cti);
   }
   while (isalnum(c));

   UnGetNextChar(c,cti);
}
/*------------------------------------------------------------------
 * consume the rest of a preprocessor statement (called after the
 * pound sign has been read)
 *
 * cti - tokenizer state
 *
 * the statement runs to the end of the line; a backslash followed
 * (after optional blanks) by a newline continues it on the next
 * line.  the terminating newline is put back; at end of file
 * nothing is put back
 *------------------------------------------------------------------*/
static void ReadPreprocessor(
   CTokInfo *cti
   )
{
   int ch;

   GetNextChar(&ch,cti);

   while (ch != EOF)
   {
      switch (ch)
      {
         /*---------------------------------------------------------
          * unescaped newline - the statement is finished
          *---------------------------------------------------------*/
         case '\n':
            UnGetNextChar(ch,cti);
            return;

         /*---------------------------------------------------------
          * backslash - skip blanks after it, and if that brings us
          * to a newline, step over it so the statement continues;
          * any other character is examined on the next pass
          *---------------------------------------------------------*/
         case '\\':
            do
            {
               GetNextChar(&ch,cti);
            }
            while (isspace(ch) && (ch != '\n'));

            if (ch == '\n')
               GetNextChar(&ch,cti);
            break;

         /*---------------------------------------------------------
          * anything else is simply eaten
          *---------------------------------------------------------*/
         default:
            GetNextChar(&ch,cti);
            break;
      }
   }
}
/*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
/*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
/*------------------------------------------------------------------
 * tokenizer - read the next token from the input
 *
 * cti - tokenizer state; on return tokOffs and tokLen describe the
 *       token, and ident holds the identifier text (TOKEN_IDENT)
 *       or the operator character (TOKEN_OPER), else it is empty
 *
 * returns one of the TOKEN_* type codes
 *------------------------------------------------------------------*/
static int GetToken(
   CTokInfo *cti
   )
{
   int  c;
   int  type;
   long offsStart;   /* same type as fileOffs / tokOffs - no mixed signedness */

   /*---------------------------------------------------------------
    * read next character
    *---------------------------------------------------------------*/
   GetNextChar(&c,cti);

   /*---------------------------------------------------------------
    * skip white space
    *---------------------------------------------------------------*/
   while (isspace(c))
      GetNextChar(&c,cti);

   /*---------------------------------------------------------------
    * save starting offset - fileOffs was just advanced for c
    *---------------------------------------------------------------*/
   offsStart = cti->fileOffs;

   /*---------------------------------------------------------------
    * empty identifier
    *---------------------------------------------------------------*/
   memset(cti->ident,'\0',sizeof(cti->ident));

   /*---------------------------------------------------------------
    * big switch on its value
    *---------------------------------------------------------------*/
   switch(c)
   {
      /*------------------------------------------------------------
       * check for end of file
       *------------------------------------------------------------*/
      case EOF:
         type = TOKEN_EOF;
         break;

      /*------------------------------------------------------------
       * for pound sign, read preprocessor directive
       *------------------------------------------------------------*/
      case '#':
         ReadPreprocessor(cti);
         type = TOKEN_PREPROC;
         break;

      /*------------------------------------------------------------
       * single or double quote
       *------------------------------------------------------------*/
      case '\'':
      case '"':
         ReadString(cti,c);
         type = TOKEN_STRING;
         break;

      /*------------------------------------------------------------
       * start of comment?
       *------------------------------------------------------------*/
      case '/':
         /*---------------------------------------------------------
          * get next char - if *, read to end of comment
          *---------------------------------------------------------*/
         GetNextChar(&c,cti);
         if ('*' == c)
         {
            ReadComment(cti);
            type = TOKEN_COMMENT;
         }

         /*---------------------------------------------------------
          * see if it's a C++ style comment
          *---------------------------------------------------------*/
         else if ('/' == c)
         {
            ReadCppComment(cti);
            type = TOKEN_COMMENT;
         }

         /*---------------------------------------------------------
          * otherwise it's just a plain / - put the look-ahead back
          * and report the operator character like every other
          * operator does
          *---------------------------------------------------------*/
         else
         {
            UnGetNextChar(c,cti);
            type          = TOKEN_OPER;
            cti->ident[0] = '/';
         }
         break;

      /*------------------------------------------------------------
       * everything else - identifiers, numbers and punctuation
       *------------------------------------------------------------*/
      default:
         if (isCsymbol(c) && !isdigit(c))
         {
            ReadIdent(cti,c);
            type = TOKEN_IDENT;
         }

         else if (isdigit(c))
         {
            ReadNumber(cti,c);
            type = TOKEN_NUMBER;
         }

         /*---------------------------------------------------------
          * anything else is a one character operator
          *---------------------------------------------------------*/
         else
         {
            type          = TOKEN_OPER;
            cti->ident[0] = (char) c;
         }
         break;
   }

   /*---------------------------------------------------------------
    * fileOffs now belongs to the last character consumed, so the
    * token spans offsStart .. fileOffs inclusive
    *---------------------------------------------------------------*/
   cti->tokOffs = offsStart;
   cti->tokLen  = cti->fileOffs - offsStart + 1;

   return(type);
}
/*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
/*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
/*------------------------------------------------------------------
 * Initializer - create a tokenizer
 *
 * readFunc - callback the tokenizer calls to obtain input buffers
 * readInfo - passed back to readFunc on every call
 *
 * returns a handle for CTokGet / CTokTerm, or NULL if the memory
 * for it could not be allocated
 *------------------------------------------------------------------*/
void *CTokInit(
   CTokRead  readFunc,
   void     *readInfo
   )
{
   CTokInfo *cti = malloc(sizeof *cti);

   if (!cti)
      return NULL;

   /*---------------------------------------------------------------
    * input source
    *---------------------------------------------------------------*/
   cti->readFunc   = readFunc;
   cti->readInfo   = readInfo;

   /*---------------------------------------------------------------
    * no buffer yet - the first GetNextChar will ask for one
    *---------------------------------------------------------------*/
   cti->buffer     = NULL;
   cti->bufferInd  = 0L;
   cti->bufferLen  = 0L;
   cti->eof        = 0;

   /*---------------------------------------------------------------
    * nothing pushed back
    *---------------------------------------------------------------*/
   cti->unGetReady = 0;
   cti->unGetChar  = '\0';

   /*---------------------------------------------------------------
    * position: offset becomes 0 on the first character read
    *---------------------------------------------------------------*/
   cti->fileOffs   = -1L;
   cti->line       = 1;

   /*---------------------------------------------------------------
    * no token read so far
    *---------------------------------------------------------------*/
   cti->tokOffs    = 0L;
   cti->tokLen     = 0L;
   memset(cti->ident,'\0',sizeof(cti->ident));

   return cti;
}
/*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
/*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
/*------------------------------------------------------------------
 * Terminator - dispose of a tokenizer
 *
 * handle - the value returned by CTokInit; its memory is given
 *          back with free(), so it must not be used afterwards
 *------------------------------------------------------------------*/
void CTokTerm(void *handle)
{
   free(handle);
}
/*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
/*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/
/*------------------------------------------------------------------
 * Tokenizer - fetch the next token
 *
 * handle - tokenizer handle returned by CTokInit
 * token  - filled in with the type, offset, length, identifier
 *          text and line of the token just read; ident points at
 *          storage inside the tokenizer, not at a copy
 *------------------------------------------------------------------*/
void CTokGet(
   void  *handle,
   Token *token
   )
{
   CTokInfo *info = handle;
   int       kind = GetToken(info);

   /*---------------------------------------------------------------
    * copy the results of the scan out of the tokenizer state
    *---------------------------------------------------------------*/
   token->type  = kind;
   token->offs  = info->tokOffs;
   token->len   = info->tokLen;
   token->ident = info->ident;
   token->line  = info->line;
}