home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
BURKS 2
/
BURKS_AUG97.ISO
/
BURKS
/
SOFTWARE
/
SOURCES
/
MAWK11AS.ZIP
/
SCAN.C
(
.txt
)
< prev
next >
Wrap
C/C++ Source or Header
|
1991-12-18
|
22KB
|
833 lines
/********************************************
scan.c
copyright 1991, Michael D. Brennan
This is a source file for mawk, an implementation of
the AWK programming language.
Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
********************************************/
/* $Log: scan.c,v $
* Revision 5.1 91/12/05 07:56:27 brennan
* 1.1 pre-release
*
*/
#include "mawk.h"
#include "sizes.h"
#include "scan.h"
#include "memory.h"
#include "field.h"
#include "init.h"
#include "fin.h"
#include "repl.h"
#include "code.h"
#if HAVE_FCNTL_H
#include <fcntl.h>
#endif
#include "files.h"
/* static functions
 *
 * Forward declarations of the helpers that are private to this file.
 * PROTO() is supplied by the project headers.
 * NOTE(review): presumably it chooses between an ANSI prototype and a
 * K&R style declaration -- confirm in mawk.h.
 */
static void PROTO(scan_fillbuff, (void) ) ; /* read more program text into buffer */
static void PROTO(scan_open, (void) ) ; /* open pfile_name, result in program_fd */
static int PROTO(slow_next, (void) ) ; /* next char when buffp sits on a 0 byte */
static void PROTO(eat_comment, (void) ) ; /* skip the rest of a comment */
static void PROTO(eat_semi_colon, (void) ) ; /* skip spaces and one optional ';' */
/* the collect_*() helpers: yylex() hands the result of collect_RE()
   to ct_ret(); the other two are presumably the number and string
   literal collectors -- TODO confirm against their definitions */
static double PROTO(collect_decimal, (int, int *) ) ;
static int PROTO(collect_string, (void) ) ;
static int PROTO(collect_RE, (void) ) ;
/*-----------------------------
program file management

State describing where the program text is being read from.
*----------------------------*/
/* name of the program file that scan_open() opens; the name "-"
   selects file descriptor 0 instead of calling open() */
char *pfile_name ;
/* holds the program text when scan_init() is given a command line
   program */
STRING *program_string ;
/* list of further program files (linked through ->link); slow_next()
   takes the head of the list when the current source is used up */
PFILE *pfile_list ;
/* read buffer of BUFFSZ+1 bytes; allocated by scan_init() only when
   the program comes from file[s] */
static unsigned char *buffer ;
/* current read position: points into buffer or into
   program_string->str */
static unsigned char *buffp ;
/* unsigned so it works with 8 bit chars */
/* -1 for a command line program, 0 when pfile_name is "-",
   otherwise the descriptor returned by open() */
static int program_fd ;
/* non-zero once the current source has nothing more to read */
static int eof_flag ;
/*
 * scan_init -- prepare the scanner's input source.
 *
 * cmdline_program : program text taken from the command line, or a
 *                   null pointer when the program is read from
 *                   file[s], in which case pfile_name is opened.
 *
 * Once the source is ready, leading space, newlines and comments are
 * skipped with eat_nl().  If no character is left after that, the
 * function reports "no program" and calls mawk_exit(1).
 */
void scan_init(cmdline_program)
    char *cmdline_program;
{
    if (cmdline_program == (char *) 0)
    {
        /* program from file[s] */
        scan_open();
        buffer = (unsigned char *) zmalloc(BUFFSZ + 1);
        buffp = buffer;
        scan_fillbuff();
    }
    else
    {
        /* command line program: there is no descriptor to read from */
        program_fd = -1;
        program_string = new_STRING((char *) 0,
                                    strlen(cmdline_program) + 1);
        (void) strcpy(program_string->str, cmdline_program);

        /* simulate file termination: the last byte counted by ->len
           is overwritten with a newline */
        program_string->str[program_string->len - 1] = '\n';

        buffp = (unsigned char *) program_string->str;
        eof_flag = 1;   /* this source is never refilled */
    }

    eat_nl();   /* scan to first token */

    if (next() == 0)
    {
        errmsg(0, "no program");
        mawk_exit(1);
    }
    un_next();   /* the character was only peeked at */
}
/*
 * scan_open -- open pfile_name and leave the descriptor in program_fd.
 *
 * The name "-" is not opened; descriptor 0 is used instead.
 * If open() fails, the error is reported through errmsg() with the
 * current errno and mawk_exit(1) is called.
 */
static void scan_open()
{
    if (pfile_name[0] == '-' && pfile_name[1] == 0)
    {
        program_fd = 0;
        return;
    }

    program_fd = open(pfile_name, O_RDONLY, 0);
    if (program_fd == -1)
    {
        errmsg(errno, "cannot open %s", pfile_name);
        mawk_exit(1);
    }
}
/*
 * scan_cleanup -- release the scanner's input resources and rewrite
 * some scan_code[] entries.
 *
 * A command line program (program_fd < 0) lives in program_string;
 * otherwise the text was read through buffer, and any descriptor
 * greater than 0 is closed as well.
 */
void scan_cleanup()
{
    if (program_fd < 0)
    {
        free_STRING(program_string) ;
    }
    else
    {
        zfree(buffer, BUFFSZ+1);
        if (program_fd > 0)
        {
            (void) close(program_fd);
        }
    }

    /* redefine SPACE as [ \t\n] */
    if (posix_space_flag && rs_shadow.type != SEP_MLR)
    {
        scan_code['\n'] = SC_UNEXPECTED;
    }
    else
    {
        scan_code['\n'] = SC_SPACE;
    }

    scan_code['\f'] = SC_UNEXPECTED;     /* value doesn't matter */
    scan_code['\013'] = SC_UNEXPECTED;   /* \v not space */
    scan_code['\r'] = SC_UNEXPECTED;
}
/*--------------------------------
global variables shared by yyparse() and yylex()
and used for error messages too
*-------------------------------*/
/* starts at -1; yylex() compares it with its can_precede_div[] table
   when it has to decide how to read a '/' */
int current_token = -1 ;
/* set from lineno on entry to yylex(); reset to 1 by slow_next()
   when another program file is opened */
unsigned token_lineno ;
unsigned compile_error_count ;
int NR_flag ; /* are we tracking NR */
/* yylex() increments this for '(' and decrements it for ')';
   a negative result is reported as "extra ')'" and reset to 0 */
int paren_cnt ;
/* yylex() increments this for each '{' */
int brace_cnt ;
int print_flag ; /* changes meaning of '>' */
int getline_flag ; /* changes meaning of '<' */
extern YYSTYPE yylval ;
/*----------------------------------------
file reading functions
next() and un_next() are macros in scan.h
*---------------------*/
/* line number in the current program source: incremented by eat_nl()
   and yylex() for each newline, reset to 1 by slow_next() when it
   switches to another program file */
static unsigned lineno = 1 ;
/*
 * scan_fillbuff -- read up to BUFFSZ bytes of program text from
 * program_fd into buffer.
 *
 * A short read (fewer than BUFFSZ bytes) marks the source as
 * exhausted by setting eof_flag.  If that last piece is not empty
 * and does not end in a newline, a newline and a terminating 0 are
 * appended; buffer has BUFFSZ+1 bytes, so both fit after a short read.
 */
static void scan_fillbuff()
{
    unsigned nread;

    nread = fillbuff(program_fd, (char *) buffer, BUFFSZ);
    if (nread >= BUFFSZ)
    {
        return;   /* full buffer: more input may follow */
    }

    eof_flag = 1;

    /* check eof is terminated */
    if (nread != 0 && buffer[nread - 1] != '\n')
    {
        buffer[nread] = '\n';
        buffer[nread + 1] = 0;
    }
}
/* read one character -- slowly */
static int slow_next()
{
while ( *buffp == 0 )
{
if ( !eof_flag )
{ buffp = buffer ; scan_fillbuff() ; }
else
if ( pfile_list /* open another program file */ )
{
PFILE *q ;
if ( program_fd > 0 ) (void) close(program_fd) ;
eof_flag = 0 ;
pfile_name = pfile_list->fname ;
q = pfile_list ;
pfile_list = pfile_list->link ;
ZFREE(q) ;
scan_open() ;
token_lineno = lineno = 1 ;
}
else break /* real eof */ ;
}
return *buffp++ ; /* note can un_next() , eof which is zero */
}
/*
 * eat_comment -- discard the remainder of a comment.
 *
 * Characters are consumed until a newline, or a character whose
 * scan_code[] entry is 0, is read.  That terminating character is
 * handed back with un_next() so the caller sees it next.
 */
static void eat_comment()
{
    register int ch;

    for ( ;; )
    {
        ch = next();
        if (ch == '\n' || scan_code[ch] == 0)
        {
            break;
        }
    }
    un_next();
}
/*
 * eat_semi_colon -- eat one semi-colon on the current line.
 *
 * Extra semi-colons are now allowed to separate pattern-action
 * blocks.  They add nothing to the language, so the scanner simply
 * throws them away here.
 *
 * Characters of class SC_SPACE are skipped first.  If the character
 * after them is not ';' it is handed back with un_next().
 */
static void eat_semi_colon()
{
    register int ch;

    do
    {
        ch = next();
    }
    while (scan_code[ch] == SC_SPACE);

    if (ch != ';')
    {
        un_next();
    }
}
/*
 * eat_nl -- eat all space including newlines.
 *
 * Comments are skipped through eat_comment(), and every character of
 * class SC_NL bumps lineno.  The first character of any other class
 * ends the scan and is handed back with un_next().
 */
void eat_nl()
{
    for ( ;; )
    {
        int kind = scan_code[next()];

        if (kind == SC_COMMENT)
        {
            eat_comment();
        }
        else if (kind == SC_NL)
        {
            lineno++;
        }
        else if (kind != SC_SPACE)
        {
            un_next();
            return;
        }
        /* SC_SPACE: nothing to do, keep scanning */
    }
}
int yylex()
{
register int c ;
token_lineno = lineno ;
reswitch:
switch( scan_code[c = next()] )
{
case 0 :
ct_ret(EOF) ;
case SC_SPACE : goto reswitch ;
case SC_COMMENT :
eat_comment() ; goto reswitch ;
case SC_NL :
lineno++ ; eat_nl() ;
ct_ret(NL) ;
case SC_ESCAPE :
while ( scan_code[ c = next() ] == SC_SPACE ) ;
if ( c == '\n')
{ token_lineno = ++lineno ; goto reswitch ; }
if ( c == 0 ) ct_ret(EOF) ;
un_next() ;
yylval.ival = '\\' ;
ct_ret(UNEXPECTED) ;
case SC_SEMI_COLON :
eat_nl() ;
ct_ret(SEMI_COLON) ;
case SC_LBRACE :
eat_nl() ; brace_cnt++ ;
ct_ret(LBRACE) ;
case SC_PLUS :
switch( next() )
{
case '+' :
yylval.ival = '+' ;
string_buff[0] =
string_buff[1] = '+' ;
string_buff[2] = 0 ;
ct_ret(INC_or_DEC) ;
case '=' :
ct_ret(ADD_ASG) ;
default : un_next() ; ct_ret(PLUS) ;
}
case SC_MINUS :
switch( next() )
{
case '-' :
yylval.ival = '-' ;
string_buff[0] =
string_buff[1] = '-' ;
string_buff[2] = 0 ;
ct_ret(INC_or_DEC) ;
case '=' :
ct_ret(SUB_ASG) ;
default : un_next() ; ct_ret(MINUS) ;
}
case SC_COMMA : eat_nl() ; ct_ret(COMMA) ;
case SC_MUL : test1_ret('=', MUL_ASG, MUL) ;
case SC_DIV :
{ static int can_precede_div[] =
{ DOUBLE, STRING_, RPAREN, ID, D_ID, RE, RBOX, FIELD,
GETLINE, INC_or_DEC, -1 } ;
int *p = can_precede_div ;
do
if ( *p == current_token )
{
if ( *p != INC_or_DEC )
test1_ret('=', DIV_ASG, DIV) ;
if ( next() == '=' )
{ un_next() ; ct_ret( collect_RE() ) ; }
}
while ( * ++p != -1 ) ;
ct_ret( collect_RE() ) ;
}
case SC_MOD : test1_ret('=', MOD_ASG, MOD) ;
case SC_POW : test1_ret('=' , POW_ASG, POW) ;
case SC_LPAREN :
paren_cnt++ ;
ct_ret(LPAREN) ;
case SC_RPAREN :
if ( --paren_cnt < 0 )
{ compile_error( "extra ')'" ) ;
paren_cnt = 0 ;
goto reswitch ; }
ct_ret(RPAREN) ;
case SC_LBOX : ct_ret(LBOX) ;
case SC_RBOX : ct_ret(RBOX) ;
case SC_MATCH :
string_buff[0] = '~' ; string_buff[0] = 0 ;