home *** CD-ROM | disk | FTP | other *** search
- /* ----------------------------------------------------------------------
- * FILE: lexer.c
- * PACKAGE: as31 - 8031/8051 Assembler.
- *
- * DESCRIPTION:
- * This file contains the lexical tokenizer for the assembler.
- * Since yacc is being used the lexer is called yylex().
- *
- * In order to produce a listing, some record of the users
- * source line must be kept. This is done by adding
- * get_ch(), and unget_ch() routine which returns/ungets a character
- * but also places information into a secret array.
- *
- * When a newline is encountered the text line is returned as
- * an attribute on the '\n' character.
- *
- * REVISION HISTORY:
- * Jan. 19, 1990 - Created. (Ken Stauffer)
- *
- * AUTHOR:
- * All code in this file written by Ken Stauffer (University of Calgary).
- * January, 1990.
- *
- */
-
- #include <stdio.h>
- #include <ctype.h>
- #include "as31.h"
-
- extern union ystack yylval;
- extern int pass;
-
- struct symbol *looksym();
- struct opcode *lookop();
- char *malloc();
- int lineno;
-
- static char line[100],*lineptr=line;
-
- /* ----------------------------------------------------------------------
- * get_ch:
- * Get a character from stdin, place char in line[]
- */
-
- get_ch()
- {
- register int c;
-
- c = getchar();
- if( c != EOF && lineptr - line < sizeof(line) )
- *lineptr++ = c;
- return(c);
- }
-
- /* ----------------------------------------------------------------------
- * unget_ch:
- * Unget a character and move lineptr back by one.
- */
-
- unget_ch(c)
- int c;
- {
- ungetc(c,stdin);
- if( lineptr > line )
- lineptr--;
- }
-
- /* ----------------------------------------------------------------------
- * yylex:
- * The tokens themselves are returned via return(token)
- *
- * Some tokens have attributes. These attributes are returned
- * by setting a global variable yylval:
- *
- * yylval.value
- * numbers (any base)
- * strings (in pass 1).
- * bit positions .0, .1, .2, ...
- *
- * yylval.str
- * strings (in pass 2).
- * '\n' (both passes).
- *
- * yylval.sym
- * User defined symbols.
- *
- * yylval.op
- * Reserved keyword (opcode/directive/misc.)
- *
- * No other fields in yylval are used by yylex().
- *
- * Characters that do not have an attribute do
- * not set anything in the yylval variable.
- *
- */
-
- yylex()
- {
- static nl_flag=0; /* sync. error messages and the cur. line */
- register int c;
- char buf[120]; /* temporary buffer */
- char *p; /* general pointer */
- struct symbol *sym;
- struct opcode *op;
-
- int octal=0,hex=0,decimal=0,binary=0;
- register long value = 0;
-
- if( nl_flag ) {
- nl_flag = 0;
- lineno++;
- }
-
- for(;;) {
- c = get_ch();
- switch(c) {
- case EOF: return(EOF);
- case ' ':
- case '\t':
- break;
-
- case '\n':
- nl_flag = 1;
- yylval.str = line;
- *lineptr = '\0';
- lineptr = line;
- return('\n');
-
- case ';':
- while((c=get_ch()) != EOF && c!='\n');
- nl_flag= 1;
- yylval.str = line;
- *lineptr = '\0';
- lineptr = line;
- return(c);
-
- case '"':
- p = buf;
- while((c=get_ch()) != EOF && c!='"' && c!='\n') {
- if( c == '\\' ) {
- switch(c=get_ch()) {
- case 'n': c = '\n'; break;
- case 'r': c = '\r'; break;
- case 't': c = '\t'; break;
- case 'b': c = '\b'; break;
- case '"': c = '"'; break;
- case '\\': c = '\\'; break;
- default:
- error("Invalid escape character: \\%c",c);
- break;
- }
- }
- if( p-buf<sizeof(buf)-1 )
- *p++ = c;
- else {
- error("String constant longer than %d bytes",
- sizeof(buf));
- }
- }
- *p = '\0';
- if( c == '\n' || c == EOF ) {
- error("String terminated improperly.");
- unget_ch(c);
- }
-
- if(pass1)
- yylval.value = strlen(buf);
- else {
- if( (p = malloc(strlen(buf)+1)) == NULL )
- error("Cannot allocate %d bytes",strlen(buf)+1);
- strcpy(p,buf);
- yylval.str = p;
- }
- return(STRING);
-
- case '.':
- if( (c=get_ch())>='0' && c<='7' ) {
- yylval.value = c-'0';
- return(BITPOS);
- }
- unget_ch(c);
- return('.');
-
- case '\'':
- c = get_ch();
- if( c=='\\' ) {
- switch(c=get_ch()) {
- case 'n': c = '\n'; break;
- case 'r': c = '\r'; break;
- case 't': c = '\t'; break;
- case 'b': c = '\b'; break;
- case '\\': c = '\\'; break;
- case '\'': c = '\''; break;
- default:
- error("Invalid escape character: \\%c",c);
- }
- }
- if( get_ch() != '\'' )
- error("Missing quote in character constant");
- yylval.value = c;
- return(VALUE);
-
- case '0': /* parse a number */
- case '1': /* could be followed by a: */
- case '2': /* 'b','B' - Binary */
- case '3': /* 'h','H' - Hex */
- case '4': /* 'd','D' - Decimal */
- case '5': /* 'o','O' - Octal */
- case '6': /* *** Numbers must start with a digit */
- case '7': /* Numbers could be also preceeded by: */
- case '8': /* 0x - Hex, 0b - binary */
- case '9': /* 0 - Octal */
-
- p = buf;
- do {
- if( p-buf<sizeof(buf)-1 )
- *p++ = c;
- c = get_ch();
- } while( c=='H' || c=='h' || c=='O' || c=='o' ||
- c=='x' || c=='X' || isxdigit(c) );
- unget_ch(c);
- *p = '\0';
-
-
- /* Check any preceeding chars */
- if( buf[0]=='0' && (buf[1]=='x' || buf[1]=='X') ) {
- hex++;
- buf[1] = '0';
- } else if( buf[0]=='0' &&
- (buf[1]=='b' || buf[1]=='B') ) {
- binary++;
- buf[1] = '0';
- }
- else if( buf[0]=='0' ) octal++;
-
- /* check any trailing chars */
- c = *(p-1);
- if( !hex && (c=='b' || c=='B') )
- { binary++; *(p-1) = '\0'; }
- else if( c=='H' || c=='h' )
- { hex++; *(p-1) = '\0'; }
- else if( !hex && (c=='D' || c=='d') )
- { decimal++; *(p-1) = '\0'; }
- else if( c=='O' || c=='o' )
- { octal++; *(p-1) = '\0'; }
- else if( !hex && !octal && !binary) decimal++;
-
- if( binary ) {
- for(p=buf; *p; p++ ) {
- if( *p=='1' ) value = value<<1 + 1;
- else if( *p=='0' ) value = value<<1;
- else
- error("Invalid binary digit: %c",*p);
- }
- yylval.value = value;
- return(VALUE);
- }
-
- if( hex ) {
- for(p=buf; *p; p++ ) {
- value <<= 4;
- if( isdigit(*p) )
- value += *p-'0';
- else if( *p>='a' && *p<='f' )
- value += *p-'a'+ 10;
- else if( *p>='A' && *p<='F' )
- value += *p-'A'+ 10;
- else
- error("Invalid hex digit: %c",*p);
- }
- yylval.value = value;
- return(VALUE);
- }
-
- if( decimal ) {
- for(p=buf; *p; p++ ) {
- if( isdigit(*p) )
- value = value*10 + *p-'0';
- else
- error("Invalid decimal digit: %c",*p);
- }
- yylval.value = value;
- return(VALUE);
- }
-
- if( octal ) {
- for(p=buf; *p; p++ ) {
- if( *p>='0' && *p<='7' )
- value = value<<3 + *p-'0';
- else
- error("Invalid octal digit: %c",*p);
- }
- yylval.value = value;
- return(VALUE);
- }
-
- default:
- if( isalpha(c) || c=='_' ) {
- p = buf;
- do {
- if( p-buf<sizeof(buf)-1 )
- *p++ = c;
- c = get_ch();
- } while( isalnum(c) || c=='_' );
- *p = '\0';
- unget_ch(c);
- if( op = lookop(buf) ) {
- yylval.op = op;
- return(op->type);
- }
- sym = looksym(buf);
- yylval.sym = sym;
- return(SYMBOL);
- } else
- return(c);
- } /* switch */
- } /* for */
-
- } /* yylex */
-