home *** CD-ROM | disk | FTP | other *** search
- { Copyright (C) 1986 by QCAD Systems Incorporated, All Rights Reserved }
-
- program TUPROG(input, output);
-
- { TUSKEL:
-
- A simple Compiler for Turbo }
-
- const
- VERSION_STRING= 'version 1.0';
- STACKSIZE = 60; { maximum size of LR(1) parser stack }
- EOS = 0; { marks end of line in LINE }
- EOFCH = 26; { reader end-of-file character }
- EOLCH = 12; { end of line character }
- CR = 13; { carriage return }
- MAXSTRCHARS = 1000; { maximum string space/procedure }
- MAXERRORS = 20; { maximum errors before aborting }
- HASHSIZE = 167; { hash table size -- prime number! }
- HLIMIT = 166; { limit in hash table (hashsize minus one) }
- MAXTOKLEN = 30; { length of a token or symbol }
- MERR_COUNT= 4; {minimum tokens to scan before reporting errors
- again}
-
- {#C -- constants defined by the parser generator go here }
- IDENT_TOKLEN = #C; { maximum user identifier length }
- MAXRPLEN = #D; { length of longest production right part }
- TERM_TOKS = #E; { number of terminal tokens }
- NTERM_TOKS = #F; { number of nonterminal tokens }
- ALL_TOKS = #G; { term_toks + nterm_toks }
- IDENT_TOKX = #H; { token number of <identifier> }
- INT_TOKX = #I; { token number of <integer> }
- REAL_TOKX = #J; { token number of <real> }
- STR_TOKX = #K; { token number of <string> }
- STOP_TOKX = #L; { token number of stopsign (end-of-file) }
- GOAL_TOKX = #M; { token number of goal }
- EOL_TOKX = #N; { token number of end-of-line }
- READSTATE = #O; { first READ state }
- LOOKSTATE = #P; { first LOOK state }
- MAXSTATE = #Q; { largest state number }
- REDUCELEN = #R; { number of productions }
- RLTOKENS = #S;
- SSTOKENS = #T;
- PRODTOKS = #U;
- TOKCHARS = #V;
- START_STATE = #W; { initial state }
- STK_STATE_1 = #X; { state initially pushed on stack }
- {#> -- end of constants }
-
- type
- INT = -32767..32767;
- STRING1= string[1];
- STRING7 = string[7];
- STRING8 = string[8];
- STRING9 = string[9];
- STRING15 = string[15];
- STRING31 = string[31];
- STRING80 = string[80];
- LONGSTRING = string[255];
- TOKRANGE = 1..term_toks;
-
- {$I tudecls}
-
- type
- SYMTABLE= array [0..hlimit] of symtabp;
- STATE_STACK = array [0..stacksize] of int;
- { Types for parser tables. NB: These type names are used by
- the typed constant generation. }
- STATE_ARRAY = array [1..maxstate] of int;
- REDUCE_ARRAY = array [1..reducelen] of int;
- POP_ARRAY = array [1..reducelen] of byte;
- TOKEN_ARRAY = array [0..rltokens] of byte;
- TOSTATE_ARRAY = array [0..rltokens] of int;
- SS_ARRAY = array [0..sstokens] of int;
- PROD_ARRAY = array [1..prodtoks] of byte;
- TOKX_ARRAY = array [1..all_toks] of int;
- TOKCHAR_ARRAY = array [1..tokchars] of char;
- INSYM_ARRAY = array [1..lookstate] of int;
-
- const
- {#<C -- put typed constants here, if they've been requested }
- { Static parser data structures (parser tables). }
- {#IP}
- {#>}
- {Flag constants}
- {#F }
- #N= #V;
-
- var
- { Dynamic parser data structures }
- STACK: state_stack; { the LR(1) state stack }
- SEM: array [0..stacksize] of semrecp; { semantics stack }
- TOS: int; { index of top of stack }
-
- {#<~C -- the following are redundant if typed constants are used }
- { Static parser data structures (parser tables). }
- STATEX: state_array; { stack top index }
- MAP: reduce_array; { mapping from state to apply numbers }
- POPNO: pop_array; { reduce pop size }
- TOKNUM: token_array; { token list }
- TOSTATE: tostate_array; { read, look states }
- STK_STATE: ss_array;
- STK_TOSTATE: ss_array;
- {#<D -- these are for parser stack dumps. }
- PRODX: reduce_array; { prod index into ... }
- PRODS: prod_array; { token number, index into ... }
- INSYM: insym_array;
- {#> -- end if for debugging. }
- {#> -- end if for typed constants. }
-
- {#<D -- debugging (these cannot be typed constants.) }
- { These guys are for printing tokens in parser stack dumps. }
- TOKX: tokx_array; { token index, index into ... }
- TOKCHAR: tokchar_array; { token characters }
- {#> -- end if for debugging. }
-
- { Lexical and token data }
- LINE: longstring; { source line }
- LX: int; { index of next character in LINE }
- ERRPOS: int; { current token position in LINE }
- PROMPT: string8; { prompt string }
- PROMPTLEN: int; { front-end length for error purposes }
- IS_CONSOLE: boolean; { TRUE if input from console }
- CH: char; { next character from input file }
- TOKEN: int; { Next token in input list }
- LSEMP: semrecp; { current semantics assoc. with token }
- TOKENX: int; { index into TOKARY, LSEMPARY }
- TOKARY: array [0..1] of int; { token queue }
- LSEMPARY: array [0..1] of semrecp;
- ERRSYM: symbol; { special symbol reserved for errors }
- { The next table can be omitted if real numbers are not used. }
- PWR10_2: array [0..8] of real; { Binary powers of ten. }
-
- { Symbol table data }
- SYMTAB: symtable;
-
- { String table space }
- STRTAB: packed array [0..maxstrchars] of char;
- STRX: integer; {next available character slot in STRTAB}
-
- SFILE, RFILE: text; { source, report files }
-
- ERR_COUNT,
- ERRORS: int;
- DEBUG: int; { >0 turns on some tracing }
- TRACE: boolean;
-
- SFILENAME, RFILENAME: string80; { file names }
-
- function NEW_SEM (SEMTYP: semtype): semrecp; forward;
- procedure IDEBUG; forward;
-
- {$I tuutils}
-
- {$I tufiles}
-
- {******************}
- procedure MORE(MSG: string80);
- { print the string, and let the user type
- any character to proceed. }
- var FOO: char;
- begin
- foo := resp(msg)
- end;
-
- {******************}
- procedure REPORT_ERR(MSG: string80);
- begin
- if err_count=0 then begin
- if errpos+promptlen>1 then
- write(rfile, ' ':errpos+promptlen-1);
- writeln(rfile, '^'); { mark error point }
- writeln(rfile, msg);
- end
- end;
-
- {*******************}
- procedure ABORT(MSG: string80);
- begin
- report_err(concat('FATAL ERROR: ', msg));
- writeln('... aborting');
- halt;
- end;
-
- {******************}
- procedure ERROR(MSG: string80);
- begin
- if err_count=0 then begin
- report_err(concat('ERROR: ', msg));
- errors:=errors+1;
- if errors>maxerrors then begin
- err_count:=0;
- abort('Error limit exceeded');
- end
- else
- if (rfilename=default_rfile) then idebug;
- end
- end;
-
- {*******************}
- procedure WARN(MSG: string80);
- begin
- report_err(concat('WARNING: ', msg));
- end;
-
- {$I tusyms}
-
- {********************}
- procedure SYMERROR(SYM: symbol; MSG: string80);
- begin
- error(concat(sym, concat(': ', msg)));
- end;
-
- {#<D -- debugging utilities. }
- {$I tudbug}
-
- {#> -- end debugging stuff. }
- { LEXICAL ANALYZER }
-
- {*******************}
- procedure GETLINE;
- { read the next source line, when nextch exhausts
- the current one. }
-
- {.............}
- procedure GENEOF;
- begin
- line := chr(eofch);
- lx := 1
- end;
-
- {............}
- procedure GRABLINE;
- begin
- readln(sfile, line);
- writeln(rfile, ' ; ', line);
- lx := 1
- end;
-
- begin { getline }
- if is_console then begin
- { prompt if from the console file }
- write(prompt);
- grabline;
- if line = 'EOF' then geneof
- end
- else if eof(sfile) then
- geneof
- else
- grabline;
- { The appended eol character ensures that tokens are broken over
- line endings; they would otherwise be invisible to the scanner.
- eolch allows the string scanner to distinguish ends of lines. }
- line := concat(line, ' ');
- line[length(line)-1]:=chr(eolch);
- end;
-
- {*******************}
- procedure NEXTCH;
- { gets next character from line }
- begin
- if lx > length(line) then
- getline;
- ch := line[lx];
- { don't move past an eof mark }
- if ch <> chr(eofch) then lx := lx+1
- end;
-
- {********************}
- function PEEKCH: char;
- begin
- if lx>length(line) then peekch:=chr(eolch)
- else
- peekch:=line[lx];
- end;
-
- {********************}
- procedure SKIPBLANKS;
- label 1;
-
- {..................}
- function END_COMMENT: boolean;
- begin
- if ch=chr(eofch) then begin
- error('unclosed comment at file end');
- end_comment:=true;
- end
- else
- end_comment:=(ch='}');
- end;
-
- begin {comments open on { and close on close-brace }
- 1:
- while ch=' ' do nextch;
- if (ch='{') then begin
- while not(end_comment) do nextch;
- nextch;
- goto 1;
- end
- end;
-
- {***********************}
- procedure GET_SYMBOL;
- {collects Pascal-style identifiers,
- stuffed into symbol table under IDENT tag}
- var SYM: symbol;
- STP: symtabp;
- begin
- sym:='';
- { Keep snarfing alphanumeric characters. Up to the first
- maxtoklen of them will be put in the symbol spelling. }
- while ch in ['a'..'z', 'A'..'Z', '0'..'9', '_'] do begin
- if length(sym) <= maxtoklen then begin
- sym:=concat(sym, ' ');
- sym[length(sym)] := upcase(ch);
- end;
- nextch;
- end;
- stp := makesym(symtab, sym, user, 0); { the default level is 0 }
- if (stp^.symt=reserved) then
- token:=stp^.tokval
- else begin
- lsemp:=new_sem(ident);
- with lsemp^ do begin
- symp := stp;
- token := ident_tokx;
- end
- end
- end;
-
- {************************}
- procedure GET_NUMBER;
- var V: integer;
- begin
- v:=0;
- while ch in ['0'..'9'] do begin
- v:=10*v + ord(ch) - ord('0');
- nextch;
- end;
- token:= int_tokx;
- lsemp:=new_sem(fixed);
- lsemp^.numval:=v;
- end;
-
- {***********************}
- procedure GET_STRING;
- label 1, 99;
-
- {..................}
- procedure PUTCH(CH: char);
- begin
- if strx<maxstrchars then begin
- strtab[strx]:=ch;
- strx:=strx+1;
- end
- else
- if strx=maxstrchars then begin
- error('too many string characters');
- strtab[maxstrchars]:=chr(0);
- strx:=strx+1;
- end
- end;
-
- begin
- lsemp:=new_sem(strng);
- lsemp^.stx:=strx;
- 1:
- nextch; {get over first quote mark}
- while ch<>'''' do begin
- if ch=chr(eolch) then begin
- error('string runs over end of line');
- goto 99;
- end;
- putch(ch);
- nextch;
- end;
- nextch; {get over last quote mark}
- if ch='''' then begin {repeated quote mark}
- putch(ch);
- goto 1; {continue grabbing characters}
- end;
- 99:
- putch(chr(0));
- token:=str_tokx;
- end;
-
- {********************}
- procedure GET_TOKEN;
- { Pascal-style lexical analyzer -- sets TOKEN to token number }
- begin
- lsemp:= nil; { default case }
- skipblanks;
- errpos:=lx-1;
- case ch of
- 'a'..'z', 'A'..'Z': get_symbol;
- '0'..'9': get_number;
- '''': get_string;
- {#<D -- if debugging, invoke idebug on a bang (or other char). }
- '!': begin
- idebug;
- nextch;
- get_token
- end;
- {#>}
- {#G special symbol cases go here }
- ELSE begin
- if ch=chr(eofch) then
- token := stop_tokx
- else if ch=chr(eolch) then begin
- nextch;
- {#<E end-of-line token dealt with here }
- token := eol_tokx { accept an end-of-line token }
- {#:}
- get_token { go find another (significant) character }
- {#>}
- end
- else begin
- error('invalid character');
- nextch;
- get_token;
- end
- end { case alternatives }
- end; { case }
- if err_count>0 then err_count:=err_count-1;
- end { get_token };
-
- {*******************}
- procedure NEXT_TOKEN;
- begin
- if tokenx>1 then begin
- tokenx := 1;
- get_token; { goes into token, lsemp }
- tokary[1] := token;
- lsempary[1] := lsemp;
- end
- else begin
- { is in tokary }
- token := tokary[tokenx];
- lsemp := lsempary[tokenx];
- end
- end;
-
- {*****************}
- procedure TOKENREAD;
- begin
- tokenx := tokenx+1;
- end;
-
- {$I tusems}
-
- { LR(1) PARSER procedures }
-
- {****************}
- function ERROR_RECOVERY(var MSTACK: state_stack;
- var MSTACKX: int; MCSTATE: int): int;
- label 99, 100;
- var STACK: state_stack; { local copy of stack }
- TOS, { local stack pointer }
- CSTATE, { local state }
- JSTX, { temporary stack limit }
- RX, TL: int; { index into TOKNUM table }
-
- {...............}
- procedure COPY_STACK;
- var STX: int;
- begin
- if (jstx<0) or (jstx>mstackx) then abort('ERROR RECOVERY BUG');
- for stx := 0 to jstx do
- stack[stx] := mstack[stx];
- tos := jstx;
- if jstx=mstackx then
- cstate := mcstate
- else
- cstate := mstack[jstx+1];
- end;
-
- {...............}
- procedure PUSHREAD(CSTATE: int);
- { adjusts the state stack }
- begin
- tos := tos+1;
- if tos>stacksize then
- abort('stack overflow');
- stack[tos] := cstate;
- end;
-
- {...............}
- function TRIAL_PARSE: boolean;
- { parses from current read state through the inserted and the
- error token; if successful, returns TRUE. }
- label 99;
- var RX: int;
- begin
- trial_parse := true; { until proven otherwise }
- while cstate<>0 do begin
- if cstate < readstate then begin
- { a reduce state }
- {#<D dump if debugging enabled. }
- if debug > 3 then stk_dump('E*Reduce', stack,
- tos, cstate);
- {#> end conditional. }
- if popno[cstate]=0 then begin
- { empty production }
- pushread(stk_state[statex[cstate]]);
- cstate := stk_tostate[statex[cstate]];
- end
- else begin
- { non-empty production }
- tos := tos - popno[cstate] + 1;
- rx := statex[cstate]; { compute the GOTO state }
- cstate := stack[tos];
- while (stk_state[rx]<>cstate) and
- (stk_state[rx]<>0) do rx := rx+1;
- cstate := stk_tostate[rx];
- end
- end
- else if cstate < lookstate then begin
- { a read state }
- next_token; { need a token now }
- {#<D dump if debugging enabled. }
- if debug > 3 then stk_dump('E*Read', stack, tos, cstate);
- {#> end conditional. }
- rx := statex[cstate];
- while (toknum[rx]<>0) and
- (toknum[rx]<>token) do rx := rx+1;
- if toknum[rx]=0 then begin
- { failure }
- trial_parse := false;
- goto 99;
- end
- else begin
- { did read something }
- pushread(cstate);
- cstate := tostate[rx];
- tokenread; { scan the token }
- if tokenx>1 then goto 99 { successful }
- end
- end
- else begin
- { lookahead state }
- next_token; { need a token now }
- {#<D dump if debugging enabled. }
- if debug > 3 then stk_dump('E*Look', stack, tos, cstate);
- {#> end conditional. }
- rx := statex[cstate];
- while (toknum[rx]<>0) and
- (toknum[rx]<>token) do rx := rx+1;
- cstate := tostate[rx];
- end
- end;
- 99:
- end;
-
- {.................}
- procedure INCR_ERRSYM;
- { Note that this procedure assumes ASCII. }
- begin
- if errsym[6]='Z' then begin
- errsym[5] := succ(errsym[5]);
- errsym[6] := 'A';
- end
- else
- errsym[6] := succ(errsym[6]);
- end;
-
- {.................}
- procedure MAKE_DEFAULT(TOKX: int; var SEMP: semrecp);
- { creates a default token data structure }
- var SYM: symbol; TSYMP: symtabp;
- begin
- case tokx of
- ident_tokx: begin
- tsymp := makesym(symtab, errsym, symerr, 0);
- semp:=new_sem(ident);
- with semp^ do begin
- symp:=tsymp;
- incr_errsym;
- end
- end;
- int_tokx: begin
- semp:=new_sem(fixed);
- semp^.numval:=1;
- end;
- ELSE
- semp:=nil;
- end { case tokx }
- end;
-
- begin { ERROR_RECOVERY }
- if debug > 3 then writeln(rfile, 'Going into ERROR RECOVERY');
- while true do begin
- jstx := mstackx;
- while jstx>=0 do begin
- copy_stack;
- rx := statex[cstate];
- while toknum[rx]<>0 do begin
- { scan through legal next tokens }
- if debug > 3 then writeln(rfile, '...starting trial parse');
- tokary[0] := toknum[rx]; { the insertion }
- tokenx := 0;
- if trial_parse then goto 99; { it clicked! }
- rx := rx+1;
- if toknum[rx]<>0 then
- copy_stack;
- end;
- jstx := jstx-1; { reduce stack }
- end;
- if token=stop_tokx then begin
- { empty stack, no more tokens }
- cstate := 0; { halt state }
- tokenx := 2;
- jstx := 0; { bottom of stack }
- goto 100;
- end;
- {#<D}
- if debug > 3 then begin
- write(rfile, '...dropping token ');
- tl := wrtok(tokary[1]);
- writeln(rfile);
- end;
- {#>}
- tokenx := 2;
- next_token;
- {#<D}
- if debug > 3 then begin
- write(rfile, 'New token ');
- tl := wrtok(token);
- writeln(rfile);
- end
- {#>}
- end;
- 99: { found a solution }
- copy_stack;
- {#<D}
- if debug > 3 then begin
- write(rfile, 'insertion of ');
- tl := wrtok(tokary[0]);
- writeln(rfile, ' succeeded');
- end;
- {#>}
- make_default(tokary[0], lsempary[0]);
- tokenx := 0; { forces a `real' rescan of the insertion }
- if jstx<mstackx then
- cstate := stack[jstx+1]
- else
- cstate := mcstate; { cstate returned }
- 100:
- error_recovery := cstate;
- mstackx := jstx;
- if debug > 3 then writeln(rfile, 'Ending error recovery');
- end;
-
- {****************}
- procedure PARSER;
- { Carries out a complete parse, until
- the halt state is seen -- same as empty stack}
- var CSTATE, RX: int;
- TSEMP: semrecp;
-
- {...............}
- procedure PUSHREAD(CSTATE: int; SEMP: semrecp);
- { do the push part of a readstate. }
- begin
- tos := tos+1;
- if tos>stacksize then
- abort('stack overflow');
- sem[tos] := semp;
- stack[tos] := cstate;
- end;
-
- begin
- cstate := start_state;
- tos := -1;
- pushread(stk_state_1, nil);
- while cstate<>0 do begin
- tsemp:=nil;
- if cstate < readstate then begin
- { a reduce state }
- {#<D dump if debugging enabled. }
- if debug > 0 then stk_dump('Reduce', stack, tos, cstate);
- {#> end conditional. }
- if map[cstate] <> 0 then
- { the semantics action }
- apply(map[cstate], tsemp);
- if popno[cstate]=0 then begin
- { empty production }
- pushread(stk_state[statex[cstate]], tsemp);
- cstate := stk_tostate[statex[cstate]];
- end
- else begin
- { non-empty production:
- semantics is preserved on a unit production A --> w,
- where |w| = 1, unless something is in TSEMP. Note that
- if w is nonterminal, the production may be bypassed. }
- tos := tos - popno[cstate] + 1;
- if (tsemp<>nil) or (popno[cstate]<>1) then
- sem[tos] := tsemp;
- { compute the GOTO state }
- rx := statex[cstate];
- cstate := stack[tos];
- while (stk_state[rx]<>cstate) and (stk_state[rx]<>0) do
- rx := rx+1;
- cstate := stk_tostate[rx];
- end
- end
- else if cstate < lookstate then begin
- { a read state }
- next_token; { need next token now }
- {#<D dump if debugging enabled. }
- if debug > 2 then stk_dump('Read', stack, tos, cstate);
- {#> end conditional. }
- rx := statex[cstate];
- while (toknum[rx]<>0) and (toknum[rx]<>token) do
- rx := rx+1;
- if toknum[rx]=0 then begin
- error('syntax error');
- cstate := error_recovery(stack, tos, cstate);
- err_count:=merr_count;
- end
- else begin
- pushread(cstate, lsemp);
- cstate := tostate[rx];
- tokenread; { token has been scanned }
- end
- end
- else begin
- { lookahead state }
- next_token; { need another token now }
- {#<D dump if debugging enabled. }
- if debug > 2 then stk_dump('Look', stack, tos, cstate);
- {#> end conditional. }
- rx := statex[cstate];
- while (toknum[rx]<>0) and (toknum[rx]<>token) do
- rx := rx+1;
- cstate := tostate[rx];
- end
- end;
- end_sem;
- end;
-
- { PARSE INITIALIZATION }
-
- {*****************}
- procedure INITTABLES;
- var SX: int;
- CODE_SYMP: symtabp;
-
- {................}
- procedure PUTSYM(TSYM: symbol; TV: int);
- var SYMP: symtabp;
- I: int;
- begin
- symp:=makesym(symtab, tsym, reserved, -1);
- symp^.tokval:=tv;
- end;
-
- {#<D also need to init debugging tables? }
- {................}
- procedure PUTTOK(PRINTVAL: string80; TOKNUM, START: int);
- { this procedure is used to initialize the token tables.
- toknum is the number of the token to be initialized, and
- start is where it should start in the tokchar array. }
- var OFFSET: int;
- begin
- tokx[toknum] := start;
- for offset := 0 to length(printval)-1 do
- tokchar[start+offset] := printval[offset+1];
- tokchar[start+length(printval)] := chr(0)
- end;
- {#> end puttok insertion. }
-
- {................}
- procedure INIT_PARSER_TABLES;
- { initialize the parser tables }
- begin
- {#IS inline symbol table inits. }
- {#<A assignment style inits? }
- {#IP do the parser tables inline. }
- {#> end assignment inits. }
- {#<D debugging? }
- {#IT do the token tables inline. }
- {#> end debugging }
- end { init_parser_tables };
-
- begin { inittables }
- pwr10_2[0] := 1E1; {10^(2^0)}
- pwr10_2[1] := 1E2; {10^(2^1)}
- pwr10_2[2] := 1E4;
- pwr10_2[3] := 1E8;
- pwr10_2[4] := 1E16;
- pwr10_2[5] := 1E32;
- lsempary[0]:=nil;
- lsempary[1]:=nil;
- lsemp := nil;
- tokenx := 2; { no token queue }
- for sx := 0 to hlimit do begin
- symtab[sx] := nil; { initialize symbol tables }
- end;
- for sx := 0 to stacksize do begin
- sem[sx]:=nil;
- end;
- init_parser_tables;
- init_sem;
- errsym:='ERR#AA';
- strx:=0;
- line := ''; { fake a new line }
- lx := 1;
- errpos:=1;
- err_count:=0;
- nextch; { fetch the first character, forcing a line read }
- end;
-
- {**********************}
- function OPENFILES: boolean;
- begin
- openfiles:=false;
- write('Source file? ');
- readln(sfilename);
- prompt:='';
- promptlen:=3; {length of ` ; '}
- if sfilename='' then begin
- prompt:='> ';
- sfilename:=default_sfile;
- is_console:=true;
- promptlen:=promptlen+2;
- end
- else is_console:=false;
- if openfile(sfile, sfilename, false) then begin
- write('Target file? ');
- readln(rfilename);
- if rfilename='' then rfilename:=default_rfile;
- openfiles:=openfile(rfile, rfilename, true);
- end
- end;
-
- begin
- writeln('Tiny Pascal Compiler, ', version_string);
- writeln;
- errors := 0;
- debug := 0;
- trace := false;
- if openfiles then begin
- inittables;
- parser; { does it all }
- closefiles;
- end
- end.
-
-