home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Power-Programmierung
/
CD1.mdf
/
pascal
/
compcomp
/
tpyacc
/
paslex.l
< prev
next >
Wrap
Text File
|
1991-04-30
|
5KB
|
163 lines
%{
(* PASLEX.L: lexical analyzer for Pascal, adapted to TP Yacc, 2-28-89 AG *)
%}
%{
(*
* lex input file for pascal scanner
*
* extensions: to ways to spell "external" and "->" ok for "^".
*
*)
%}
%{
(* Note: Keywords are determined by scanning a keyword table, rather
than including the keyword patterns in the Lex source which is done
in the original version of this file. I prefer this method, because
it makes the grammar itself more readable (handling case-insensitive
keywords in Lex is quite cumbersome, e.g., you will have to write
something like [Aa][Nn][Dd] to match the keyword `and'), and also
produces a more (space-) efficient analyzer (184 states and 375
transitions for the keyword pattern version, against only 40 states
and 68 transitions for the keyword table version). *)
procedure commenteof;
begin
writeln('unexpected EOF inside comment at line ', yylineno);
end(*commenteof*);
function upper(str : String) : String;
(* converts str to uppercase *)
var i : integer;
begin
for i := 1 to length(str) do
str[i] := upCase(str[i]);
upper := str
end(*upper*);
function is_keyword(id : string; var token : integer) : boolean;
(* checks whether id is Pascal keyword; if so, returns corresponding
token number in token *)
const
id_len = 20;
type
Ident = string[id_len];
const
(* table of Pascal keywords: *)
no_of_keywords = 39;
keyword : array [1..no_of_keywords] of Ident = (
'AND', 'ARRAY', 'BEGIN', 'CASE',
'CONST', 'DIV', 'DO', 'DOWNTO',
'ELSE', 'END', 'EXTERNAL', 'EXTERN',
'FILE', 'FOR', 'FORWARD', 'FUNCTION',
'GOTO', 'IF', 'IN', 'LABEL',
'MOD', 'NIL', 'NOT', 'OF',
'OR', 'OTHERWISE', 'PACKED', 'PROCEDURE',
'PROGRAM', 'RECORD', 'REPEAT', 'SET',
'THEN', 'TO', 'TYPE', 'UNTIL',
'VAR', 'WHILE', 'WITH');
keyword_token : array [1..no_of_keywords] of integer = (
_AND, _ARRAY, _BEGIN, _CASE,
_CONST, _DIV, _DO, _DOWNTO,
_ELSE, _END, _EXTERNAL, _EXTERNAL,
(* EXTERNAL: 2 spellings (see above)! *)
_FILE, _FOR, _FORWARD, _FUNCTION,
_GOTO, _IF, _IN, _LABEL,
_MOD, _NIL, _NOT, _OF,
_OR, _OTHERWISE, _PACKED, _PROCEDURE,
_PROGRAM, _RECORD, _REPEAT, _SET,
_THEN, _TO, _TYPE, _UNTIL,
_VAR, _WHILE, _WITH);
var m, n, k : integer;
begin
id := upper(id);
(* binary search: *)
m := 1; n := no_of_keywords;
while m<=n do
begin
k := m+(n-m) div 2;
if id=keyword[k] then
begin
is_keyword := true;
token := keyword_token[k];
exit
end
else if id>keyword[k] then
m := k+1
else
n := k-1
end;
is_keyword := false
end(*is_keyword*);
%}
NQUOTE [^']
%%
%{
var c : char;
kw : integer;
%}
[a-zA-Z]([a-zA-Z0-9])* if is_keyword(yytext, kw) then
return(kw)
else
return(IDENTIFIER);
":=" return(ASSIGNMENT);
'({NQUOTE}|'')+' return(CHARACTER_STRING);
":" return(COLON);
"," return(COMMA);
[0-9]+ return(DIGSEQ);
"." return(DOT);
".." return(DOTDOT);
"=" return(EQUAL);
">=" return(GE);
">" return(GT);
"[" return(LBRAC);
"<=" return(LE);
"(" return(LPAREN);
"<" return(LT);
"-" return(MINUS);
"<>" return(NOTEQUAL);
"+" return(PLUS);
"]" return(RBRAC);
[0-9]+"."[0-9]+ return(REALNUMBER);
")" return(RPAREN);
";" return(SEMICOLON);
"/" return(SLASH);
"*" return(STAR);
"**" return(STARSTAR);
"->" |
"^" return(UPARROW);
"(*" |
"{" begin
repeat
c := get_char;
case c of
'}' : ;
'*' : begin
c := get_char;
if c=')' then exit else unget_char(c)
end;
#0 : begin
commenteof;
exit;
end;
end;
until false
end;
[ \n\t\f] ;
. return(ILLEGAL);