OS/2 Shareware BBS: 10 Tools

home *** CD-ROM | disk | FTP | other *** search

/ OS/2 Shareware BBS: 10 Tools / 10-Tools.zip / lifeos2.zip / LIFE-1.02 / LIB / TOKENIZE.LF < prev next >

Wrap

Text File | 1996-06-04 | 14KB | 650 lines

% $Id: tokenizer.lf,v 1.2 1994/12/09 00:26:37 duchier Exp $
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%                           TOKENIZER FOR LIFE
%                             (in wild_life)
%
% This file contains a complete tokenizer for Life programs. The tokens it
% produces are the input of the Life parser written in Life (see parser.lf).
%
% Tokens are of the following types:
% - variable(X) where X is the name of the variable;
% - construct(X) represents a constructor X.
%   The type of a constructor is a subsort of construct: numb, chaine, or
%   atom. X is the "value" of the atom (string, number, or unevaluated atom)
% - any syntactic object like "[" or "?", or defined by syntact_object(X)
%
% The dot may be tokenized in three different ways, depending on the context
% in which it appears:
% - It is not returned as a token if it occurs inside a floating point
%   number;
% - It is returned as the syntactic object "." if it is followed by a void
%   character (tab, nl, space, or percent) or by end_of_file;
% - It is returned as atom(".") otherwise (possibly glued to the operator
%   characters that follow it).
%
% The tokenizer is written as an attribute grammar, using the grammar
% translator. It reads two characters in advance.
%
% Use of this file:
%      tokenize(Filename) ?
%          reads the file Filename and writes the obtained tokens to the
%          file Filename_toks.
%
% All the necessary files are automatically loaded if they are in the same
% directory.
%
%
% Author: Bruno Dumant
%
% Copyright 1992 Digital Equipment Corporation
% All Rights Reserved
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

module("tokenizer") ?
public( atom,construct,numb,chaine,syntact_object,variable,
        tokenize,first_token,rest_token,next_token,
        rest_chars) ?

%%% load utilities

import("accumulators") ?

%%% set the right function for handling terminals in the grammar.
%%% (undone at the end of the grammar by the reset_C query)

set_C( token_C) ?

%%% changed for efficiency
%%% The dcg accumulator is declared with input feature 0 and output feature
%%% rest; these are the feature names used by every hand-written clause of
%%% this file (e.g. token(0 => ..., rest => ...)).

acc_info(dcg,Term, Xs, Ys, acc_pred => 'C'(Term,false,Xs,Ys),
         in_name => 0, out_name => rest)?

%%% token_C(Terminals, Flag, Xs, Ys)
%%% Terminal handler installed by set_C above.
%%%   Terminals = []  : Xs and Ys are unified (nothing is consumed).
%%%   Terminals = [A] : Xs is unified with [A|D] and Ys with evalin(D).
%%% With Flag = true the unification is performed in the such-that part and
%%% the function's value is succeed; with Flag = false the function's value
%%% is the unification goal itself.
%%% NOTE(review): evalin(D) presumably forces the quoted `next_char tail
%%% built by next_char, so that the next character is actually read --
%%% confirm against the definition of evalin.

token_C([],true,Xs,Ys) -> succeed | Xs = Ys.
token_C([],false,Xs,Ys) -> Xs = Ys.
token_C([A],true,Xs,Ys) -> (`evalin(D) = Ys) | Xs = [A|D].
token_C([A],false,Xs,Ys) -> ( Xs = [A|D], `evalin(D) = Ys ).

%%% Types.
%%% numb, chaine and atom are declared as subsorts of construct.

non_strict(atom) ?

atom <| construct.
numb <| construct.
chaine <| construct.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Grammar of the tokenizer.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Main Predicates
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%% token(0 => Chars, T, rest => R)
%%% Reads one token T from the list of character codes Chars; R is the
%%% remaining input. Fails on empty input.
%%% The rule to apply is selected from the first code A:
%%%   97..122 (a-z) : non_quoted_atom; T is the string itself when it is a
%%%                   registered syntactic object, atom(U) otherwise;
%%%   65..90  (A-Z) : variable; T = variable(...);
%%%   48..57  (0-9) : number;   T = numb(N);
%%%   any other code: the predicate whose name is "tk" followed by the
%%%                   decimal code (e.g. tk40 for "(") is called with T and
%%%                   the same 0/rest features.

token(0 => []) :- !, fail.
token(0 => W:[A|B], T, rest => R) :-
        (
            A >= 97 and A =< 122,!,
            non_quoted_atom(SA,0 => W,rest => R),
            cond( is_syntactic(SA),
                  T = SA,
                  ( U = str2psi(SA,current_module), T = atom(U))
                )
        ;
            A >= 65 and A=< 90,!,
            variable(V, 0 => W,rest => R),
            T = variable(str2psi(V))
        ;
            cond( A >= 48 and A =< 57,
                  ( number(N,0 => W,rest => R), T = numb(N) ),
                  % build the rule name "tk<code>" and call it
                  str2psi(strcon("tk",int2str(A))) & @(T,0 => W,rest => R)
                )
        ).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% First Character
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% variables
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%% tk95: the token starts with an underscore (code 95).
%%% A lone "_" yields atom(@); otherwise the token is the variable whose
%%% name is "_" followed by the characters read by var_chars.

tk95(T) -->                             % variables starting with _
        [_],
        var_chars(Y),
        {
            (
                Y $== "",!,
                T = atom(@)             % _ is @
            )
        ;
            T = variable(str2psi(strcon("_",Y)))
        } .

%%% variable(X): X is the name (a string) of a variable whose first
%%% character code is Y; token only calls this rule for codes 65..90.

variable(X) -->                         % variables starting with
        [Y],                            % a capital letter
        var_chars(Z),
        { X = strcon(chr(Y),Z) } .
%%% var_chars(Z): Z is the string made of the simple atom characters that
%%% follow, optionally ended by a run of primes.

var_chars(Z) -->
        simple_atom_chars(Y),
        (
            primes(P),!,                % variables may end with '
            { Z = strcon(Y,P) }
        ;
            { Z = Y }
        ).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% syntactic objects
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%% Single-character syntactic objects. Each rule consumes one character;
%%% the rule name carries the character code (40 "(", 41 ")", 63 "?",
%%% 123 "{", 125 "}") and the token is the corresponding string.

tk40( "(") --> [_] .
tk41( ")") --> [_] .
tk63( "?") --> [_] .
tk123( "{") --> [_] .
tk125( "}") --> [_] .

%%% "." is a syntactic dot, or the special operator ".": the distinction is
%%% done thanks to the following character.

tk46( ".",0 => [46]) -->                %%% end of file
        [_] , ! .
%%%tk46( T,0 => [46,K|L]) -->           % . is followed by space, tab,
%%%                                     % percent or newline.
%%%     { cond( has_feature(K,void_table),
%%%             T = ".",
%%%             T = atom(".")
%%%           )
%%%     },
%%%     [_] .

%%% K is the code following the dot: when it is a feature of void_table the
%%% dot is the syntactic object "."; otherwise tk46bis reads an operator
%%% starting with ".".
%%% NOTE(review): has_feature(...) appears here outside { } -- confirm that
%%% the grammar translator passes it through as a plain goal.

tk46( T,0 => [46,K|L]) -->
        (
            has_feature(K,void_table),!,
            { T = "." },
            [_]
        ;
            tk46bis(T)
        ).

%%% tk46bis: consumes the dot and any operator characters after it; the
%%% resulting string is returned as is when it is a syntactic object,
%%% as atom(U) otherwise.

tk46bis(T) -->
        [_],
        op_atom_chars(Z),
        {
            X = strcon(".",Z),
            (
                is_syntactic(X),!,
                T = X
            ;
                U = str2psi(X,current_module),
                T = atom(U)
            )
        } .

%%% tk91: "[" (code 91), possibly followed by "|" (124) and then "]" (93),
%%% giving the tokens "[", "[|" or "[|]".

tk91(T) -->
        [_],
        (
            [124],!,
            (
                [93],!,
                { T = "[|]"}
            ;
                {T = "[|"}
            )
        ;
            {T = "["}
        ) .

tk93( "]") --> [_] .

%% special case: "|]"

%%% tk124: "|" (code 124). "|]" is one token; otherwise "|" is glued to
%%% the operator characters that follow, like any other operator.

tk124(T) -->
        [_],
        (
            [93],!,
            { T = "|]"}
        ;
            op_atom_chars(Z),
            {
                X = strcon("|",Z),
                cond( is_syntactic(X),
                      T = X,
                      ( U = str2psi(X,current_module), T = atom(U))
                    )
            }
        ) .

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% constructors
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%% simple atoms
%%% X is the string made of the first character and the simple atom
%%% characters that follow it.

non_quoted_atom(X) -->
        [Y],
        simple_atom_chars(Z),
        { X = strcon(chr(Y),Z) } .

%%% quoted atoms
%%% tk39: the token starts with a single quote (code 39); the text up to
%%% the closing quote is read by quoted_atom_end.

tk39(T) -->
        [_],
        quoted_atom_end(X),
        {
            cond( is_syntactic(X),
                  T = X,
                  ( U = str2psi(X,current_module), T = atom(U) )
                )
        } .

%%% Numbers: it is necessary to read two characters in advance.
%%% number(X, 0 => W, rest => R): X is the value of the number at the head
%%% of W. A fractional part is read only when the dot (46) is followed by
%%% at least one digit; otherwise the dot is left in the input. An optional
%%% exponent introduced by e (101) or E (69) scales the value by 10^E.
%%% Once e/E has been matched the cut commits: if no exponent can be read
%%% after it, number fails.

number(X, 0 => W, rest => R) :-
        digits( V1, 0 => W, rest => R1),
        (
            R1 = [46,D|R2],
            digits(0 => [D|R2],V2,length => L2,rest => R3),
            R4 = evalin(R3),
            Vint = V1 + V2 * 10^(-L2),  % L2 = number of fractional digits
            !
        ;
            Vint = V1,
            R4 = R1
        ),
        (
            R4 = [{101;69}|R5],!,       %% e or E
            exponent(E,0 => evalin(R5),rest=> R),
            X = Vint * 10^(E)
        ;
            X = Vint,
            R4 = R
        ).

%%% Strings
%%% tk34: the token starts with a double quote (code 34).

tk34(chaine(X)) -->
        [_],
        char_chaine_end(X) .

%%% op_atoms
%%% gen_op_char_ass_pred_def(Char) builds the grammar rule tk<Char> for an
%%% operator character: the rule consumes the character and the operator
%%% characters that follow; the resulting string is returned as is when it
%%% is a syntactic object, as atom(U) otherwise.
%%% NOTE(review): the rule built as ( Head --> ... ) is presumably asserted
%%% by the grammar translator when this goal runs -- confirm.

gen_op_char_ass_pred_def(Char) :-
        S = chr(Char),
        str2psi(strcon("tk",int2str(Char))) = PredName,
        Head = PredName & @(T),
        (
            Head -->
                [_],
                op_atom_chars(Z),
                {
                    X = strcon(S,Z),
                    cond( is_syntactic(X),
                          T = X,
                          ( U = str2psi(X,current_module), T = atom(U))
                        )
                }
        ).

%%% Codes of  ! # $ % & * + - / : < = > \ ^ ~
maprel(gen_op_char_ass_pred_def,
       [33,35,36,37,38,42,43,45,47,58,60,61,62,92,94,126]) ?

%%% special cases: @ , ; `
%%% Each of these characters is a token on its own.

tk64( atom(@)) --> [_] .
tk96( atom(`)) --> [_] .
tk44( atom(,)) --> [_] .
tk59( atom(;)) --> [_] .

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Other Characters
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%% void chars
%%% Skips white space, line comments (introduced by code 37, "%") and
%%% nested comments; always succeeds, possibly consuming nothing.

void_chars -->                          % space, tab, new line
        [{9;10;32}],!,
        void_chars .
void_chars -->                          % comments
        [37],!,
        comment_chars,
        void_chars .
void_chars -->                          % nested comments
        nested_comments,
        void_chars .
void_chars --> [] .

%%% comment_chars: consumes the rest of a line comment, newline included.

comment_chars --> [10], ! .             % a comment ends with a nl.
comment_chars --> [X],!, comment_chars .
comment_chars --> [] .                  % nothing left to read

%%% nested_comments
%%% A nested comment opens with codes 47,42 ("/*") and closes with 42,47
%%% ("*/"). Inside, either an inner nested comment or a single character
%%% is skipped at each step.

nested_comments( 0 => [47,42|_]) -->
        [_],[_],
        end_nested_comments .

end_nested_comments( 0 => [42,47|_]) -->
        [_],[_],! .
end_nested_comments -->
        ( nested_comments ; [_] ),
        end_nested_comments .

%%% simple atom characters
%%% simple_atom_chars(Z): Z is the (possibly empty) string of the leading
%%% characters that belong to simple_atom_table.

simple_atom_chars(Z) -->
        simple_atom_char(X), !,
        simple_atom_chars(Y),
        { Z = strcon(X,Y)} .
simple_atom_chars("") --> [] .

%%% simple_atom_char(X, 0 => [Y|R1], rest => R2): succeeds when code Y is a
%%% feature of simple_atom_table; X is the value stored under Y.
%%% The commented-out lines are the earlier, comparison-based version.

simple_atom_char(X, 0 => [Y|R1], rest => R2) :-
%%      Y >= 48 and Y =< 57             % digit
%%      or Y >= 65 and Y =< 90          % upper-case letter
%%      or Y =:= 95                     % underscore
%%      or Y >= 97 and Y =< 122,        % lower-case letter
%%      R2 = evalin(R1),
%%      X = chr(Y).
        has_feature(Y,simple_atom_table,X),
        R2 = evalin(R1).

%%% Same as simple_atom_chars, but fails when no character can be read.

at_least_1_simple_atom_char(Z) -->
        simple_atom_char(X), !,
        simple_atom_chars(Y),
        { Z = strcon(X,Y) } .
%%% primes
%%% primes(P): P is the string made of one or more consecutive single
%%% quotes (code 39).

primes(P) -->
        [39],
        (
            primes(Q),
            {P = strcon("'",Q), !}
        ;
            {P = "'"}
        ).

%%% quoted atoms
%%% quoted_atom_end(X): X is the text of a quoted atom whose opening quote
%%% has already been consumed. A doubled quote stands for one quote inside
%%% the atom; a single quote ends it. Fails if the input ends before the
%%% closing quote.
%%% The unification in the doubled-quote branch is written between { },
%%% exactly as in char_chaine_end below, so that it is unambiguously a
%%% plain goal and not a non-terminal of the grammar.

quoted_atom_end(X) -->
        [39], !,
        (
            [39], !,
            quoted_atom_end(Y),
            { X = strcon("'",Y) }
        ;
            { X = "" }
        ) .
quoted_atom_end(X) -->
        any_char(Y),
        quoted_atom_end(Z),
        { X = strcon(Y,Z)} .

%%% numbers
%%% digits(V, length => L): reads as many digits as possible; V is their
%%% value as a decimal integer and L the number of digits read.

digits(V, length=>L) -->
        digit( V1),
        (
            digits(V2,length => L2),!,
            { L = L2+1, V = V1*10^L2 + V2}
        ;
            { V = V1, L = 1}
        ) .

%%% digit: N is the value (0..9) of the digit at the head of the input;
%%% the head pattern 48+N relates the character code to N.

digit(0 => [48+N|R], N, rest => Rest) :-
        N =< 9 and N >= 0,
        Rest = evalin(R).

%%% exponent(V): V is a signed integer exponent.

exponent(V) -->
        sign(S),
        digits(V1),
        {!, V = S*V1} .

%%% sign(S): S is 1 or -1. The empty alternative comes first; the "-" (45)
%%% and "+" (43) alternatives are reached by backtracking when no digit
%%% follows directly.

sign(1) --> [] .
sign(-1) --> [45], ! .
sign(1) --> [43] .

%%% strings
%%% char_chaine_end(X): X is the text of a string whose opening double
%%% quote (code 34) has already been consumed. A doubled double quote
%%% stands for one double quote inside the string.

char_chaine_end(X) -->
        [34], !,
        (
            [34], !,
            char_chaine_end(Y),
            {X = strcon("""",Y)}
        ;
            { X = "" }
        ) .
char_chaine_end(X) -->
        any_char(Y),
        char_chaine_end(Z),
        { X = strcon(Y,Z)} .

%%% characters for "operators"
%%% op_atom_char succeeds when code Y is a feature of op_chars_table; X is
%%% the value stored under Y.

op_atom_char(X, 0=> [Y|R1], rest => R2) :-
        has_feature(Y,op_chars_table,X),
        R2 = evalin(R1).

%%% op_atom_chars(X): X is the (possibly empty) string of the leading
%%% operator characters.

op_atom_chars(X) -->
        op_atom_char(Y),!,
        op_atom_chars(Z),
        { X = strcon(Y,Z)} .
op_atom_chars("") --> [] .

%%% any character
%%% any_char(Y): consumes one character code and returns it as a string.

any_char(Y) -->
        [X],
        { Y = chr(X) } .

%
% reset C function
%

reset_C ?

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% characters tables
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%% gen_char_table(Table, Codes): for each code A of the list, stores
%%% chr(A) under feature A of Table. The argument-less second clause ends
%%% the recursion once the list is exhausted.

gen_char_table(Table,[A|B]) :-
        !,
        Table.A <<- chr(A),
        gen_char_table(Table,B).
gen_char_table.

%%% void characters: tab, newline, space, percent
persistent(void_table) ?
gen_char_table(void_table,[9,10,32,37]) ?

%%% characters of simple atoms: 0-9, A-Z, underscore, a-z
persistent(simple_atom_table) ?
gen_char_table(simple_atom_table,
               [48,49,50,51,52,53,54,55,56,57,65,66,67,68,69,70,
                71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,
                89,90,95,97,98,99,100,101,102,103,104,105,106,107,
                108,109,110,111,112,113,114,115,116,117,118,119,120,121,122]) ?

%%% operator characters:  ! # $ % & * + - . / : < = > \ ^ | ~
persistent(op_chars_table) ?
gen_char_table(op_chars_table,[33,35,36,37,38,42,43,45,46,47,
                               58,60,61,62,92,94,124,126]) ?
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% definition of new syntactic objects
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%% is_syntactic(X): has X been registered in syntact_objects_table?

is_syntactic(X) -> has_feature(X,syntact_objects_table).

persistent(syntact_objects_table) ?

%%% syntact_object(X): registers X as a syntactic object by setting feature
%%% X of syntact_objects_table to true. When is_value(X) holds, an error
%%% message is written instead and nothing is registered.

syntact_object(X) :-
        is_value(X),!,
        nl_err,
        write_err(
            "*** Error: numbers or strings cannot be syntactic objects."),
        nl_err
    ;
        syntact_objects_table.X = true.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% char handler
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%% Persistent state of the tokenizer:
%%%   read_tok   : set to true by tokens each time a token has been read;
%%%   rest_chars : the (at most two) characters read in advance and not yet
%%%                consumed when the last token ended;
%%%   rest_token : the last token read by next_token.

persistent(read_tok) ?
read_tok <<- false ?

persistent(rest_chars) ?
rest_chars <<- [] ?

persistent(rest_token) ?
rest_token <<- none ?

%%% next_char: the list of the characters still to be read from the
%%% current input. One item X is read with get; the list is [] when
%%% X :=< end_of_file holds, and [X|T] otherwise, where T is the quoted
%%% call `next_char (presumably evaluated later through evalin -- confirm).

next_char -> L |
        get(X),
        cond( X :=< end_of_file ,
              L=[] ,
              L = [X|T] ),
        T = `next_char.

%%% next_token: the list of the tokens that remain to be read.
%%% The input is rebuilt from rest_chars: when exactly two characters were
%%% saved they are put in front of a quoted `next_char, otherwise the saved
%%% list is used alone. Then:
%%%   - if call_once(read_new_token(...)) yields false, next_token fails;
%%%   - if the token is none, the result is [];
%%%   - otherwise the token is stored in rest_token and the first
%%%     alternative fails on purpose, so that the second one returns a copy
%%%     of the stored token followed by a quoted `next_token.

next_token -> L |
        (
            cond( R:copy_term(rest_chars) = [A,B],
                  Chars = [A,B|`next_char],
                  Chars = R ),
            call_once(read_new_token(Tok, Chars)) = TT,
            (
                TT :== false, !, fail
            ;
                Tok :== none, !,
                L = []
            ;
                rest_token <<- `Tok, fail
            )
        ;
            L = [copy_term(rest_token)|`next_token]
        ).

%%% first_token: the list of all the tokens of the current input. Reads
%%% the first token directly; the tail of the list is a quoted `next_token.

first_token -> L |
        FC = next_char,
        NC = evalin(FC),                %% 2 characters must be read
        read_new_token(Tok,NC),
        cond( Tok :== none,
              L = [],
              L = [Tok|T]),
        T = `next_token.

%%% read_new_token(Tok, X): skips the void characters at the head of X,
%%% then reads one token. Tok is none when nothing is left after the void
%%% characters. Otherwise rest_chars is set to the first two remaining
%%% characters, or to the whole remainder when fewer than two are left.

read_new_token( Tok, X) :-
        void_chars(0 => X, rest => R1),!,
        (
            R1 = [], !,
            Tok = none
        ;
            token( 0 => R1, Tok, rest => R2),
            (
                R2 = [A,B|R],!,
                rest_chars <<- [A,B]
            ;
                rest_chars <<- R2
            )
        ).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Interface Predicates
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%% tokens(Ok, Chars, InStream)
%%% Main loop of tokenize. Ok tells whether the previous step read a token.
%%%
%%% Ok = true: void characters are skipped first. If nothing is left,
%%% output is switched to "stdout" and a completion message naming the
%%% input file is written. Otherwise one token is read and written with
%%% writeq, read_tok is set to true and the look-ahead characters are saved
%%% in rest_chars; that branch then fails on purpose, and the second branch
%%% rebuilds the input from rest_chars and loops with the current value of
%%% read_tok (still false when no token could be read).
%%%
%%% Ok = false: output is switched to "stdout" and an error message giving
%%% the line count and the name of the input file is written.

tokens(true,Input,InStream) :-
        read_tok <<- false,
        void_chars(0 => Input, rest => Useful),!,
        (
            % end of input: report success on the standard output
            Useful = [], !,
            open_out("stdout",OutStream),
            nl,nl,
            write("*** File '",InStream.input_file_name,"' tokenized"),
            nl
        ;
            (
                % read and print one token, remember the look-ahead,
                % then fail
                token( 0 => Useful, Token, rest => Left),
                nl,writeq(Token),
                read_tok <<- true,
                cond( Left = [C1,C2|_],
                      rest_chars <<- [C1,C2],
                      rest_chars <<- Left),
                fail
            ;
                % resume from the saved look-ahead characters
                cond( Saved:copy_term(rest_chars) = [C1,C2],
                      Next = [C1,C2|`next_char],
                      Next = Saved ),
                tokens(read_tok,Next,InStream)
            )
        ).
tokens(false,_,InStream) :-
        open_out("stdout",OutStream),
        nl_err,nl_err,
        write_err("*** Token error near line ",InStream.line_count,
                  " in file '",InStream.input_file_name,"'"),
        nl_err.

%%% tokenize(File)
%%% Opens File for reading and the file named File followed by "_toks" for
%%% writing, runs the tokens loop on the characters of File, then closes
%%% both streams.

tokenize(File:string) :-
        open_in(File,Source),
        open_out(strcon(File,"_toks"),Target),
        First = next_char,
        Ahead = evalin(First),
        tokens(true,Ahead,Source),
        close(Source),
        close(Target).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%