home *** CD-ROM | disk | FTP | other *** search
- (( This is Capitalization Figure 2 -- Modifications To EDITCONTROL
- Modules ))
-
- (* THESE GO JUST BEFORE THE INITIALIZATION SECTION, NEAR THE END *)
-
- PROCEDURE Scan; (* Capitalizes keywords and autoindents next line *)
- (*
-   Ends the current line: keywords on it are capitalized, a CR is
-   inserted, and the new line is started with the same leading white
-   space as the line just left.
-
-   scan alternates between a first and a second pass on successive
-   calls, so the two calls below must stay paired and in this order.
-   The first call ( full = TRUE ) capitalizes the keywords and saves the
-   leading white space; the second call inserts the saved white space
-   and does not look at its second argument.
-
-   scan, RedoLine, InsOneChar, CR and CurFilePtr are not defined in
-   this listing.
- *)
- BEGIN
- scan(CurFilePtr^,TRUE);(* first pass: CAP the keywords, save the intro *)
- RedoLine; (* update the displayed line *)
- InsOneChar(CR); (* put in a CR *)
- scan(CurFilePtr^,TRUE);(* second pass: put the intro in the file *)
- RedoLine; (* update the displayed line *)
- END Scan;
- PROCEDURE AutoIndent; (* Autoindents next line *)
- (*
-   Ends the current line without touching its text: a CR is inserted
-   and the new line is started with the same leading white space as the
-   line just left.
-
-   scan alternates between a first and a second pass on successive
-   calls, so the two calls below must stay paired and in this order.
-   The first call ( full = FALSE ) only saves the leading white space;
-   the second call inserts it and does not look at its second argument.
-   Unlike Scan, the display is refreshed only once, after the new line
-   has been started.
-
-   scan, RedoLine, InsOneChar, CR and CurFilePtr are not defined in
-   this listing.
- *)
- BEGIN
- scan(CurFilePtr^,FALSE);(* first pass: get the intro for the line *)
- InsOneChar(CR); (* put in a CR *)
- scan(CurFilePtr^,FALSE);(* second pass: put the intro in the file *)
- RedoLine; (* update the displayed line *)
- END AutoIndent;
-
- ((********************************************************************))
-
- (( This is Capitalization Figure 4 -- The New Module Implementation ))
-
-
- IMPLEMENTATION MODULE Scanner;
-
- (* based on the FSM approach given by Gary A. Ford and Richard S. Wiener,
-    MODULA-2 A SOFTWARE DEVELOPMENT APPROACH, Wiley, 1985, page 350 *)
-
- FROM MakeEdits IMPORT EditFile, DelChars, InsChars, BackOneChar,
-                       ForwardOneChar, GetCurrentChar;
-
- CONST
-   bufsize   = 15;  (* Symbol buffer size.  The longest keyword has 14
-                       letters, so a symbol that fills the buffer can
-                       never be a keyword *)
-   introsize = 40;  (* Half screen width - to allow 40 spaces and/or tabs *)
-   (* Character constants in standard octal notation *)
-   NUL   = 0C;      (* 00 hex - second character of the scan marker *)
-   TAB   = 11C;     (* 09 hex - white space *)
-   LF    = 12C;     (* 0A hex - last character in EOL marker in file *)
-   SPACE = 40C;     (* 20 hex - white space, first character of marker *)
-
- TYPE
-   (* These are the states of the Finite State Machine:
-        start  outside any symbol, string or comment
-        insym  collecting an all lower case symbol ( keyword candidate )
-        inid   inside an identifier or number that cannot be a keyword
-        instr  inside a string literal
-        encom  an open paren was seen, it may start a comment
-        incom  inside a comment
-        excom  inside a comment, an asterisk was just seen            *)
-   states  = (start, insym, inid, instr, incom, encom, excom);
-   barray  = ARRAY [1 .. bufsize] OF CHAR;
-   iarray  = ARRAY [1 .. introsize] OF CHAR;
-   CHARSET = SET OF CHAR;
-
- VAR (* all variables are global in this module *)
-   state    : states;   (* state of FSM *)
-   ch       : CHAR;     (* current character in file *)
-   delim    : CHAR;     (* holds string delimiter *)
-   buffer   : barray;   (* symbol buffer *)
-   intro    : iarray;   (* intro buffer, also used to build the marker *)
-   buflen   : CARDINAL; (* number of characters in symbol *)
-   introlen : CARDINAL; (* number of characters in intro to line *)
-   j        : CARDINAL; (* loop counter *)
-   symset   : CHARSET;  (* characters allowed in a keyword candidate *)
-   idset    : CHARSET;  (* characters allowed in any identifier or number *)
-   flag,sec : BOOLEAN;  (* flag is gnl purpose BOOLEAN, sec is for pass *)
-   (* Keyword tables, one per keyword length.  Each table is the
-      keywords of that length run together; trailing blanks are only
-      padding and are never compared. *)
-   a2 : ARRAY [0 .. 13] OF CHAR;
-   a3 : ARRAY [0 .. 57] OF CHAR;
-   a4 : ARRAY [0 .. 75] OF CHAR;
-   a5 : ARRAY [0 .. 49] OF CHAR;
-   a6 : ARRAY [0 .. 41] OF CHAR;
-   a7 : ARRAY [0 .. 35] OF CHAR;
-   a8 : ARRAY [0 .. 15] OF CHAR;
-   a9 : ARRAY [0 .. 17] OF CHAR;
-   a10: ARRAY [0 .. 29] OF CHAR;
-   a14: ARRAY [0 .. 15] OF CHAR;
-
- PROCEDURE scan(VAR f:EditFile; full:BOOLEAN);
- (*
-   This procedure alternates between two passes on successive calls;
-   the BOOLEAN sec remembers which pass is next, so calls must come in
-   pairs.  f is the file being edited.
-
-   First pass : a two character marker ( SPACE, NUL ) is inserted at the
-                current position, the leading white space of the current
-                line ( at most introsize characters ) is saved in intro,
-                and the marker is removed again.  IF full THEN the line
-                is also scanned up to the marker and every all lower
-                case symbol found in the keyword tables is replaced by
-                its upper case form.
-   Second pass: the saved intro is inserted at the current position;
-                full is not used.
-
-   Comment and string state is not carried from one line to the next
-   and nested comments are not handled.
-   NOTE(review): the code assumes that InsChars leaves the current
-   position just after the inserted text - confirm against MakeEdits.
- *)
-
-   PROCEDURE inTable(VAR table : ARRAY OF CHAR) : BOOLEAN;
-   (*
-     Returns TRUE if buffer[1 .. buflen] equals one of the buflen
-     character entries of table.  Only whole entries that lie inside
-     table are compared, so nothing outside the array is ever read.
-     table is a VAR parameter only to avoid copying it.
-   *)
-   VAR base, i : CARDINAL;
-   BEGIN
-     base := 0;
-     WHILE base + buflen <= HIGH(table) + 1 DO
-       i := 0;
-       WHILE (i < buflen) AND (table[base+i] = buffer[i+1]) DO INC(i) END;
-       IF i = buflen THEN RETURN TRUE END;
-       INC(base, buflen)
-     END;
-     RETURN FALSE
-   END inTable;
-
-   PROCEDURE check ():BOOLEAN;
-   (*
-     this procedure checks a symbol to see if it should be capitalized;
-     only the table for the symbol's length is searched.  The result is
-     also left in flag.
-   *)
-   BEGIN
-     CASE buflen OF
-       2  : flag := inTable(a2)  |
-       3  : flag := inTable(a3)  |
-       4  : flag := inTable(a4)  |
-       5  : flag := inTable(a5)  |
-       6  : flag := inTable(a6)  |
-       7  : flag := inTable(a7)  |
-       8  : flag := inTable(a8)  |
-       9  : flag := inTable(a9)  |
-       10 : flag := inTable(a10) |
-       14 : flag := inTable(a14)
-     ELSE
-       flag := FALSE (* there are no keywords of any other length *)
-     END;
-     RETURN flag
-   END check;
-
-   PROCEDURE fromStart;
-   (*
-     Makes the start state transition for ch.  It is also used for the
-     character that ends a symbol, an identifier or a false comment
-     start, so that this character is not lost.
-   *)
-   BEGIN
-     CASE ch OF
-       (* A lower case letter starts a keyword candidate *)
-       'a' .. 'z' : state := insym;
-                    buflen := 1;
-                    buffer[1] := ch |
-       (* Anything else that may start an identifier or a number *)
-       'A' .. 'Z', '0' .. '9' : state := inid |
-       (* An open paren may be the start of a comment *)
-       '(' : state := encom |
-       (* A single or double quote is the start of a string literal.
-          We must record the delimiter. *)
-       '"', "'" : state := instr;
-                  delim := ch
-     ELSE
-       state := start
-     END
-   END fromStart;
-
- BEGIN (* PROCEDURE scan *)
-   (*
-     The BOOLEAN sec keeps track of which pass we are on.
-   *)
-   sec := NOT sec;
-   IF sec THEN (* second pass *)
-     IF introlen > 0 THEN InsChars(f,intro,introlen) END
-   ELSE (* first pass *)
-     (*
-       Place a marker in file - to mark current position and to make sure
-       that all lines ending with a symbol return to the start state.
-     *)
-     intro[1] := SPACE;
-     intro[2] := NUL;
-     InsChars(f,intro,2); (* use procedure InsChars from MakeEdits *)
-     (*
-       Move back to beginning of line.
-     *)
-     WHILE BackOneChar(f) AND (GetCurrentChar(f) # LF) DO END;
-     introlen := 0;
-     (*
-       IF NOT BEGINNING OF FILE THEN move forward to first character of line.
-       IF BOF THEN we are already on the first character of the line.
-     *)
-     ch := GetCurrentChar(f);
-     IF ch = LF THEN
-       flag := ForwardOneChar(f);
-       ch := GetCurrentChar(f)
-     END;
-     (*
-       Now we save the intro to the line ( white space )
-     *)
-     WHILE ((ch = SPACE) OR (ch = TAB)) AND (introlen < introsize) DO
-       INC(introlen);
-       intro[introlen] := ch;
-       flag := ForwardOneChar(f);
-       ch := GetCurrentChar(f)
-     END; (* WHILE *)
-     (*
-       If we stopped on the NUL of the marker then the last character
-       saved was the SPACE of the marker and not part of the line.
-     *)
-     IF (ch = NUL) AND (introlen > 0) THEN DEC(introlen) END;
-     (*
-       There are two possibilities on first pass: if not full then
-       we just get the intro.  If full we also do capitalization.
-     *)
-     IF full THEN
-       buflen := 0;     (* Initialize symbol buffer *)
-       state := start;  (* and state of FSM *)
-       WHILE ch # NUL DO (* Now process line until end marker is found *)
-         (*
-           This is basically Ford and Wiener's FSM, but without handling
-           of nested comments - their design processes a file, this one
-           just does a line.
-         *)
-         CASE state OF
-           start :
-             fromStart |
-           insym :
-             (*
-               A keyword candidate continues as long as lower case letters
-               follow.  Letters are stored while there is room; a symbol
-               that fills the buffer is too long to be a keyword.
-             *)
-             IF ch IN symset THEN
-               IF buflen < bufsize THEN
-                 INC(buflen);
-                 buffer[buflen] := ch
-               END
-             (*
-               An upper case letter or a digit makes this a mixed
-               identifier - we do not allow mixed case or digits, so it
-               is skipped without a table lookup.
-             *)
-             ELSIF ch IN idset THEN
-               buflen := 0;
-               state := inid
-             (*
-               Any other character ends the symbol.  If the symbol is in
-               our table we CAP it in the file.  The function BackOneChar
-               and the procedures DelChars and InsChars used for this
-               are from the module MakeEdits.
-             *)
-             ELSE
-               IF (buflen > 1) AND check() THEN
-                 FOR j := 1 TO buflen DO       (* move back thru file *)
-                   flag := BackOneChar(f);     (* flag is just a dummy *)
-                   buffer[j] := CAP(buffer[j]) (* CAP the buffer too *)
-                 END;
-                 DelChars(f,buflen);           (* remove the symbol *)
-                 InsChars(f,buffer,buflen)     (* and replace it *)
-               END;
-               buflen := 0;                    (* reset for next symbol *)
-               fromStart                       (* handle the ending char *)
-             END |
-           inid :
-             (* Skip the rest of an identifier or number *)
-             IF NOT (ch IN idset) THEN fromStart END |
-           instr :
-             (* When in a string we just watch for the string delimiter *)
-             IF ch = delim THEN
-               state := start
-             END |
-           encom :
-             (*
-               When en(tering)com(ment) we go to incom(ment) if the next
-               character is an asterisk.  Otherwise the open paren was
-               just a paren and the character is handled as in start.
-             *)
-             IF ch = '*' THEN
-               state := incom
-             ELSE
-               fromStart
-             END |
-           incom :
-             (* When in comment we watch for '*', new state is ex(iting)com(ment) *)
-             IF ch = '*' THEN
-               state := excom
-             END |
-           excom :
-             (*
-               When exiting a comment we must find ')' for the new state
-               to be start.  Another asterisk keeps us here, anything
-               else puts us back in the comment.
-             *)
-             IF ch = ')' THEN
-               state := start
-             ELSIF ch # '*' THEN
-               state := incom
-             END
-         END; (* CASE *)
-         (* Get the next character to process *)
-         flag := ForwardOneChar(f);
-         ch := GetCurrentChar(f)
-       END (* WHILE *)
-     ELSE (* NOT full *)
-       (* If we are only getting intro we must skip over the rest of the line *)
-       WHILE (GetCurrentChar(f) # NUL) AND ForwardOneChar(f) DO END
-     END; (* IF *)
-     (* Finally, we remove the marker we inserted at the end of the line *)
-     flag := BackOneChar(f); (* We are on second character *)
-     DelChars(f,2)           (* Remove both characters *)
-   END (* IF *)
- END scan;
-
- BEGIN (* initialization of data structures used by scan *)
-   symset := CHARSET{'a' .. 'z'}; (* only lower case will be considered *)
-   idset  := CHARSET{'0' .. '9', 'A' .. 'Z', 'a' .. 'z'};
-   a2 := 'bydoifinoforto';
-   a3 := 'absadrandcapchrdecdivendforincmodnewnilnotoddordsetvarval ';
-   a4 := 'bytecasecharelseexclexitfromhalthighinclloopprocrealsizethentruetypewithword';
-   a5 := 'arraybeginconstelsiffalsefloattrunctsizeuntilwhile';
-   a6 := 'exportimportmodulerecordrepeatreturnsystem';
-   a7 := 'booleandisposeintegerpointerprocess ';
-   a8 := 'cardinaltransfer';
-   a9 := 'procedurequalified';
-   a10 :='definitioniotransfernewprocess';
-   a14 :='implementation ';
-   sec := TRUE; (* set it up to start on first pass *)
- END Scanner.