home *** CD-ROM | disk | FTP | other *** search
- /*##############################################################################
-
- FUNNNELWEB COPYRIGHT
- ====================
- FunnelWeb is a literate-programming macro preprocessor.
-
- Copyright (C) 1992 Ross N. Williams.
-
- Ross N. Williams
- ross@spam.adelaide.edu.au
- 16 Lerwick Avenue, Hazelwood Park 5066, Australia.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of Version 2 of the GNU General Public License as
- published by the Free Software Foundation.
-
- This program is distributed WITHOUT ANY WARRANTY; without even the implied
- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- See Version 2 of the GNU General Public License for more details.
-
- You should have received a copy of Version 2 of the GNU General Public
- License along with this program. If not, you can FTP the license from
- prep.ai.mit.edu/pub/gnu/COPYING-2 or write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
- Section 2a of the license requires that all changes to this file be
- recorded prominently in this file. Please record all changes here.
-
- Programmers:
- RNW Ross N. Williams ross@spam.adelaide.edu.au
-
- Changes:
- 07-May-1992 RNW Program prepared for release under GNU GPL V2.
-
- ##############################################################################*/
-
-
- /******************************************************************************/
- /* SCANNER.C */
- /******************************************************************************/
- /* */
- /* Introduction */
- /* ------------ */
- /* The FunnelWeb scanner is a little messy because it deals with two */
- /* structures at the same time while attempting to be efficient. On the one */
- /* hand it is busy constructing the line list. This means that it has to keep */
- /* an eye out for end of line characters ('\n'=EOL) so that it can add a line */
- /* record whenever it sees one. On the other hand, it has to scan the input */
- /* file into a token stream consisting of text tokens and special tokens */
- /* which usually have no regard for end of lines. It is tempting to divide */
- /* these two functions up (into perhaps a LINER and a TOKENIZER). However, */
- /* the presence of the include file facility would make this messy. Also, the */
- /* tokenizer has to count end of line markers so that it can generate */
- /* correctly positioned diagnostics. */
- /* */
- /* The long and short of it all is that the best way to do the scanning seems */
- /* to be to run a liner and a tokenizer as parallel layers. The liner */
- /* extracts characters from the input file and hands them to the tokenizer. */
- /* It also keeps an eye out for newline characters, sending a line record off */
- /* whenever it sees one, and counting lines. The tokenizer receives the */
- /* characters from the liner and performs the tokenize operation. */
- /* */
- /* Notes */
- /* ----- */
- /* - Currently FunnelWeb recognises only two characters as whitespace. */
- /* These are ' ' and EOL. */
- /* */
- /******************************************************************************/
-
- #include <ctype.h>
- #include <limits.h>
- #include "style.h"
-
- #include "as.h"
- #include "clock.h"
- #include "data.h"
- #include "dump.h"
- #include "list.h"
- #include "lister.h"
- #include "machin.h"
- #include "mapper.h"
- #include "memory.h"
- #include "misc.h"
- #include "option.h"
- #include "scanner.h"
-
- /******************************************************************************/
-
- /* The "special" character is the character that is used to introduce a */
- /* "special sequence". FunnelWeb allows the user to change this character so */
- /* as to cater for documents where the "default" character is common. This */
- /* definition defines what the default character is. */
- #define CH_DSPE ('@')
-
- /* FunnelWeb allows include files which are handled by the scanner by placing */
- /* recursive calls to scan_file. A maximum is placed on the level of nested */
- /* includes. This acts as a good sanity check as well as catching recursive */
- /* include files which are never a sensible construct in FunnelWeb as */
- /* FunnelWeb does not provide any conditional construct. */
- #define MAX_INCL (10)
-
- /* FunnelWeb is very conservative about what characters it will allow in its */
- /* input and output files. Currently the only characters allowed are */
- /* printables and end of lines. When FunnelWeb does spot an illegal character */
- /* it needs to be able to draw the user's attention to the character. The */
- /* best way to do this is to point to it in the listing file. However, if the */
- /* character is banned, it cannot appear in the listing file! The problem is */
- /* solved by having the scanner replace all illegal characters in each mapped */
- /* file by the following character. This eliminates further problems. */
- #define CENSORCH ('?')
-
- /* Following the Unix convention, mapped in files are not terminated with an */
- /* end-of-file character. However, the presence of such a character at the */
- /* end of the mapped file simplifies scanning and so we add one. This */
- /* definition defines what the character is to be. It doesn't matter what the */
- /* character is, so long as it cannot legally appear in the file. A control */
- /* character is a good choice as these are filtered out by the liner (see */
- /* above). */
- /* We undef EOF (from <stdio.h>) because it is too dangerously close to EOFCH.*/
- /* (EOF wasn't redefined as that might confuse readers used to <stdio.h>. */
- /* However, we still use EOF as an acronym for End Of File. */
- #define EOFCH (26)
- #undef EOF
-
- /* Tokens have a field for a general attribute which has meaning for some */
- /* token kinds. For other kinds, it has no meaning. This constant is used to */
- /* indicate a "don't care" value. */
- #define DONTCARE 0
-
- /* A nominal maximum value for the maximum length of an input line. */
- #define INMAXINF (ULONG_MAX)
-
- /******************************************************************************/
-
- /* The following type is used in the suite of pragma routines for parsing. */
- typedef
- struct
- {
- ps_t pt_ps; /* Position of the start of this argument. */
- char *pt_pstr; /* Pointer to a string containing the argument. */
- char *pt_pinl; /* Pointer to first byte of the argument in commndline. */
- } pt_t;
- typedef pt_t *p_pt_t;
-
- /******************************************************************************/
-
- /* Variables Instantiated Over The Entire Scan */
- /* ------------------------------------------- */
- LOCVAR p_ck_t p_mapp; /* Pointer to mapper's clock. */
- LOCVAR p_ck_t p_scan; /* Pointer to scanner's clock. */
- LOCVAR ulong globalno; /* Global line number of line being scanned. */
- LOCVAR ulong inclevel; /* Include level of current file. Top file is zero. */
- LOCVAR bool seenind; /* TRUE iff we have seen an indentation pragma. */
- LOCVAR ps_t ps_ind; /* seenind==TRUE => ps_ind is position of pragma. */
- LOCVAR bool seentyp; /* TRUE iff we have seen a typesetter pragma. */
- LOCVAR ps_t ps_typ; /* seentyp==TRUE => ps_typ is position of pragma. */
- LOCVAR bool seenlimo; /* TRUE iff we have seen an out lin len limit pragma. */
- LOCVAR ps_t ps_limo; /* seenlimo==TRUE => ps_limo is position of pragma. */
-
- /* Variables Instantiated Over The Current File */
- /* -------------------------------------------- */
- LOCVAR ulong inln_max; /* Maximum permitted length of an input line. */
- LOCVAR char specialch; /* Current special (escape) character. */
- LOCVAR char *p_eof; /* Pointer to EOFCH byte at the end of current file. */
- LOCVAR ulong localno; /* Local line number of line being scanned. */
-
- /* Variables Instantiated Over The Current Line */
- /* -------------------------------------------- */
- LOCVAR char *p_sol; /* Pointer to Start (first char) Of current Line. */
- LOCVAR char *p_ch; /* Pointer to current character in current line. */
- LOCVAR char ch; /* *p_ch. */
-
- /******************************************************************************/
- /* Line Processing Layer */
- /******************************************************************************/
- /* */
- /* This mini-section contains the two routines (prepline and NEXTCH) that */
- /* take care of the line based-scanning and feed characters to the */
- /* token-based scanner routines which have the top level of control. */
- /* After mapping in a file to be read, place a call to prepline passing the */
- /* address of the first byte of the mapped file as an argument. At that */
- /* point the current position will be the first byte on the first line and */
- /* the "variables instantiated over the current line" will be well defined. */
- /* Calls to NEXTCH then move the position through the mapped file one byte at */
- /* a time, stopping at the end of file at which point calls will not move the */
- /* marker which will point to the EOF character. */
- /* */
- /******************************************************************************/
-
- LOCAL void prepline P_((char *));
- LOCAL void prepline(p_line)
- /* This function should be called at the end of each line to prepare the next */
- /* line for scanning. The user of the liner mini-package should place a */
- /* single call to this function at the start of scanning a mapped file. */
- /* The user should then place calls to NEXTCH (which calls prepline when */
- /* necessary). */
- /* This function serves two purposes: */
- /* 1. It looks at the next line and converts all non-printables into */
- /* CENSORCH and issues errors for each non-printable. */
- /* 2. It initializes the line scanning variables for the next line. */
- /* The argument is a pointer to the first byte of the next line. */
- char *p_line;
- {
- char *p; /* Scans through the line and winds up sitting on the EOL. */
-
- /* Test to see if the "line" we have been given is the end of file marker. */
- /* We have to be careful here because the byte we are using to mark the end */
- /* of file could appear as an illegal unprintable. This is the reason for */
- /* the test p_line==p_eof. */
- if (*p_line==EOFCH && p_line==p_eof)
- {
- /* The line we have to process is in fact the end of file marker. */
- p_sol = p_line;
- p_ch = p_line;
- ch = EOFCH;
- return;
- }
-
- /* At this point we know that we are faced with a run of bytes terminated by */
- /* an EOL character (we know this cos we put an EOL before EOF earlier on). */
- /* We know that we have a line, so we can now bump up the line counters. */
- globalno++;
- localno++;
-
- /* Run through the line checking for non-printables and issuing errors. */
- p = p_line;
- while (*p != EOL)
- {
- /* The following test tests to see if the character is a printable in */
- /* seven bit ascii. FunnelWeb is not currently designed to work with */
- /* any character set other than seven-bit ascii and so we flag and */
- /* convert all out-of-range characters here before they are exposed to */
- /* the rest of the scanner code which assumes that each line that it is */
- /* handed consists entirely of printables except for the EOL char on the */
- /* end and possibly an EOF char at the "Start" of a line. */
- /* In particular, the NEXTCH macro will fail on machines with siged */
- /* chars if non-printables are not removed. It goes into an infinite */
- /* loop. */
- /* Note: I don't use library function "isprint" here because on the vax */
- /* it's definition is too loose (seems to accept characters with the top */
- /* bit set as printable). */
- if (!isascprn(*p)) /* If not a printable character. */
- {
- ps_t ps;
- char c = *p;
- ubyte_ uc = *((ubyte_ *) p);
- ps.ps_line = globalno;
- ps.ps_column = p-p_line+1;
- if (strlen(chabbrev(c))==0)
- sprintf(linet1,
- "Non printable character (Sym=<none>, Dec=%03u, Hex=%02X, Oct=%03o).",
- (unsigned) uc,(unsigned) uc,(unsigned) uc);
- else
- sprintf(linet1,
- "Non printable character (Sym=%s, Dec=%03u, Hex=%02X, Oct=%03o).",
- chabbrev(c),(unsigned) uc,(unsigned) uc,(unsigned) uc);
- lr_err(&ps,linet1);
- *p=CENSORCH;
- }
- p++;
- }
- /* Assert: p_line points to the start of the current line. */
- /* Assert: p points to the EOL at the end of the current line. */
-
- /* Check that the line is not too long. */
- if ((p-p_line)>inln_max)
- {
- ps_t ps;
- ps.ps_line = globalno;
- ps.ps_column = inln_max+1;
- lr_err(&ps,"Input line is too long (this character is the first offender).");
- sprintf(linet1,"Currently, the maximum allowable input line length is %lu.",
- (unsigned long) inln_max);
- lr_mes(&ps,linet1);
- lr_mes(&ps,"Note: You can change this using a pragma directive (@p).");
- }
-
- /* Now check for trailing spaces. */
- if ((p != p_line) && (*(p-1) == ' '))
- {
- ps_t ps;
- ps.ps_line = globalno;
- ps.ps_column = p-p_line;
- lr_war(&ps,"Line has trailing spaces up to and including this space.");
- }
-
- /* Construct a line record and append the record to the line list. */
- /* Note that the line scrap encompasses the trailing EOL. */
- {
- ln_t line;
- line.ln_global = globalno;
- line.ln_local = localno;
- line.ln_body.sc_first = p_line;
- line.ln_body.sc_last = p;
- /* Note: We do not set sc_white as it is not used in lines. */
- ls_add(line_list,PV &line);
- }
-
- /* Finally, set the line scanning variables to the start of the line. */
- /* We can't do this earlier in case the start of the line was a */
- /* non-printable and got substituted (ch might pick it up). */
- p_sol = p_line;
- p_ch = p_line;
- ch = *p_line;
-
- } /* End of prepline. */
-
- /* NEXTCH can be called continuously after an initializing call to prepline. */
- /* After a call to NEXTCH, p_sol, p_ch, ch are all well-defined. p_sol points */
- /* to the start of the current line, p_ch points to the current character, */
- /* and ch contains *p_ch. NEXTCH can be called repeatedly forever. When it */
- /* hits the EOF character, it sticks on it and returns it forever. */
- /* Note: The "ch<' '" is an optimized form of "(ch==EOL)||(ch=EOFCH)". Speed */
- /* is very important here as this macro is called in scanning tightloops. */
- /* This line of code is a little tricky so read it carefully. */
- /* WARNING: The ch<' ' will cause an infinite loop if a character appears */
- /* that satisfies this condition without being EOF or EOL (e.g. a control */
- /* char (meant to be filtered out earlier) or a top-bit-set character on */
- /* machines with signed character type. */
- #define NEXTCH {if (ch<' ') {if (ch==EOL) prepline(p_ch+1);} else ch= *++p_ch;}
-
- /******************************************************************************/
- /* Scanner Support Routines */
- /******************************************************************************/
-
- LOCAL ps_t *psofch P_((void));
- LOCAL ps_t *psofch()
- /* Returns a pointer to an internal static ps structure holding the line and */
- /* column number of the current character ch. */
- {
- STAVAR ps_t chps;
- chps.ps_line = globalno;
- chps.ps_column = p_ch-p_sol+1;
- return &chps;
- }
-
- /******************************************************************************/
-
- LOCAL void grabchps P_((p_ps_t));
- LOCAL void grabchps(p_ps)
- /* Writes the position of the current ch into the argument position struct. */
- p_ps_t p_ps;
- {
- p_ps->ps_line = globalno;
- p_ps->ps_column = p_ch-p_sol+1;
- }
-
- /******************************************************************************/
-
- LOCAL void sendspec P_((p_ps_t,tk_k_t,ubyte));
- LOCAL void sendspec(p_tkps,tk_kind,tk_gen)
- /* Appends a non-text token of kind tk_kind to the end of the token list. */
- /* p_ps is a pointer to a position structure giving the position of the */
- /* first character of the token. tk_gen is the general token attribute. */
- p_ps_t p_tkps;
- tk_k_t tk_kind;
- ubyte tk_gen;
- {
- tk_t token;
- token.tk_kind = tk_kind;
- ASSIGN(token.tk_ps,*p_tkps);
- token.tk_sc.sc_first = NULL;
- token.tk_sc.sc_last = NULL;
- token.tk_sc.sc_white = TRUE;
- token.tk_gen = tk_gen;
- ls_add(token_list,PV &token);
- }
-
- /******************************************************************************/
-
- LOCAL void sendtext P_((p_ps_t,char *,char *,bool));
- LOCAL void sendtext(p_tkps,p_first,p_last,is_white)
- /* Appends a text token to the end of the token list. */
- /* IN: p_ps is a pointer to a position structure giving the position of the */
- /* first character of the token. */
- /* IN: p_first and p_last point to the first and last byte of the text scrap. */
- /* IN: is_white should be set to TRUE iff scrap is entirely whitespace. */
- p_ps_t p_tkps;
- char *p_first;
- char *p_last;
- bool is_white;
- {
- tk_t token;
-
- /* Empty text scraps should never be generated. */
- as_cold(p_first<=p_last,"sendtext: Text scrap bounds are bad.");
-
- /* If ch=EOL then we should be scanning more text, not shipping it! */
- as_cold(ch!=EOL,"senttext: Shipping text while still more to scan.");
-
- /* Send the text token. */
- token.tk_kind = TK_TEXT;
- ASSIGN(token.tk_ps,*p_tkps);
- token.tk_sc.sc_first = p_first;
- token.tk_sc.sc_last = p_last;
- token.tk_sc.sc_white = is_white;
- token.tk_gen = DONTCARE;
- ls_add(token_list,PV &token);
- }
-
- /******************************************************************************/
-
- LOCAL void add_eof P_((void));
- LOCAL void add_eof()
- /* This function adds terminators to the line and token list. */
- /* 1. It adds a TK_EOF token to the end of the token list. */
- /* 2. It adds a visible <eof> line to the end of the line list. */
- /* This assists the parser by allowing it to point diagnostic messages to a */
- /* visible EOF marker rather than pointing vaguely to the end of the last */
- /* line of the input file which (by the way) may not even exist! */
- {
- STAVAR char *eofstr = "<End-Of-File>\n";
- ln_t line;
- ps_t ps;
-
- /* When the liner mini package encounters an end of file marker, it stops */
- /* dead on the marker and returns EOFCH forever. scan_file() eventually gets */
- /* the message and drops out. However, in all of this, the line numbers are */
- /* not incremented to indicate that we have moved to an EOF line. This is */
- /* intended, as we do not want EOFs to appear in the listing for include */
- /* files; only at the end of the main input file. Thus, here we effectively */
- /* perform the liner function of moving from the last line of the input file */
- /* to the imaginary line containing the EOF marker. This is done by */
- /* incrementing the line numbers. Note that the fact that these line number */
- /* variables are incorrect from the point of detection of the final EOF to */
- /* here doesn't matter as no tokens or diagnostics are ever added after an */
- /* EOF is detected. */
- globalno++;
- localno++;
-
- /* Add a line to represent the EOF marker. */
- line.ln_global = globalno;
- line.ln_local = localno;
- line.ln_body.sc_first = eofstr;
- line.ln_body.sc_last = eofstr+strlen(eofstr)-1;
- /* Note: We do not set sc_white as it is not used in lines. */
- ls_add(line_list,PV &line);
-
- /* Add a TK_EOF token to the end of the token list. */
- ps.ps_line = globalno;
- ps.ps_column = 1;
- sendspec(&ps,TK_EOF,DONTCARE);
- }
-
- /******************************************************************************/
- /* The Scanner Proper */
- /******************************************************************************/
-
- LOCAL void skiptoeol P_((void));
- LOCAL void skiptoeol()
- {
- while (ch != EOL)
- NEXTCH;
- }
-
- /******************************************************************************/
-
- /* The incl_fil function calls this, so we have to declare it in advance. */
- LOCAL void scan_file P_((char *));
-
- LOCAL void incl_fil P_((p_ps_t));
- LOCAL void incl_fil(p_ps)
- /* Upon entry, the current character is the "i" of an "@i" sequence. Our task */
- /* is first to see if the sequence occurred at the start of a line (the only */
- /* point at which it is legal) and issue an error if it isn't. If it is legal,*/
- /* we have to read in the specified file and scan that. The included file */
- /* replaces exactly the line starting with the "@i" command and we return */
- /* to the "calling" file with the current position being the EOL character of */
- /* the include line. */
- p_ps_t p_ps;
- {
- /* Complain if the include directive was not at the start of a line. */
- if (p_ch-1 != p_sol)
- {
- lr_err(p_ps,"Include sequence must be at the beginning of a line.");
- lr_mes(p_ps,"Include ignored.");
- skiptoeol();
- return;
- }
-
- /* The include command should be followed by a blank. Get the next char. */
- NEXTCH;
-
- /* Complain if the next character is not a blank. */
- if (ch != ' ')
- {
- ps_t ps;
- ASSIGN(ps,*p_ps);
- ps.ps_column+=2;
- lr_err(&ps,"Include sequence (@i) must be followed by a blank.");
- lr_mes(&ps,"Example include: @i macros.fwi");
- lr_mes(&ps,"Include ignored.");
- skiptoeol();
- return;
- }
-
- /* Complain if the include level is too high. */
- if (inclevel == MAX_INCL)
- {
- lr_err(p_ps,"This include file is nested too deeply. It's probably recursive.");
- sprintf(linet1,"The maximum level of nested includes is %u.",
- (unsigned) MAX_INCL);
- lr_mes(p_ps,linet1);
- lr_mes(p_ps,"Include ignored.");
- skiptoeol();
- return;
- }
-
- {/* This construct does the work of the include. */
- /* Warning: The following variables MUST be declared automatic. */
- char *p_filename;
- char *p_tempname;
- ulong length;
- char *p;
- ulong xinln_max;
- char xspecial;
- char *xp_eof;
- ulong xlocalno;
- char *xp_sol;
- char *xp_ch;
- char xch;
-
- /* We save stack space by sticking this filename in the heap. */
- p_filename=mm_temp((size_t) FILENAME_MAX+1+10); /* 10 is for paranoia. */
- p_tempname=mm_temp((size_t) FILENAME_MAX+1+10); /* 10 is for paranoia. */
-
- /* The rest of the line is supposed to hold a filename. Copy it. */
- NEXTCH;
- p=p_tempname;
- length=0;
- while (ch!=EOL)
- {
- if (++length > FILENAME_MAX)
- {
- lr_err(p_ps,
- "This include command's file specification is too long.");
- if (option.op_b7_b)
- sprintf(linet1,"The maximum file name length is %s characters.",
- SUPPVAL);
- else
- sprintf(linet1,"The maximum file name length is %u characters.",
- (unsigned) FILENAME_MAX);
- lr_mes(p_ps,linet1);
- lr_mes(p_ps,"Include ignored.");
- skiptoeol();
- return;
- }
- *p++=ch;
- NEXTCH;
- }
- *p=EOS;
- /* Note: Current position is now on the EOL at the end of the @i line. */
-
- /* Complain if the user has not specified a filename. */
- if (strlen(p_tempname) ==0)
- {
- lr_err(psofch(),"Expecting the name of a file to include.");
- return;
- }
-
- /* Perform the necessary filename inheritance. */
- strcpy(p_filename,"");
- fn_ins(p_filename,option.op_f_s);
- fn_ins(p_filename,".fwi");
- fn_ins(p_filename,option.op_i_s);
- fn_ins(p_filename,p_tempname);
-
- /* Include the included file by calling scan_file recursively. */
- /* Save and restore all variables in instantiation scope. */
- xinln_max = inln_max;
- xspecial = specialch;
- xp_eof = p_eof;
- xlocalno = localno;
- xp_sol = p_sol;
- xp_ch = p_ch;
- xch = ch;
- inclevel++;
- scan_file(p_filename);
- inclevel--;
- ch = xch;
- p_ch = xp_ch;
- p_sol = xp_sol;
- localno = xlocalno;
- p_eof = xp_eof;
- specialch = xspecial;
- inln_max = xinln_max;
- }
- }
-
- /******************************************************************************/
-
- LOCAL void do_ascii P_((p_ps_t));
- LOCAL void do_ascii(p_psspec)
- /* Upon entry, the current character is the '^' of a @^ sequence. The task is */
- /* to parse the following ascii code and generate a text token. */
- p_ps_t p_psspec;
- {
- ubyte base; /* Base of the number we are scanning. */
- ubyte digits; /* Number of digits expected. */
- uword val; /* Value of target character. */
- ubyte i; /* Looping variable. */
- STAVAR char alphab[256]; /* Static alphabet array to which to point scraps. */
- STAVAR bool init=FALSE; /* Tells if alphab has been initialized. */
-
- /* Establish an array containing the ascii character set. Later on we can */
- /* point the sc_first and sc_last pointers to particular characters. */
- if (!init) {uword i; for (i=0;i<256;i++) alphab[i]=(char) i; init=TRUE;}
-
- /* Make sure that the base character is legal. */
- NEXTCH;
- switch(toupper(ch))
- {
- case 'B': base= 2; digits=8; break;
- case 'O':
- case 'Q': base= 8; digits=3; break;
- case 'D': base=10; digits=3; break;
- case 'H':
- case 'X': base=16; digits=2; break;
- default : lr_err(psofch(),"Expecting one of 'B', 'Q', 'D', 'H'.");
- lr_mes(psofch(),"(For Binary, Octal, Decimal, and Hexadecimal).");
- base=10;
- goto trouble;
- }
-
- /* Parse opening parenthesis. */
- NEXTCH;
- if (ch!='(')
- {lr_err(psofch(),"Expecting '('.");goto trouble;}
-
- val=0;
- for (i=0;i<digits;i++)
- {
- char uch;
- ubyte d;
-
- NEXTCH;
- uch=toupper(ch);
- if (('0'<=uch) && (uch<='9'))
- d=uch-'0';
- else
- if ('A'<=uch && uch<='F')
- d=10+uch-'A';
- else
- d=100;
- if (d>=base)
- {lr_err(psofch(),"Illegal digit."); goto trouble;}
- val = base*val + d;
- }
-
- /* Parse closing parenthesis. */
- NEXTCH;
- if (ch!=')')
- {lr_err(psofch(),"Expecting ')'.");goto trouble;}
-
- /* Make sure that the number is not too big (this is possible in decimal). */
- if (val>255)
- {
- lr_err(psofch(),"Character number is too large.");
- lr_mes(psofch(),"Character number must be in the range [0,255] (decimal).");
- goto trouble;
- }
-
- /* Success! Now we can parcel it up into a scrap! */
- sendtext(p_psspec,&alphab[val],&alphab[val],ch==' ' || ch==EOL);
- return;
-
- trouble:
- /* Jump here after a specific diagnostic to give the user a reminder of */
- /* how to specify an ascii character constant. */
- switch (base)
- {
- case 2:
- lr_mes(psofch(),
- "A binary character representation takes the form \"@^B(dddddddd)\".");
- lr_mes(psofch(),
- "(exactly 8 digits) where each digit d is either 0 or 1.");
- break;
- case 8:
- lr_mes(psofch(),
- "An octal character representation takes the form \"@^Q(ddd)\" (or \"@^O(ddd)\").");
- lr_mes(psofch(),
- "(exactly 3 digits) where each digit d is in the range 0..7.");
- break;
- case 10:
- lr_mes(psofch(),
- "A decimal character representation takes the form \"@^D(ddd)\".");
- lr_mes(psofch(),
- "(exactly 3 digits) where each digit d is in the range 0..9.");
- break;
- case 16:
- lr_mes(psofch(),
- "A hexadecimal character representation takes the form \"@^X(dd)\" (or \"@^H(dd)\").");
- lr_mes(psofch(),
- "(exactly 2 digits) where each digit d is in the range 0..9,A..F.");
- break;
- default: as_bomb("do_ascii: trouble base switch defaulted.");
- }
- }
-
- /******************************************************************************/
-
- LOCAL void do_name P_((p_ps_t));
- LOCAL void do_name (p_psspec)
- /* Upon entry, the current character is the # of a @#. The task is to parse */
- /* it and transmit a name token. */
- p_ps_t p_psspec;
- {
- as_cold(ch=='#',"do_name: character is wrong.");
- NEXTCH;
- if ((ch==EOL) || (ch==' '))
- {lr_err(psofch(),"Expecting a printable character."); return;}
-
- /* Transmit a name token. */
- sendspec(p_psspec,TK_NAME,(ubyte) ch);
- }
-
- /******************************************************************************/
-
- LOCAL void do_pgind P_((uword,p_pt_t));
- LOCAL void do_pgind(numarg,arg)
- /* Parse an indentation pragma. */
- uword numarg;
- p_pt_t arg;
- {
- bool pragind;
- ps_t psprag;
-
- /* Make sure that there are exactly three arguments. */
- if (numarg != 3) /* "indentation" "none|blank". */
- {
- lr_err(&arg[0].pt_ps,
- "This indentation pragma has the wrong number of arguments.");
- goto help;
- }
-
- /* Make sure that the second argument is an "=". */
- if (0 != strcmp(arg[2].pt_pstr,"="))
- {
- lr_err(&arg[2].pt_ps,"Expecting \"=\".");
- goto help;
- }
-
- /* Check the third argument. */
- if (strcmp(arg[3].pt_pstr,"none" )==0) pragind=FALSE;
- else if (strcmp(arg[3].pt_pstr,"blank")==0) pragind=TRUE;
- else
- {
- lr_err(&arg[3].pt_ps,"Expecting either \"none\" or \"blank\".");
- goto help;
- }
-
- /* Construct a shorthand for the start of the pragma. */
- ASSIGN(psprag,arg[0].pt_ps);
-
- /* Make sure that the pragma does not contradict an earlier pragma. */
- if (seenind && (tgindent!=pragind))
- {
- sprintf(linet1,"This pragma is opposed by the pragma at line %lu.",
- (unsigned long) psprag.ps_line);
- lr_mes(&ps_ind,linet1);
- sprintf(linet1,"This pragma opposes the pragma at line %lu.",
- (unsigned long) ps_ind.ps_line);
- lr_err(&psprag,linet1);
- lr_mes(&psprag,"You can have as many indentation pragmas as you like,");
- lr_mes(&psprag,"but they all have to be the same!");
- lr_mes(&psprag,"Pragma ignored.");
- return;
- }
-
- /* Success: Record the pragma information. */
- seenind = TRUE; /* Record that we have seen a pragma. */
- tgindent = pragind; /* Record what the pragma said. */
- ASSIGN(ps_ind,psprag); /* Record where the pragma was. */
- return;
-
- help:
- lr_mes(&arg[0].pt_ps,
- "The correct format is: \"@p indentation = none|blank\".");
- lr_mes(&arg[0].pt_ps,"Pragma ignored.");
- return;
- }
-
- /******************************************************************************/
-
- LOCAL void do_pginl P_((uword,p_pt_t));
- LOCAL void do_pginl(numarg,arg)
- /* Parse a maximum input line length pragma. */
- uword numarg;
- p_pt_t arg;
- {
- char *numstr;
- uword spn;
-
- /* Make sure that there are exactly three arguments. */
- if (numarg != 3) /* "max..length = <num>". */
- {
- lr_err(&arg[0].pt_ps,"This pragma has the wrong number of arguments.");
- goto help;
- }
-
- /* Make sure that the second argument is "=". */
- if (0 != strcmp(arg[2].pt_pstr,"="))
- {lr_err(&arg[2].pt_ps,"Expecting \"=\"."); goto help;}
-
- /* Set up an abbreviation. */
- numstr=arg[3].pt_pstr;
-
- /* See if the value is "infinity". */
- if (strcmp(numstr,"infinity")==0)
- {inln_max=INMAXINF; return;}
-
- /* Calculate length of longest prefix containing all decimal digits. */
- /* Check that there are no illegal digits. */
- spn=strspn(numstr,"0123456789");
- if (spn != strlen(numstr))
- {
- ps_t ps;
- ASSIGN(ps,arg[3].pt_ps);
- ps.ps_column+=spn;
- lr_err(&ps,"Illegal digit. Value must consist entirely of decimal digits.");
- lr_mes(&ps,"You can also use the value \"infinity\".");
- lr_mes(&ps,"Pragma ignored.");
- return;
- }
-
- /* Check that the number is not too long. */
- if (strlen(numstr)>8)
- {
- lr_err(&arg[3].pt_ps,"Too many digits. The maximum is eight.");
- lr_mes(&arg[3].pt_ps,"Pragma ignored.");
- return;
- }
-
- /* Convert the argument into an integer. */
- {
- ulong val;
- int result;
- /* Note: Should really be %lu, but the Vax doesn't know about the %u */
- /* in sscanf and so we make do with %ld. */
- result=sscanf(numstr,"%ld",&val);
- as_cold(result==1,"do_pginl:sscanf failed.");
- inln_max=val;
- }
- return;
-
- help:
- lr_mes(&arg[0].pt_ps,
- "The correct format is: \"@p maximum_input_line_length = <num>|infinity\".");
- lr_mes(&arg[0].pt_ps,"Pragma ignored.");
- return;
- }
-
- /******************************************************************************/
-
- LOCAL void do_pgotl P_((uword,p_pt_t));
- LOCAL void do_pgotl(numarg,arg)
- /* Parse a maximum product file line length pragma. */
- uword numarg;
- p_pt_t arg;
- {
- char *numstr;
- uword spn;
- ulong val;
- ps_t psprag;
-
- /* Set up an abbreviation. */
- ASSIGN(psprag,arg[0].pt_ps);
-
- /* Make sure that there are exactly three arguments. */
- if (numarg != 3) /* "max..length" "=" "value". */
- {
- lr_err(&arg[0].pt_ps,"This pragma has the wrong number of arguments.");
- goto help;
- }
-
- /* Make sure that the second argument is an "=". */
- if (0 != strcmp(arg[2].pt_pstr,"="))
- {lr_err(&arg[2].pt_ps,"Expecting \"=\"."); goto help;}
-
- /* Set up an abbreviation. */
- numstr=arg[3].pt_pstr;
-
- /* See if the value is "infinity". */
- if (strcmp(numstr,"infinity")==0)
- {val=TGMAXINF; goto gotvalue;}
-
- /* Calculate length of longest prefix containing all decimal digits. */
- /* Check that there are no illegal digits. */
- spn=strspn(numstr,"0123456789");
- if (spn != strlen(numstr))
- {
- ps_t ps;
- ASSIGN(ps,arg[3].pt_ps);
- ps.ps_column+=spn;
- lr_err(&ps,"Illegal digit. Value must consist entirely of decimal digits.");
- lr_mes(&ps,"You can also use the value \"infinity\".");
- lr_mes(&ps,"Pragma ignored.");
- return;
- }
-
- /* Check that the number is not too long. */
- if (strlen(numstr)>8)
- {
- lr_err(&arg[3].pt_ps,"Too many digits. The maximum is eight.");
- lr_mes(&arg[3].pt_ps,"Pragma ignored.");
- return;
- }
-
- /* Convert the argument into an integer. */
- {
- int result=sscanf(numstr,"%ld",&val);
- as_cold(result==1,"do_pgotl:sscanf failed.");
- }
-
- gotvalue:
- /* Make sure that the pragma does not contradict an earlier pragma. */
- if (seenlimo && (tglinmax!=val))
- {
- sprintf(linet1,"This pragma is opposed by the pragma at line %lu.",
- (unsigned long) psprag.ps_line);
- lr_mes(&ps_limo,linet1);
- sprintf(linet1,"This pragma opposes the pragma at line %lu.",
- (unsigned long) ps_limo.ps_line);
- lr_err(&psprag,linet1);
- lr_mes(&psprag,"You can have as many output line length pragmas");
- lr_mes(&psprag,"as you like, but they all have to be the same!");
- lr_mes(&psprag,"Pragma ignored.");
- return;
- }
-
- /* If we got this far then the pragma is just the same as an earlier one. */
- /* We don't want to set the pragma position to the later pragma so we */
- /* return now. */
- if (seenlimo) return;
-
- /* Success. Set the variables. */
- tglinmax=val;
- seenlimo=TRUE;
- ASSIGN(ps_limo,psprag);
- return;
-
- help:
- lr_mes(&arg[0].pt_ps,
- "The correct format is: \"@p maximum_output_line_length = <num>|infinity\".");
- lr_mes(&arg[0].pt_ps,"Pragma ignored.");
- return;
- }
-
- /******************************************************************************/
-
- LOCAL void do_pgnpg P_((uword,p_pt_t));
- LOCAL void do_pgnpg(numarg,arg)
- /* Parse a newpage typesetter directive. */
- uword numarg;
- p_pt_t arg;
- {
- /* Make sure that there is exactly one argument. */
- if (numarg > 1) /* "new_page" */
- {
- lr_err(&arg[2].pt_ps,"The new_page directive does not take arguments.");
- lr_mes(&arg[2].pt_ps,"Directive ignored.");
- return;
- }
- sendspec(&arg[0].pt_ps,TK_NPAG,DONTCARE);
- }
-
- /******************************************************************************/
-
- LOCAL void do_pgtoc P_((uword,p_pt_t));
- LOCAL void do_pgtoc(numarg,arg)
- /* Parse a table of contents typesetter directive. */
- uword numarg; /* Number of arguments to table of contents directive. */
- p_pt_t arg; /* Array describing arguments. */
- {
- /* Make sure that there is exactly one argument. */
- if (numarg > 1) /* "table_of_contents" */
- {
- lr_err(&arg[2].pt_ps,
- "The table_of_contents directive does not take arguments.");
- lr_mes(&arg[2].pt_ps,"Directive ignored.");
- return;
- }
- sendspec(&arg[0].pt_ps,TK_TOCS,DONTCARE);
- }
-
- /******************************************************************************/
-
- LOCAL void do_pgvsk P_((uword,p_pt_t));
- LOCAL void do_pgvsk(numarg,arg)
- /* Parse a vskip typesetter directive. */
- uword numarg; /* Number of arguments to indentation directive. */
- p_pt_t arg; /* Array describing arguments. */
- {
- char *numstr;
- uword spn;
-
- /* Make sure that there are exactly three arguments. */
- if (numarg != 3) /* "vskip" n "mm". */
- {
- lr_err(&arg[0].pt_ps,"This directive has the wrong number of arguments.");
- goto help;
- }
-
- /* Make sure that the third argument is "mm". */
- if (0 != strcmp(arg[3].pt_pstr,"mm"))
- {lr_err(&arg[3].pt_ps,"Expecting \"mm\"."); goto help;}
-
- /* Set up an abbreviation. */
- numstr=arg[2].pt_pstr;
-
- /* Calculate length of longest prefix containing all decimal digits. */
- /* Check that there are no illegal digits. */
- spn=strspn(numstr,"0123456789");
- if (spn != strlen(numstr))
- {
- ps_t ps;
- ASSIGN(ps,arg[2].pt_ps);
- ps.ps_column+=spn;
- lr_err(&ps,"Illegal digit.");
- lr_mes(&ps,"Value must consist entirely of decimal digits.");
- lr_mes(&ps,"Directive ignored.");
- return;
- }
-
- /* Check that the number is not too long. */
- if (strlen(numstr)>3)
- {
- lr_err(&arg[2].pt_ps,"Too many digits. The maximum is three.");
- lr_mes(&arg[2].pt_ps,"Directive ignored.");
- return;
- }
-
- /* Convert the argument into an integer. */
- {
- ulong val;
- int result;
- result=sscanf(numstr,"%ld",&val);
- as_cold(result==1,"do_pginl:sscanf failed.");
- if (val>255)
- {
- lr_err(&arg[2].pt_ps,"Value too large. Maximum is 255.");
- lr_mes(&arg[2].pt_ps,"Directive ignored.");
- return;
- }
- sendspec(&arg[0].pt_ps,TK_SKIP,(ubyte) val);
- }
- return;
-
- help:
- lr_mes(&arg[0].pt_ps,"The correct format is: \"@t vskip <num> mm\".");
- lr_mes(&arg[0].pt_ps,"Directive ignored.");
- return;
- }
-
- /******************************************************************************/
-
- LOCAL void do_pgtit P_((uword,p_pt_t));
- LOCAL void do_pgtit(numarg,arg)
- /* Parse a title typesetter directive. */
- uword numarg; /* Number of arguments to title directive. */
- p_pt_t arg; /* Array describing arguments. */
- {
- uword align;
- uword font;
- char *p_sot,*p_eot;
-
- /* Make sure that there are at least three arguments. */
- if (numarg < 4) /* "title <font> <align> <text>". */
- {lr_err(&arg[0].pt_ps,"This directive has too few arguments."); goto help;}
-
- /* Check the font argument. */
- if (strcmp(arg[2].pt_pstr,"normalfont" )==0) font=FT_NORM;
- else if (strcmp(arg[2].pt_pstr,"titlefont" )==0) font=FT_TITL;
- else if (strcmp(arg[2].pt_pstr,"smalltitlefont")==0) font=FT_STIT;
- else
- {
- lr_err(&arg[2].pt_ps,
- "Expecting one of {titlefont,smalltitlefont,normalfont}.");
- lr_mes(&arg[2].pt_ps,"Directive ignored.");
- return;
- }
-
- /* Check the alignment argument. */
- if (strcmp(arg[3].pt_pstr,"left" )==0) align=LR_LEFT;
- else if (strcmp(arg[3].pt_pstr,"right" )==0) align=LR_RIGH;
- else if (strcmp(arg[3].pt_pstr,"centre")==0) align=LR_CENT;
- else
- {
- lr_err(&arg[3].pt_ps,"Expecting one of {left,right,centre}.");
- if (strcmp(arg[3].pt_pstr,"center")==0)
- {
- lr_mes(&arg[3].pt_ps,"Note: Centre is spelt centRE, not centER.");
- lr_mes(&arg[3].pt_ps," This is my revenge for years of getting error messages");
- lr_mes(&arg[3].pt_ps," from TeX whenever I accidentally wrote \\centreline - Ross Williams.");
- }
- lr_mes(&arg[3].pt_ps,"Directive ignored.");
- return;
- }
-
- /* Now make sure that the remainder of the line is delimited by quotes. */
- p_sot=arg[4].pt_pinl;
- p_eot=p_sot+strlen(p_sot)-1;
- if (*p_sot!='"' || *p_eot!='"' || p_sot==p_eot)
- {
- lr_err(&arg[4].pt_ps,"Text argument must be delimited by double quotes.");
- lr_mes(&arg[4].pt_ps,"Directive ignored.");
- return;
- }
- p_sot++; p_eot--;
-
- /* Ship out a token whose fields are all fully laden. */
- {
- tk_t token;
- token.tk_kind = TK_TITL;
- ASSIGN(token.tk_ps,arg[0].pt_ps);
- token.tk_sc.sc_first = p_sol+ (3+(p_sot-arg[1].pt_pinl));
- token.tk_sc.sc_last = p_sol+ (3+(p_eot-arg[1].pt_pinl));
- token.tk_sc.sc_white = FALSE;
- token.tk_gen = LRFT_PACK*font+align;
- ls_add(token_list,PV &token);
- }
- return;
-
- help:
- lr_mes(&arg[0].pt_ps,
- "The correct format is: \"@t title <font> <align> <text>\".");
- lr_mes(&arg[0].pt_ps,
- " where <font> = titlefont | smalltitlefont | normalfont.");
- lr_mes(&arg[0].pt_ps,
- " and <align> = left | centre | right.");
- lr_mes(&arg[0].pt_ps,
- " and <text> = text delimited by double quotes.");
- lr_mes(&arg[0].pt_ps,"Directive ignored.");
- return;
- }
-
- /******************************************************************************/
-
- LOCAL void do_pgtyp P_((uword,p_pt_t));
- LOCAL void do_pgtyp(numarg,arg)
- /* Parse a typesetter pragma. */
- uword numarg;
- p_pt_t arg;
- {
- tr_k_t pragtyp;
- ps_t psprag;
-
- /* Make sure that there are exactly three arguments. */
- if (numarg != 3) /* "typesetter" "=" "name". */
- {
- lr_err(&arg[0].pt_ps,
- "This typesetter pragma has the wrong number of arguments.");
- goto help;
- }
-
- /* Make sure that the second argument is "=". */
- if (0 != strcmp(arg[2].pt_pstr,"="))
- {
- lr_err(&arg[2].pt_ps,"Expecting \"=\".");
- goto help;
- }
-
- /* Check the third argument. */
- if (strcmp(arg[3].pt_pstr,"none")==0) pragtyp=TR_NONE;
- else if (strcmp(arg[3].pt_pstr,"tex" )==0) pragtyp=TR_TEX;
- else
- {
- lr_err(&arg[3].pt_ps,"Expecting either \"none\" or \"tex\".");
- goto help;
- }
-
- /* Construct a shorthand for the start of the pragma. */
- ASSIGN(psprag,arg[0].pt_ps);
-
- /* Make sure that the pragma does not contradict an earlier pragma. */
- if (seentyp && (tr_codes != pragtyp))
- {
- sprintf(linet1,"This pragma is opposed by the pragma at line %lu.",
- (unsigned long) psprag.ps_line);
- lr_mes(&ps_typ,linet1);
- sprintf(linet1,"This pragma opposes the pragma at line %lu.",
- (unsigned long) ps_typ.ps_line);
- lr_err(&psprag,linet1);
- lr_mes(&psprag,"You can have as many typesetter pragmas as you like,");
- lr_mes(&psprag,"but they all have to be the same!");
- lr_mes(&psprag,"Pragma ignored.");
- return;
- }
-
- /* Success: Record the pragma information. */
- seentyp = TRUE; /* Record that we have seen a pragma. */
- tr_codes = pragtyp; /* Record what the pragma said. */
- ASSIGN(ps_typ,psprag); /* Record where the pragma was. */
- return;
-
- help:
- lr_mes(&arg[0].pt_ps,
- "The correct format is: \"@p typesetter = none|tex\".");
- lr_mes(&arg[0].pt_ps,"Pragma ignored.");
- return;
- }
-
- /******************************************************************************/
-
- LOCAL void do_pragma P_((p_ps_t,bool));
- LOCAL void do_pragma(p_ps,is_typ)
- /* Upon entry, the current character is: */
- /* is_typ=FALSE => The P of a @p. */
- /* is_typ=TRUE => The T of a @t. */
- /* This function processes these contructs. */
- p_ps_t p_ps;
- bool is_typ;
- {
- #define MAXPARG 10 /* Maximum recorded arguments to a pragma. */
- #define PRAGMA_MAX 100 /* Maximum length of a pragma. */
- char praglin[PRAGMA_MAX+1]; /* Array to hold pragma as a complete line. */
- char pragstr[PRAGMA_MAX+1]; /* Array to hold pragma as strings. */
- pt_t pragarg[MAXPARG+1]; /* Array of pragma arguments. */
- uword length; /* Helps prevent scanning overrun. */
- char *p,*q; /* Temporary. */
- uword numarg,na; /* Number of arguments seen so far. */
-
- /* Complain if the pragma directive is not at the start of a line. */
- if (p_ch-1 != p_sol)
- {
- if (is_typ)
- {
- lr_err(p_ps,"Typesetter directive @t must be at the start of a line.");
- lr_mes(p_ps,"The rest of this line will be ignored.");
- }
- else
- {
- lr_err(p_ps,"Pragma sequence @p must be at the start of a line.");
- lr_mes(p_ps,"The rest of this line will be ignored.");
- }
- skiptoeol();
- goto help;
- }
-
- /* The include command should be followed by a blank. Get the next char. */
- NEXTCH;
-
- /* Complain if the next character is not a blank. */
- if (ch != ' ')
- {
- /* Note: If we position this error correctly, it gets put after the */
- /* help message! */
- if (is_typ)
- lr_err(p_ps,"Typesetter directive @t must be followed by a blank.");
- else
- lr_err(p_ps,"Pragma sequence @p must be followed by a blank.");
- skiptoeol();
- goto help;
- }
-
- /* Copy the rest of the line to the pragma arrays. */
- NEXTCH;
- p = &praglin[0];
- q = &pragstr[0];
- length=0;
- while (ch!=EOL)
- {
- if (++length > PRAGMA_MAX-3) /* 3 is for "@p " or "@t " */
- {
- if (is_typ)
- {
- lr_err(p_ps,"This typestter directive line is too long.");
- sprintf(linet1,"The maximum typesetter directive line length is %u characters.",
- (unsigned) PRAGMA_MAX);
- lr_mes(p_ps,linet1);
- }
- else
- {
- lr_err(p_ps,"This pragma line is too long.");
- sprintf(linet1,"The maximum pragma line length is %u characters.",
- (unsigned) PRAGMA_MAX);
- lr_mes(p_ps,linet1);
- }
- skiptoeol();
- goto help;
- }
- *p++=ch;
- *q++=ch;
- NEXTCH;
- }
- *p=EOS;
- *q=EOS;
- /* Note: Current position is now on the EOL at the end of the @p line. */
- /* That is the way we want to leave it for the scanspec() routine. */
-
- /* So far we have copied the body of the pragma line into two arrays. The */
- /* next lump of code parses that line into a sequence of non-blank arguments.*/
- /* The result is an array of pt_t objects each of which contains the */
- /* position of each argument, a pointer to the first character of each */
- /* argument in praglin, and also a pointer to a string containing the arg. */
- /* The string resides in the array pragstr which is the same as praglin */
- /* except that some blanks have been replaced by EOSs so as to allow us to */
- /* point into it to form strings. All this probably seems rather overdone */
- /* for the analysis of a "simple" pragma, but I have found that pulling the */
- /* different kinds of pragma lines apart separately is very messy. Far */
- /* better to suffer here in what is at least reasonably neat code than */
- /* later in the specific pragma routines. */
- numarg=0;
- p= &praglin[0];
- q= &pragstr[0];
- while (TRUE)
- {
- /* Skip whitespace between arguments. */
- while (*p==' ') {p++;q++;}
-
- /* Exit if we have hit the end of the line. */
- if ((numarg==MAXPARG) || (*p==EOS)) break;
-
- /* We have found another argument! */
- numarg++;
-
- /* Record the argument. */
- ASSIGN(pragarg[numarg].pt_ps,*p_ps);
- pragarg[numarg].pt_ps.ps_column=4+(p-praglin);
- pragarg[numarg].pt_pinl=p;
- pragarg[numarg].pt_pstr=q;
-
- /* Skip to the end of the argument. */
- while (*p!=' ' && *p!=EOS) {p++;q++;}
-
- /* Drop a null in the string array to complete string rep of argument. */
- *q=EOS;
- }
-
- /* At this point numarg is MIN(arguments,MAXPARG), and pragargs contains an */
- /* entry for each of the numarg arguments. */
-
- /* It is handy to have the position of the pragma itself handy. */
- ASSIGN(pragarg[0].pt_ps,*p_ps);
-
- /* CHECK: Make sure that the line and string arrays square up. */
- {
- uword i;
- for (i=1;i<=numarg;i++)
- {
- uword j;
- uword t=strlen(pragarg[i].pt_pstr);
- for (j=0;j<t;j++)
- {
- as_cold(pragarg[i].pt_pstr[j]==pragarg[i].pt_pinl[j],
- "do_pragma: String and line arrays are not equal.");
- as_cold((pragarg[i].pt_pstr-pragstr)==(pragarg[i].pt_pinl-praglin),
- "do_pragma: String and line arrays are out of synch.");
- }
- }
- }
-
- /* Complain if there are no arguments at all. */
- if (numarg==0)
- {
- if (is_typ)
- lr_err(p_ps,"Typesetter directive @t must be followed by a keyword.");
- else
- lr_err(p_ps,"Pragma sequence @p must be followed by a keyword.");
- skiptoeol();
- goto help;
- }
-
- /* Branch off to specific routines based on the first argument. */
- p=pragarg[1].pt_pstr; na=numarg;
- if (is_typ)
- {
- if (0==strcmp(p,"new_page" )) {do_pgnpg(na,pragarg);return;}
- if (0==strcmp(p,"table_of_contents" )) {do_pgtoc(na,pragarg);return;}
- if (0==strcmp(p,"title" )) {do_pgtit(na,pragarg);return;}
- if (0==strcmp(p,"vskip" )) {do_pgvsk(na,pragarg);return;}
- }
- else
- {
- if (0==strcmp(p,"indentation" )) {do_pgind(na,pragarg);return;}
- if (0==strcmp(p,"maximum_input_line_length" )) {do_pginl(na,pragarg);return;}
- if (0==strcmp(p,"maximum_output_line_length")) {do_pgotl(na,pragarg);return;}
- if (0==strcmp(p,"typesetter" )) {do_pgtyp(na,pragarg);return;}
- }
-
- help:
- if (is_typ)
- {
- lr_err(p_ps,"Unrecognised typesetter directive. Legal ones are:");
- lr_mes(p_ps," @t new_page");
- lr_mes(p_ps," @t table_of_contents");
- lr_mes(p_ps," @t title <font> <align> <string>");
- lr_mes(p_ps," @t vskip <num> mm");
- lr_mes(p_ps,"The blanks between arguments are important.");
- lr_mes(p_ps,"Typesetter directive ignored.");
- }
- else
- {
- lr_err(p_ps,"Unrecognised pragma. Possible legal pragmas are:");
- lr_mes(p_ps," @p indentation = none | blank");
- lr_mes(p_ps," @p maximum_input_line_length = <num>|infinity");
- lr_mes(p_ps," @p maximum_output_line_length = <num>|infinity");
- lr_mes(p_ps," @p typesetter = none | tex");
- lr_mes(p_ps,"The blanks between arguments are important.");
- lr_mes(p_ps,"Pragma ignored.");
- }
- }
-
- /******************************************************************************/
-
- LOCAL void chksol P_((void));
- LOCAL void chksol()
- /* This function is called when the current character is the character after */
- /* an @. The function checks to see if the @ was at the start of a line and */
- /* issues a error message if it isn't. */
- {
- ps_t ps;
- grabchps(&ps);
- if (ps.ps_column != 2)
- {
- ps.ps_column--;
- sprintf(linet1,"@%c is legal only at the start of a line.",ch);
- lr_err(&ps,linet1);
- }
- }
-
- /******************************************************************************/
-
- LOCAL void scanspec P_((void));
- LOCAL void scanspec()
- /* Upon entry the current character is the special character (usually '@'). */
- /* The task is to scan the special sequence. Upon exit, the current character */
- /* is the character following the special sequence. */
- {
- ps_t ps_spec; /* Position of start of special sequence. */
-
- /* Make a note of where the special sequence starts. */
- grabchps(&ps_spec);
-
- /* Move onto the character following the special (escape) character. */
- NEXTCH;
-
- /* Now react to the character. In most cases, the special sequence is simply */
- /* a marker in the input and we can simply transmit it. The nasty special */
- /* case sequences are left until the end of the switch statement. */
- /* Purists will complain about how all the case options are hardwired and */
- /* say that symbols should have been used. They once were, but were taken */
- /* out when it was discovered that the symbols had cryptic names (because of */
- /* the portability eight-character rule) and were only used here anyway. */
- switch (toupper(ch))
- {
- case '<': sendspec(&ps_spec,TK_ONAM,DONTCARE); break;
- case '>': sendspec(&ps_spec,TK_CNAM,DONTCARE); break;
- case '{': sendspec(&ps_spec,TK_ODEF,DONTCARE); break;
- case '}': sendspec(&ps_spec,TK_CDEF,DONTCARE); break;
- case '(': sendspec(&ps_spec,TK_OPAR,DONTCARE); break;
- case ')': sendspec(&ps_spec,TK_CPAR,DONTCARE); break;
- case ',': sendspec(&ps_spec,TK_COMA,DONTCARE); break;
- case '"': sendspec(&ps_spec,TK_QUOT,DONTCARE); break;
- case '/': sendspec(&ps_spec,TK_EMPH,DONTCARE); break;
- case 'A': sendspec(&ps_spec,TK_NSEC,1); chksol(); break;
- case 'B': sendspec(&ps_spec,TK_NSEC,2); chksol(); break;
- case 'C': sendspec(&ps_spec,TK_NSEC,3); chksol(); break;
- case 'D': sendspec(&ps_spec,TK_NSEC,4); chksol(); break;
- case 'E': sendspec(&ps_spec,TK_NSEC,5); chksol(); break;
- case '1': sendspec(&ps_spec,TK_PARM,1); break;
- case '2': sendspec(&ps_spec,TK_PARM,2); break;
- case '3': sendspec(&ps_spec,TK_PARM,3); break;
- case '4': sendspec(&ps_spec,TK_PARM,4); break;
- case '5': sendspec(&ps_spec,TK_PARM,5); break;
- case '6': sendspec(&ps_spec,TK_PARM,6); break;
- case '7': sendspec(&ps_spec,TK_PARM,7); break;
- case '8': sendspec(&ps_spec,TK_PARM,8); break;
- case '9': sendspec(&ps_spec,TK_PARM,9); break;
- case 'M': sendspec(&ps_spec,TK_MANY,DONTCARE); break;
- case 'Z': sendspec(&ps_spec,TK_ZERO,DONTCARE); break;
- case 'O': sendspec(&ps_spec,TK_FDEF,DONTCARE); chksol(); break;
- case '$': sendspec(&ps_spec,TK_MDEF,DONTCARE); chksol(); break;
- case EOL:
- lr_err(&ps_spec,"<special><endofline> is not a legal special sequence.");
- break;
- case ' ':
- lr_err(&ps_spec,"<special><space> is not a legal special sequence.");
- break;
- case '@':
- /* @ instructs FunnelWeb to replace the special construct with the */
- /* special character. Luckily one appears just before the @ !! */
- /* Note: FALSE is OK because space is not a legal specialch. */
- sendtext(&ps_spec,p_ch-1,p_ch-1,FALSE);
- break;
- case '-':
- /* - instructs FunnelWeb to suppress the following end of line. */
- if (*(p_ch+1) == EOL)
- NEXTCH
- else
- lr_err(&ps_spec,
- "Suppress EOL sequence is legal only at the end of a line.");
- break;
- case '+':
- /* + instructs FunnelWeb to insert an EOL. We can't look to the end of */
- /* the previous line to find an EOL as this might be the first line. */
- /* Running ahead to the end of the line is expensive, and having the */
- /* liner mini-package maintain a variable for it would be extra */
- /* housekeeping. Instead of all this, we just point to a static. */
- {STAVAR char stateol = EOL;
- sendtext(&ps_spec,&stateol,&stateol,TRUE);}
- break;
- case '=':
- /* = instructs FunnelWeb to change the special character to the */
- /* character following the <special>= sequence. */
- NEXTCH;
- if (ch == ' ')
- {
- lr_err(&ps_spec,"You cannot set the special character to <space>!");
- lr_mes(&ps_spec,"Special sequence ignored.");
- }
- else if (ch == EOL)
- {
- lr_err(&ps_spec,
- "You cannot set the special character to <endofline>!");
- lr_mes(&ps_spec,"Special sequence ignored.");
- }
- else
- specialch=ch;
- break;
- case '!':
- /* ! instructs FunnelWeb to ignore the rest of the line (a comment). */
- skiptoeol();
- break;
- case 'I':
- /* i instructs FunnelWeb to include a file. */
- incl_fil(&ps_spec);
- break;
- case '^':
- /* ^ instructs FunnelWeb to insert a specific ascii character. */
- do_ascii(&ps_spec);
- break;
- case '#':
- /* # instructs FunnelWeb to transmit a two character name "#c". */
- do_name(&ps_spec);
- break;
- case 'P':
- /* P is used as a miscellaneous PRAGMA. */
- do_pragma(&ps_spec,FALSE);
- break;
- case 'T':
- /* T introduces a one-line typesetting directive. */
- do_pragma(&ps_spec,TRUE);
- break;
- default:
- lr_err(&ps_spec,"Unknown special sequence.");
- break;
- }
-
- /* The switch statment absorbs the special sequence and its effects. */
- /* This NEXTCH places us on the character following the special sequence. */
- NEXTCH;
- }
-
- /******************************************************************************/
-
- LOCAL void scantext P_((void));
- LOCAL void scantext()
- /* Upon entry, we know that the current character is not EOF and that it is */
- /* not the special character. Our task is to parse as much text as we can and */
- /* ship it off as a text token. The scanner will probably spend most of its */
- /* time in the loops in this function so it is important that they be */
- /* efficient. That is why two loops are used to deal with detecting */
- /* whitespace rather than a flag. */
- /* Upon return, the current character is the character following the text */
- /* sequence. This is guaranteed to be the special character or an EOF. */
- {
- ps_t ps_start; /* Position of first character of text sequence. */
- char *p_first = p_ch; /* Pointer to first character of text sequence. */
-
- /* Grab a copy of the position of this token. */
- grabchps(&ps_start);
-
- /* Scan whitespace. */
- while (ch==' ' || ch==EOL)
- NEXTCH;
-
- /* If we hit something that ends a text token */
- /* then we can transmit a white text token. */
- if (ch==specialch || ch==EOFCH)
- {sendtext(&ps_start,p_first,p_ch-1,TRUE); return;}
-
- /* Otherwise we have some more (non-white) text to scan. */
- /* We can then send a non-white text token. */
- while (ch!=specialch && ch!=EOFCH)
- NEXTCH;
- sendtext(&ps_start,p_first,p_ch-1,FALSE);
- }
-
- /******************************************************************************/
-
- LOCAL void scan_file(p_fname)
- /* This function scans a single file. It's argument is the name of the file */
- /* to be scanned. scan_file calls the mapper to map in the file and then */
- /* scans the text of the mapped file using the liner mini-package. The result */
- /* of the scan is additions to the line and token list, and diagnostics sent */
- /* to the lister package. If an include directive is encountered, this */
- /* function is called recursively. */
- char *p_fname;
- {
- char *p_mapped; /* Pointer to the mapped file. */
- ulong length; /* Number of bytes in the mapped file. */
- char *errstr; /* Error string returned by mapper. */
- bool addedeol; /* Did we have to add an EOL to the end of the last line? */
-
- /* Check to see if the file exists. */
- if (!fexists(p_fname))
- {
- if (inclevel==0)
- {
- /* Failure to find the main file is a severe error. */
- if (option.op_b7_b)
- sprintf(linet1,"S: Error opening input file \"%s\".",SUPPNAME);
- else
- sprintf(linet1,"S: Error opening input file \"%s\".",p_fname);
- wl_l(linet1);
- /* Although strictly speaking we should suppress this error from the */
- /* screen stream unless option.op_s_b is set, absence of an input file */
- /* is such an important error, that we write it out anyway. */
- /* if (option.op_s_b) */
- wl_sj(linet1);
- num_sev++;
- return;
- }
- else
- {
- /* Failure to find an include file is an ordinary error. */
- ps_t ps;
- ps.ps_line = globalno;
- ps.ps_column = 4;
- lr_err(&ps,"Error opening include file.");
- if (option.op_b7_b)
- sprintf(linet1,
- "The include file's expanded name was \"%s\".",SUPPNAME);
- else
- sprintf(linet1,
- "The include file's expanded name was \"%s\".",p_fname);
- lr_mes(&ps,linet1);
- return;
- }
- }
-
- /* Map the specified file into memory. We need to change from the scanner */
- /* clock to the mapper clock to keep the time accounting correct here. */
- ck_stop(p_scan);
- ck_start(p_mapp);
- errstr=map_file(p_fname,&p_mapped,&length);
- ck_stop(p_mapp);
- ck_start(p_scan);
-
- /* Abort if the mapping was not possible. */
- if (errstr != NULL)
- if (inclevel==0)
- {
- /* Failure to map the main file is a severe error. */
- if (option.op_b7_b)
- sprintf(linet1,"S: Error reading input file \"%s\".",SUPPNAME);
- else
- sprintf(linet1,"S: Error reading input file \"%s\".",p_fname);
- wl_l(linet1); if (option.op_s_b) wl_sj(linet1);
- wl_l(errstr); if (option.op_s_b) wl_sj(errstr);
- num_sev++;
- return;
- }
- else
- {
- /* Failure to find an include file is an ordinary error. */
- ps_t ps;
- ps.ps_line = globalno;
- ps.ps_column = 4;
- lr_err(&ps,"Error reading include file.");
- lr_mes(&ps,errstr);
- if (option.op_b7_b)
- sprintf(linet1,"The include file's expanded name was \"%s\".",
- SUPPNAME);
- else
- sprintf(linet1,"The include file's expanded name was \"%s\".",
- p_fname);
- lr_mes(&ps,linet1);
- return;
- }
-
- /* Dump the mapped file if requested. */
- if (option.op_b1_b)
- {
- if (option.op_b7_b)
- sprintf(linet1,"Dump of mapped file \"%s\".",SUPPNAME);
- else
- sprintf(linet1,"Dump of mapped file \"%s\".",p_fname);
- wl_l(linet1);
- dm_mem(&f_l,p_mapped,length);
- }
-
- /* If the file is absolutely empty, we have to warn the user. Also, this is */
- /* a special case we can do without, and so we return here if file is empty. */
- if (length==0)
- {
- ps_t ps;
- /* The empty file could be the main file or an include file. */
- /* If the empty file is the main file, we want the diagnostic to point to */
- /* the EOF marker which will appear as line 1. */
- /* If the empty file is an include file, we wish to point the diagnostic */
- /* to the line containing the include command. This is globalno. */
- /* In both cases, we want the diagnostic to point to column 1. */
- ps.ps_column=1;
- if (inclevel==0)
- {
- ps.ps_line=1;
- lr_war(&ps,"Input file is empty (not a byte in syte)!");
- }
- else
- {
- ps.ps_line=globalno;
- lr_war(&ps,"Include file is empty (not a byte in syte)!");
- }
- return;
- }
-
- /* Scanning is considerably simplified if we can guarantee that we will not */
- /* run into an EOF without first hitting an EOL. The following code takes */
- /* care of this by tacking one on the end if necessary and also adds an */
- /* EOF character on the end, which also simplifies the scanning. We can get */
- /* away with all this because the mapper purposefully leaves at least two */
- /* bytes free for us at the end of the mapped file. */
- addedeol=FALSE;
- if (p_mapped[length-1] != EOL)
- {p_mapped[length++]=EOL; addedeol=TRUE;}
- p_mapped[length]=EOFCH;
-
- /* Initialize the variables "instantiated over a single file". */
- inln_max = 80;
- specialch = CH_DSPE;
- localno = 0;
- p_eof = &p_mapped[length];
-
- /* Crank up the line subscanner system with a call to prepline. */
- /* Then enter the main scanning loop. */
- /* All input consists of alternating special and text sequences */
- /* terminated by EOF. */
- prepline(p_mapped);
- while (ch!=EOFCH)
- if (ch==specialch)
- scanspec();
- else
- scantext();
-
- /* Now that we are at the end of the scanned file and the scanning markers */
- /* are all sitting on the end of the file, it is a good time to issue */
- /* diagnostics about problems at the end of the file. */
- if (addedeol)
- {
- ps_t ps;
- /* We want the diagnostic to point to the EOF line. Hence "global+1". */
- ps.ps_line = globalno+1;
- ps.ps_column = 1;
- if (inclevel==0)
- lr_war(&ps,"The last line of the input file was terminated by EOF.");
- else
- lr_war(&ps,"The last line of the include file was terminated by EOF.");
- lr_mes(&ps,"An EOL was inserted at the end of the last line.");
- }
- }
-
- /******************************************************************************/
-
- EXPORT void scanner(p_amapp,p_ascan)
- /* This is the scanner's main routine and the only exported function. */
- p_ck_t p_amapp; /* Mapper's clock (stopped). */
- p_ck_t p_ascan; /* Scanner's clock (running). */
- {
- /* Copy the arguments into globals where we can get at them. */
- p_mapp=p_amapp;
- p_scan=p_ascan;
-
- /* Apart from diagnostic messages sent to the lister, the only output of */
- /* the scanner is two global lists holding a list of lines and a list of */
- /* tokens. The scanner creates these lists simultaneously. */
- /* We have to initialize them here before we get into 'scan_file' which */
- /* calls itself recursively if an include file command is encountered. */
- line_list =ls_cre(sizeof(ln_t));
- token_list=ls_cre(sizeof(tk_t));
-
- /* Initialize all the variables instantiated throughout the entire scan. */
- globalno = 0;
- inclevel = 0;
- seenind = FALSE;
- seentyp = FALSE;
- seenlimo = FALSE;
-
- /* We also have to initialize localno in case the input file is empty and */
- /* it never gets initialized before being sucked into being used as the */
- /* local number for the end of file marker. */
- localno=0;
-
- /* Initialize the global indentation flag to the default value. */
- tgindent=TRUE;
-
- /* Initialize the global product line length limit to the default value. */
- tglinmax=80;
-
- /* Initialize the global typesetter flag to the default value. */
- tr_codes=TR_NONE;
-
- /* Scan the top level file whose name is obtained from the command line. */
- as_cold(option.op_f_b,"scanner: -F!!!!");
-
- /* Work out what the input file name should be. */
- {
- fn_t fname;
- strcpy(fname,""); /* Start with an empty string. */
- fn_ins(fname,".fw");
- fn_ins(fname,option.op_f_s);
- scan_file(fname);
- }
-
- /* The scan_file function scans the main input file and all of its included */
- /* files, but it does not append a TK_EOF token to the end. This call does */
- /* this and also adds a line to the line list for EOF. */
- add_eof();
- }
-
- /******************************************************************************/
- /* End of SCANNER.C */
- /******************************************************************************/
-