home *** CD-ROM | disk | FTP | other *** search
- /* Texchk -- a LaTeX syntax and spelling checker.
- Written by JP Massar, Thinking Machines Corporation, Cambridge, MA
- This code is hereby released into the public domain, for better or worse.
- */
-
- #include <stdio.h>
- #include <ctype.h>
-
- /* if your system doesn't have either string.h or strings.h you */
- /* may have to declare the string functions yourself */
- #ifdef BSD42
- #include <strings.h>
- #else
- #include <string.h>
- #endif
-
- #ifdef TMC
- #include <ctools.h>
- #else
- #include "ctools.h"
- #endif
-
- #include "texchk.h"
- #include "cmds.h"
- #include "texchars.h"
-
- Bool Verbose_Mode = F; /* -v option */
- Bool Check_Mode = F; /* -c option */
-
- int Indent_Level = 0; /* for verbose output mode */
-
- Stack_Entry Lex_Stack[MAX_ENTRIES]; /* environment stack */
- int Lex_TOS = -1;
-
- FILE *fp; /* file being processed */
- Bool Already_At_Eof = F;
-
- long Current_Line = 0; /* where we are in input text */
- long Current_Char = 0; /* where we are in input text */
- long Line_Length = 0; /* current line length */
- char Line_Buffer[MAXLL]; /* buffer for input text */
-
- Bool In_Math_Mode = F;
- int Math_Mode_Depth = 0;
-
- char Keyword_Buffer[MAX_KEYWORD_LENGTH];
-
-
- new_file ()
- {
- Current_Line = 0;
- Current_Char = 0;
- Line_Length = 0;
- In_Math_Mode = F;
- Math_Mode_Depth = 0;
- Indent_Level = 0;
- }
-
- do_indent (level) int level;
- {
- int j,i;
- for (j = 0; j < level; j++)
- for (i = 0; i < SPACES_PER_INDENT_LEVEL; i++) putc(' ',stderr);
- }
-
-
- lex_push (etype,keyword,linenum) envtype etype; char *keyword; long linenum;
-
- /* push an environment onto the stack */
-
- {
- if (++Lex_TOS >= MAX_ENTRIES) {
- fprintf(stderr,"Stack overflow...Process terminating.\n");
- texit();
- }
- Lex_Stack[Lex_TOS].etype = etype;
- Lex_Stack[Lex_TOS].keyword = keyword;
- Lex_Stack[Lex_TOS].linenum = linenum;
- }
-
-
- lex_pop (ptr_etype,ptr_keyword,ptr_linenum)
-
- /* pop an environment and return its components */
-
- envtype *ptr_etype;
- char **ptr_keyword;
- long *ptr_linenum;
-
- {
- if (Lex_TOS < 0) {
- fprintf(stderr,"Stack underflow...Process terminating\n");
- texit();
- }
- *ptr_etype = Lex_Stack[Lex_TOS].etype;
- *ptr_keyword = Lex_Stack[Lex_TOS].keyword;
- *ptr_linenum = Lex_Stack[Lex_TOS].linenum;
- Lex_TOS--;
- }
-
-
- curstack (ptr_etype,ptr_keyword,ptr_linenum)
-
- /* get the components of the current stack entry, but leave the entry */
- /* on the stack. */
-
- envtype *ptr_etype;
- char **ptr_keyword;
- long *ptr_linenum;
-
- {
- if (Stack_Empty) {
- fprintf(stderr,"Fatal error, bad call to curstack\n");
- texit();
- }
- lex_pop(ptr_etype,ptr_keyword,ptr_linenum);
- lex_push(*ptr_etype,*ptr_keyword,*ptr_linenum);
- }
-
-
- char *copy_keyword (starttoken,endtoken) int starttoken,endtoken;
-
- /* grab a keyword from the Line_Buffer and copy it into a static buffer */
-
- {
- int len;
- if (MAX_KEYWORD_LENGTH <= (len = (endtoken - starttoken) + 1)) {
- keyword_length_error();
- texit();
- }
- strncpy(Keyword_Buffer,Line_Buffer + starttoken,len);
- Keyword_Buffer[len] = '\0';
- return(Keyword_Buffer);
- }
-
-
- do_pop (etype,keyword) envtype etype; char *keyword;
-
- /* make sure that the current environment is the matching begin-environment */
- /* for the end-environment that we have just discovered. If so, pop the */
- /* environment off the stack. If not its an error. */
-
- {
-
- envtype oldetype;
- char *oldkeyword;
- long oldlinenum;
- char *s, *e;
-
- lex_pop(&oldetype,&oldkeyword,&oldlinenum);
-
- switch (etype) {
-
- case ESCAPE_END :
- s = "\\begin";
- e = "\\end";
- if (oldetype != ESCAPE_BEGIN) goto nesterror;
- if (0 != strcmp(oldkeyword,keyword)) goto nesterror;
- break;
-
- case RIGHT_SQUARE_BRACKET :
- s = "[";
- e = "]";
- if (oldetype != LEFT_SQUARE_BRACKET) goto nesterror;
- break;
-
- case RIGHT_CURLY_BRACKET :
- s = "{";
- e = "}";
- if (oldetype != LEFT_CURLY_BRACKET) goto nesterror;
- break;
-
- case MATH :
- s = "Begin Math Mode";
- e = "End Math Mode";
- if (oldetype != etype) goto nesterror;
- break;
-
- case DOUBLE_MATH :
- s = "Begin Display Math Mode";
- e = "End Display Math Mode";
- if (oldetype != etype) goto nesterror;
- break;
-
- }
-
- return(0);
-
- nesterror:
-
- nest_error(s,e,oldlinenum,oldkeyword);
- texit();
-
- }
-
-
- int get_a_char ()
-
- /* buffered input routine, to keep track of line number */
-
- {
- int ch,rval;
- if (Current_Char >= Line_Length) {
- switch (rval = getline(fp,Line_Buffer,MAXLL-2)) {
- case AT_EOF:
- return(EOF);
- break;
- case TOO_MANY_CHARS :
- line_too_long_error();
- texit();
- break;
- default :
- Line_Buffer[rval] = '\n';
- Line_Buffer[++rval] = '\0';
- Line_Length = rval;
- Current_Char = 0;
- Current_Line++;
- break;
- }
- }
- ch = (int) (255 & Line_Buffer[Current_Char++]);
- if (!LGL_CHAR(ch)) bad_char_error(ch,T);
- return(ch);
- }
-
-
- unget_a_char ()
-
- {
- if (Current_Char == 0) {
- fprintf(stderr,"Invalid unget...process terminating\n");
- texit();
- }
- Current_Char--;
- }
-
-
- char *get_keyword ()
-
- /* read a keyword. Keywords consist of contiguous alphabetic characters */
- /* keyword returned is in a static buffer. */
-
- {
- int starttoken,endtoken,ch;
- starttoken = Current_Char - 1;
- endtoken = Current_Char - 1;
- while (isalpha(ch = get_a_char())) {
- endtoken++;
- }
- if (ch == EOF) {
- Already_At_Eof = 1;
- }
- else unget_a_char();
- return(copy_keyword(starttoken,endtoken));
- }
-
-
- char *get_begin_end_keyword ()
-
- /* called after a \begin or \end construct is found. */
- /* begin and end keywords are enclosed in {}. */
- /* a warning is issued if there is any whitespace within the {}s */
- /* returns a string constituting what is in between the {}s save for */
- /* whitespace immediately after the { and immediately before the } */
-
- /* keyword returned is in a static buffer. */
-
- {
- int ch;
- int starttoken,endtoken;
-
- ch = get_a_char();
- if (ch != LCB) {
- no_brace_after_begin_end_error();
- texit();
- }
-
- starttoken = Current_Char;
- endtoken = starttoken - 1;
- while (RCB != (ch = get_a_char())) {
- if (ch == '\n')
- warning_close_brace();
- else if (ch == EOF) {
- eof_error();
- texit();
- }
- else
- endtoken++;
- }
-
- /* ignore whitespace after '{' and before '}' */
-
- if (ISWHITE(Line_Buffer[starttoken]) || ISWHITE(Line_Buffer[endtoken])) {
- warning_blanks_in_cb();
- }
-
- while (starttoken < endtoken && ISWHITE(Line_Buffer[starttoken]))
- starttoken++;
- if (starttoken >= endtoken) {
- blank_begin_end_error();
- texit();
- }
- while (endtoken > starttoken && ISWHITE(Line_Buffer[endtoken]))
- endtoken--;
- return(copy_keyword(starttoken,endtoken));
-
- }
-
-
- get_token (action,etype,keyword)
-
- /* get the next significant token from the input stream. Based on its type */
- /* an action to perform is computed. The significant part of the token is */
- /* returns in *keyword, which points to a static buffer. */
-
- /* returns 0 on encountering EOF, otherwise returns 1. */
-
- Actions *action;
- envtype *etype;
- char **keyword;
-
- {
- int ch,isbegin,isend;
-
- *keyword = 0;
- if (Already_At_Eof) return(0);
-
- readloop:
-
- if (EOF == (ch = get_a_char())) return(0);
-
- switch (ch) {
-
- case LSB :
- *etype = LEFT_SQUARE_BRACKET;
- *action = PUSH;
- *keyword = "[";
- return(1);
-
- case RSB :
- *etype = RIGHT_SQUARE_BRACKET;
- *action = POP;
- *keyword = "]";
- return(1);
-
- case LCB :
- *etype = LEFT_CURLY_BRACKET;
- *action = PUSH;
- *keyword = "{";
- return(1);
-
- case RCB :
- *etype = RIGHT_CURLY_BRACKET;
- *action = POP;
- *keyword = "}";
- return(1);
-
- case MATH_CHAR :
-
- /* Is the next character also a '$'? If so this is 'Display Math Mode' */
-
- if (EOF == (ch = get_a_char())) {
- *action = DOLLAR;
- *etype = MATH;
- *keyword = "$";
- Already_At_Eof = 1;
- }
- else if (ch == MATH_CHAR) {
- *action = DOLLAR_DOLLAR;
- *etype = DOUBLE_MATH;
- *keyword = "$$";
- }
- else {
- unget_a_char();
- *action = DOLLAR;
- *etype = MATH;
- *keyword = "$";
- }
- return(1);
-
- case ESCAPE :
-
- /* treat specially \begin and \end */
-
- if (EOF == (ch = get_a_char())) {
- eof_error();
- texit();
- }
-
- /* first check for single character non-alphabetic commands */
-
- if (!isalpha(ch)) {
- *action = CHECK_SINGLE;
- *etype = ESCAPE_SINGLE_CHAR;
- Keyword_Buffer[0] = ch;
- Keyword_Buffer[1] = '\0';
- *keyword = Keyword_Buffer;
- return(1);
- }
-
-
- *keyword = get_keyword();
- isbegin = (0 == strcmp(*keyword,BEGINSTRING));
- isend = (0 == strcmp(*keyword,ENDSTRING));
- if (!isbegin && !isend) {
- *action = CHECK;
- *etype = ESCAPE_ANY;
- return(1);
- }
-
- *etype = isbegin ? ESCAPE_BEGIN : ESCAPE_END;
- *action = isbegin ? PUSH : POP;
- *keyword = get_begin_end_keyword();
- return(1);
-
- case COMMENT :
-
- /* just read in the rest of the line and ignore what's on it */
-
- while ('\n' != (ch = get_a_char())) {
- if (EOF == ch) return(0);
- }
- goto readloop;
-
- default :
- goto readloop;
-
- }
-
- }
-
- push_math_mode (key) char *key;
- {
- if (Verbose_Mode) {
- do_indent(Indent_Level++);
- fprintf (
- stderr,"Line %d: Entering math mode using <%s>\n",Current_Line,key
- );
- }
- Math_Mode_Depth++;
- In_Math_Mode = T;
- lex_push(MATH,key,Current_Line);
- }
-
- pop_math_mode (key) char *key;
- {
- envtype etype;
- char *keyword;
- long linenum;
- if (Verbose_Mode) {
- do_indent(--Indent_Level);
- fprintf (
- stderr,"Line %d: Leaving math mode using <%s>\n",Current_Line,key
- );
- }
- Math_Mode_Depth--;
- In_Math_Mode = (Math_Mode_Depth > 0);
- lex_pop(&etype,&keyword,&linenum);
- }
-
-
- math_mode_action (action,keyword) Actions action; char *keyword;
-
- /* check for math mode tokens, and enter or leave math mode as appropriate */
-
- {
- char *stack_keyword;
- long linenum;
- envtype etype;
- char *key, *matching_keyword;
-
- switch (action) {
-
- /* If there is a matching '$' or '$$' as the latest entry on the stack */
- /* we pop it because it is a matching token. Otherwise, we push it, */
- /* even if we are already in math mode. */
-
- case (DOLLAR) :
- case (DOLLAR_DOLLAR) :
- key = (action == DOLLAR) ? "$" : "$$";
- if (!In_Math_Mode) {
- push_math_mode(key);
- break;
- }
- curstack(&etype,&stack_keyword,&linenum);
- if (0 != strcmp(key,stack_keyword)) {
- push_math_mode(key);
- }
- else {
- pop_math_mode(key);
- }
- break;
-
- /* just adjust Math Mode for PUSH and POP, because in process_file */
- /* we will do the actual pushing and popping of these environments. */
-
- case (PUSH) :
- if (is_math_environment(keyword)) {
- Math_Mode_Depth++;
- In_Math_Mode = T;
- }
- break;
-
- case (POP) :
- if (is_math_environment(keyword)) {
- Math_Mode_Depth--;
- In_Math_Mode = (Math_Mode_Depth == 0);
- }
- break;
-
- /* look for \( and \[ commands which put us into math mode, and \) and */
- /* \] commands which pop us out of math mode. Make sure if we are */
- /* popping that the proper pushed math mode command is the current */
- /* stack entry. */
-
- case (CHECK_SINGLE) :
- if (*keyword == '(' || *keyword == '[') {
- push_math_mode(anewstr(keyword));
- }
- else if (*keyword == ')' || *keyword == ']') {
- if (Stack_Empty) {
- stack_empty_error(MATH,keyword);
- texit();
- }
- curstack(&etype,&stack_keyword,&linenum);
- matching_keyword = (*keyword == ')') ? "(" : "[";
- if (0 != strcmp(matching_keyword,stack_keyword)) {
- nest_error(matching_keyword,keyword,linenum,stack_keyword);
- texit();
- }
- pop_math_mode(keyword);
- }
- break;
-
- }
-
- }
-
-
- process_file ()
-
- /* Get significant LaTeX forms from the input file. For each one, depending */
- /* on its nature perform a verification or manipulate the environment stack. */
- /* When we are done the stack should be empty. */
-
- /* The file has already been opened using the global file descriptor 'fp' */
-
- {
- Actions action;
- envtype etype;
- char *keyword;
- int cmd_index,ch;
-
- while (0 != get_token(&action,&etype,&keyword)) {
-
- switch (action) {
-
- case (POP) :
-
- /* \end{keyword},, '}', ']' */
-
- if (Stack_Empty) {
- stack_empty_error(etype,keyword);
- texit();
- }
-
- math_mode_action(POP,keyword);
-
- if (Verbose_Mode && *keyword != '}' && *keyword != ']') {
- do_indent(--Indent_Level);
- printf("line %d: \\end{%s}\n",Current_Line,keyword);
- }
- do_pop(etype,keyword);
- break;
-
- case (PUSH) :
-
- /* \begin{keyword}, '{', '[' */
-
- math_mode_action(PUSH,keyword);
-
- if (Verbose_Mode && *keyword != '{' && *keyword != '[') {
- do_indent(Indent_Level++);
- printf("line %d: \\begin{%s}\n",Current_Line,keyword);
- }
-
- if (0==strcmp("verbatim",keyword) || 0==strcmp("verbatim*",keyword)) {
- do_verbatim(keyword);
- break;
- }
- else {
- lex_push(etype,anewstr(keyword),Current_Line);
- break;
- }
-
- case (DOLLAR) :
- math_mode_action(DOLLAR,keyword);
- break;
-
- case (DOLLAR_DOLLAR) :
- math_mode_action(DOLLAR_DOLLAR,keyword);
- break;
-
- case (CHECK_SINGLE) :
-
- /* check for \(, \[, \), \] for math mode */
-
- math_mode_action(CHECK_SINGLE,keyword);
-
- if (Check_Mode) {
- if (!LGL_SINGLE_COMMAND_CHAR(*keyword)) {
- single_char_command_error(*keyword);
- }
- if (NOT_FOUND == (cmd_index = command_lookup(keyword))) {
- fprintf(stderr,"Fatal error:\n");
- fprintf(stderr,"Command Table and Legal Chars out of sync\n");
- exit(1);
- }
- if (!In_Math_Mode && IS_MATH_MODE(cmd_index)) {
- math_keyword_error(keyword);
- }
-
- }
- break;
-
- case (CHECK) :
-
- /* \command token */
-
- if (0 == strcmp("verb",keyword)) {
- if ('*' != (ch = get_a_char())) unget_a_char();
- do_verb();
- break;
- }
-
- if (Check_Mode) {
- if (NOT_FOUND == (cmd_index = command_lookup(keyword))) {
- keyword_error(keyword);
- }
- else if (!In_Math_Mode && IS_MATH_MODE(cmd_index)) {
- math_keyword_error(keyword);
- }
- }
-
- break;
-
- default :
- fprintf(stderr,"Invalid return from get_token...\n");
- texit();
-
- }
-
- }
-
- if (!Stack_Empty) {
- eof_error();
- texit();
- }
-
- return(0);
-
- }
-
-
- texit ()
- {
- fclose(fp);
- exit(1);
- }
-
-
/* Complain on stderr about a bad command line argument, show the        */
/* accepted invocation and end the process with exit status 1.           */

int usage ()
{
    fputs("\nUnrecognized argument to texchk\n",stderr);
    fputs("Usage: texchk [ -v -c ] [ file1 file2 ... ]\n",stderr);
    exit(1);
}
-
-
- main (argc,argv) int argc; char **argv;
-
- {
- char **argptr;
- int j,input_files = 0;
-
- init_legal_chars();
-
- /* process command line arguments */
-
- argptr = argv;
- while (*++argptr != 0) {
- if (**argptr == '-') {
- if (strlen(*argptr) != 2) {
- usage();
- }
- switch ((*argptr)[1]) {
- case 'v' :
- Verbose_Mode = T;
- break;
- case 'c' :
- Check_Mode = T;
- break;
- default :
- usage();
- break;
- }
- *argptr = '\0';
- }
- else input_files = 1;
- }
-
- /* read and process each file */
-
- if (!input_files) {
- printf("\n");
- fp = stdin;
- process_file();
- printf("\nOK!\n\n");
- }
- else {
- for (j = 1; j < argc; j++) {
- if (argv[j] != '\0') {
- printf("\nChecking file %s.\n\n",argv[j]);
- new_file();
- fp = (FILE *) efopen(argv[j],"r");
- process_file();
- fclose(fp);
- }
- }
- }
-
- exit(0);
-
- }
-
-
-