OS/2 Shareware BBS: 22 gnu

home *** CD-ROM | disk | FTP | other *** search

/ OS/2 Shareware BBS: 22 gnu / 22-gnu.zip / fweb153.zip / fweb-1.53 / web / fweave.web < prev next >

Wrap

Text File | 1995-09-23 | 219KB | 8,963 lines

@z --- fweave.web --- FWEB version 1.53 (September 23, 1995) Based on version 0.5 of S. Levy's CWEB [copyright (C) 1987 Princeton University] @x----------------------------------------------------------------------------- \Title{FWEAVE.WEB} % The FWEAVE processor. @c @* INTRODUCTION. \WEAVE\ has a fairly straightforward outline. It operates in three phases: first it inputs the source file and stores cross-reference data, then it inputs the source once again and produces the \TeX\ output file, and finally it sorts and outputs the index. It can be compiled with the optional flag |DEBUG| (defined in \.{typedefs.web}). Some compilers may not be able to handle a module this big. In that case, compile this once for each part, defining from the compiler's command line the macro |part| to have the value of either~1, 2, or~3: e.g., `\.{-Dpart=1}'. See the make file for complete details. For the text of the modules that aren't printed out here, such as \.{typedefs.web}, see \.{common.web}. @m _FWEAVE_ // Identify the module for various \FWEB\ macros. @d _FWEAVE_h @d _FWEB_h @A @<Possibly split into parts@>@; // Defines |part|. @<Include files@>@; @<Typedef declarations@>@; @<Prototypes@>@; @<Global variables@>@; /* For pc's, the file is split into three compilable parts using the compiler-line macro |part|, which must equal either~1, 2, or~3. */ #if(part == 0 || part == 1) @<Part 1@>@; #endif // |part == 1| #if(part == 0 || part == 2) @<Part 2@>@; #endif // |part == 2| #if(part == 0 || part == 3) @<Part 3@>@; #endif // |part == 3| @ Here is the main program. See the user's manual for a detailed description of the command line. @<Part 1@>=@[ int main FCN((ac, av)) int ac C0("Number of command-line arguments.")@; outer_char **av C1("Array of pointers to command-line arguments.")@; { /* --- Various initializations --- */ #if TIMING ini_timer(); /* Start timing the run. */ #endif // |TIMING| argc=ac; @~ argv=av; /* Remember the arguments as global variables. 
*/ ini_program(weave); common_init(); @<Set initial values@>; /* --- Do the processing --- */ phase1(); /* read all the user's text and store the cross-references */ phase2(); /* read all the text again and translate it to \TeX\ form */ phase3(); /* output the cross-reference index */ /* --- Finish up --- */ if(statistics) see_wstatistics(); // Invoked by command-line option~\.{-s}. return wrap_up(); /* We actually |exit| from here. */ } @I typedefs.hweb @ Here we open the \.{.tex} output file. This routine is called from |common_init|. @<Part 1@>=@[ SRTN open_tex_file(VOID) { if(STRCMP(tex_fname,"stdout") == 0) tex_file = stdout; else if((tex_file=FOPEN(tex_fname,"w"))==NULL) FATAL(W, "!! Can't open output file ", tex_fname); else @<Print header information to beginning of output file@>@; } @ The command line was formatted up with newlines; these must be followed by a \TeX\ comment character. @<Print header...@>= { fprintf(tex_file,"%% FWEAVE v%s (%s)\n\n", (char *)version, (char *)release_date); } @ @<Set init...@>= @<Allocate dynamic memory@>@; @ The function prototypes must appear before the global variables. @<Proto...@>= #include "w_type.h" /* Function prototypes for \FWEAVE. */ @i xrefs.hweb /* Declarations for cross-referencing. */ @ @<Alloc...@>= ALLOC(xref_info,xmem,ABBREV(max_refs),max_refs,0); xmem_end = xmem + max_refs - 1; @ @<Set init...@>= name_dir->xref = (XREF_POINTER)(xref_ptr=xmem); xref_switch = mod_xref_switch = defd_switch = index_short = NO; xmem->num = 0; // Cross-references to undefined modules. @ A new cross-reference for an identifier is formed by calling |new_xref|, which discards duplicate entries and ignores non-underlined references to one-letter identifiers or reserved words. If the user has sent the |no_xref| flag (the \.{-x} option of the command line), it is unnecessary to keep track of cross-references for identifiers. If one were careful, one could probably make more changes around module~100 (??) 
to avoid a lot of identifier looking up. @<Part 1@>=@[ SRTN new_xref FCN((part0,p)) PART part0 C0("")@; name_pointer p C1("")@; { xref_pointer q; // Pointer to previous cross-reference. sixteen_bits m, n; // New and previous cross-reference value. if(index_flag == NO) { SET_TYPE(p,DEFINED_TYPE(p) | 0x80); index_flag = BOOLEAN(!(language==LITERAL)); } /* Do nothing if we're not supposed to cross-reference. Also do nothing if we're inside a \&{format} statement. This is a bit kludgy, but it works. */ if (!index_flag || !(output_on || index_hidden) || in_format || (unnamed_section && !xref_unnamed) ) return; /* The |output_on| flag here prevents index entries for modules skipped with~\.{-i}. */ index_flag = BOOLEAN(!(language==LITERAL)); /* Say where the identifier is defined (but not if it's a reserved word). */ if(defd_switch && (part0 == DEFINITION || !(is_reserved(p) || is_intrinsic(p) || is_keyword(p)))) { sixteen_bits mod_defined = p->defined_in(language); if(mod_defined && mod_defined != module_count) { err_print(W,"Identifier in %s was already explicitly \ or implicitly marked via @@[ as defined in %s", MOD_TRANS(module_count), MOD_TRANS(mod_defined)); mark_harmless; } p->defined_in(language) = module_count; defd_switch = NO; } if(defd_type != NEVER_DEFINED) SET_TYPE(p,defd_type); // Used to be up in previous block. defd_type = NEVER_DEFINED; if ( xref_switch==NO && (is_reserved(p) || ((!index_short) && (length(p)==1))) ) return; if(index_short) index_short = NO; if(no_xref) return; // The result of the \.{-x} flag. m = module_count + xref_switch; xref_switch = NO; q = (xref_pointer)p->xref; if(!(do_inside || all_includes || (quoted_includes && qtd_file))) goto check_implicit; // Skip if reading an include file. if (q != xmem) { /* There's already an entry. */ n = q->num; if (n==m || n==m+def_flag) goto check_implicit; // Discard duplicates within the same module. else if (m==n+def_flag) { q->num = m; /* Update the entry to be defined instead of just used. 
*/ goto check_implicit; } } /* There's no entry yet; make a new cross-reference. */ append_xref(m); /* Link in; highest module number is first. */ xref_ptr->xlink=q; p->xref = (XREF_POINTER)xref_ptr; check_implicit: if(typd_switch) @<Execute an implicit \.{@@f}@>@; } @ When the |typd_switch| is on, due to an~\.{@@`}, we execute an implicit format statement that formats~|p| as a reserved word. @<Execute an implicit...@>= { NAME_INFO rs_wd; name_pointer lhs = p, rhs = &rs_wd; rhs->ilk = int_like; rhs->reserved_word = rhs->Language = BOOLEAN(language); rhs->intrinsic_word = rhs->keyword = NO; @<Format the left-hand side@>@; @#if 0 /* Mark as defined in this module. */ if(mark_defined.imp_reserved_name) { p->defined_in(language) = module_count; SET_TYPE(p,IMPLICIT_RESERVED); } @#endif /* Make all previous entries register as defined, not just used. */ for(q=(xref_pointer)p->xref; q>xmem; q = q->xlink) if(q->num < def_flag) q->num += def_flag; typd_switch = NO; } @ The cross-reference lists for module names are slightly different. Suppose that a module name is defined in modules~$m_1$, \dots, $m_k$ and used in modules~$n_1$, \dots, $n_l$. Then its list will contain $m_1+|def_flag|$, $m_k+|def_flag|$, \dots, $m_2+|def_flag|$, $n_l$, \dots, $n_1$, in this order. After Phase II, however, the order will be $m_1+|def_flag|$, \dots, $m_k+|def_flag|$, $n_1$, \dots, $n_l$. @<Part 1@>=@[ SRTN new_mod_xref FCN((p)) name_pointer p C1("")@; { xref_pointer q,r; /* pointers to previous cross-references */ @#if(0) if(!output_on) return; /* Don't bother with references if the module is skipped with \.{-i}. */ @#endif q = (xref_pointer)p->xref; r=xmem; if (q>xmem) { /* ``Used in module...'' Scan past all the definitions. */ if (mod_xref_switch==NO) while (q->num>=def_flag) { r=q; q=q->xlink; } else /* Defining...*/ if (q->num>=def_flag) { r=q; q=q->xlink; } } /* Discard duplicate ``used in'' xref. 
*/ if(mod_xref_switch == NO && q->num == module_count) return; append_xref(module_count+mod_xref_switch); xref_ptr->xlink=q; mod_xref_switch=NO; if (r==xmem) p->xref = (XREF_POINTER)xref_ptr; else r->xlink=xref_ptr; } @i tokens.hweb /* Declarations for |token| storage. */ @ @<Alloc...@>= ALLOC(Token,tok_mem,ABBREV(max_toks_w),max_toks,1); tok_mem++; /* In some unusual circumstances, there may be references to |tok_mem[-1]|, so be sure it exists. */ tok_m_end = tok_mem+max_toks-1; // End of |tok_mem|./ ALLOC(token_pointer,tok_start,ABBREV(max_texts),max_texts,0); tok_end = tok_start+max_texts-1; // End of |tok_start|. @ @<Set init...@>= @<Initialize |tok_ptr|, |tok_start|, and |text_ptr|@>@; mx_tok_ptr=tok_ptr; mx_text_ptr=text_ptr; @ @<Initialize |tok_ptr|...@>= { tok_ptr = tok_mem + 1; tok_start[0] = tok_start[1] = tok_ptr; text_ptr = tok_start + 1; } @ The |names_match| function is called from |id_lookup| in \.{common.web} when deciding whether to put a name into the table. @<Part 1@>=@[ boolean names_match FCN((p,first,l,t)) name_pointer p C0("Points to the proposed match.")@; CONST ASCII HUGE *first C0("Position of first character of string.")@; int l C0("Length of identifier.")@; eight_bits t C1("Desired ilk.")@; { if (length(p)!=l) return NO; /* Speedy return. */ if ( (p->Language&(boolean)language) && (p->ilk!=t) && !(t==normal && is_reserved(p))) return NO; return (boolean)(!STRNCMP(first,p->byte_start,l)); } @ The following two functions are used in initializations; they are called from \.{common.web}. @<Part 1@>=@[ SRTN ini_p FCN((p,t)) name_pointer p C0("")@; eight_bits t C1("")@; { CONST ASCII HUGE *k; p->ilk=t; p->xref = (XREF_POINTER)xmem; /* Check if identifier is all upper-case. 
*/ p->info.upper_case = NO; for(k = p->byte_start; k<byte_ptr; k++) if(isAlpha(*k) && !isAupper(*k)) return; p->info.upper_case = YES; } SRTN ini_node FCN((node)) CONST name_pointer node C1("")@; { node->xref = (XREF_POINTER)xmem; @<Initialize |mod_info| and |Language|@>@; } @i ccodes.hweb /* Category codes for the reserved words. */ @* LEXICAL SCANNING. Let us now consider the subroutines that read the \.{WEB} source file and break it into meaningful units. There are four such procedures: |skip_limbo| simply skips to the next `\.{@@\ }' or `\.{@@*}' that begins a module; |skip_TeX| passes over the \TeX\ text at the beginning of a module; |copy_comment| passes over the \TeX\ text in a \cee\ comment; and |get_next|, which is the most interesting, gets the next token of a \cee\ text. They all use the pointers |limit| and |loc| into the line of input currently being studied. Control codes in \.{WEB}, which begin with~`\.{@@}', are converted into a numeric code designed to simplify \WEAVE's logic; for example, larger numbers are given to the control codes that denote more significant milestones, and the code of |new_module| should be the largest of all. Some of these numeric control codes take the place of ASCII control codes that will not otherwise appear in the output of the scanning routines. @^ASCII code@> The following table shows the assignments: $$\def\:{\char\count255\global\advance\count255 by 1} \def\Hrule{\noalign{\hrule}}\def\HHrule{\noalign{\hrule height2pt}} \def\Width{60pt} \count255='40 \vbox{ \hbox{\hbox to \Width{\it\hfill0\/\hfill}% \hbox to \Width{\it\hfill1\/\hfill}% \hbox to \Width{\it\hfill2\/\hfill}% \hbox to \Width{\it\hfill3\/\hfill}% \hbox to \Width{\it\hfill4\/\hfill}% \hbox to \Width{\it\hfill5\/\hfill}% \hbox to \Width{\it\hfill6\/\hfill}% \hbox to \Width{\it\hfill7\/\hfill}} \vskip 4pt \hrule \def\!{\vrule height 10.5pt depth 4.5pt} \halign{\hbox to 0pt{\hskip -24pt\WO{\~#}\hfill}&\! 
\hbox to \Width{\hfill$#$\hfill\!}& &\hbox to \Width{\hfill$#$\hfill\!}\cr 00&\\{ignore}&\MM &\\{verbatim}&\\{force\_line}&\WW &** &\CC &\\{bell}\cr\Hrule 01&\dots &\\{begin\_cmnt}&\\{lf} &\PP &\\{ff} &\\{cr} &\\{begin\_lang} &\\{cmpd\_assgn} \cr\Hrule 02&\GG &\LS &\LL &\.{.DOT.}&; &\SR &\SlSl & \cr\Hrule 03&\\{stmt\_label}&\MG &\WI &\WL &\NN &\WG &\WS &\WV \cr\HHrule 04& &\WR & &\# & &\MOD &\amp & \cr\Hrule 05& & &\ast &+ & &- & &/ \cr\Hrule 06& & & & & & & & \cr\Hrule 07& & & & &< &= &> &? \cr\Hrule 10&\Wcp &\Wcm &\Wcs &\Wcv &\Wcd &\Wcx &\Wca &\Wco \cr\Hrule 11&\Wcg &\Wcl & & & & & & \cr\Hrule 12& & & & & & & & \cr\Hrule 13& & & & & & &\^ & \cr\Hrule 14& & & & & & & & \cr\Hrule 15& & & & & & & & \cr\Hrule 16& & & & & & & & \cr\Hrule 17& & & & &\OR &\.{@@\$}&\.{@@\_},\TL&\\{param}\cr\HHrule 20&\.{L$l$}&\.{C} &\.{R} &\.{N} &\.{M} &\.{X} & & \cr\Hrule 21&\NP &\NC &\.{\#<}&\WPtr &\CC & & & \cr\Hrule 22& & & & & & & & \cr\Hrule 23&\\{constant}&\\{stringg}&\\{identifier}&\.{@@\^}&\.{@@9} &\.{@@.} &\.{@@t} &\.{@@'} \cr\Hrule 24&\.{@@\&}&\.{@@,} &\.{@@\char'174}&\.{@@/} &\.{@@\#} &\.{@@~} &\.{@@;}& \cr\Hrule 25&\.{@@(} &\.{@@)} &\.{\ } &\\{copy\_mode}&\\{toggle\_output}&\.{@@e}&\.{@@:}& \cr\Hrule 26& & &\.{@@!} & & &\.{@@0} &\.{@@1} &\.{@@2} \cr\Hrule 27&\.{@@f} &\.{@@\%}& & &\.{@@l} &\.{@@o} &\.{@@d} &\.{@@m} \cr\Hrule 30&\.{@@\#ifdef}&\.{@@\#ifndef}&\.{@@\#if}&\.{@@\#else}&\.{@@\#elif}&\.{@@\#endif} &\.{@@\#pragma} &\.{@@\#undef}\cr\Hrule 31&\.{@@a} &\.{@@<} &\.{@@\ }& & & & & \cr\Hrule 32& & & & & & & & \cr\Hrule 33& & & & & & & & \cr\Hrule 34& & & & & & & & \cr\Hrule 35& & & & & & & & \cr\Hrule 36& & & & & & & & \cr\Hrule 37& & & & & & &\\{begin\_cmnt0}& \cr} \hrule width 480pt}$$ @d ignore 0 // Control code of no interest to \WEAVE. @d verbatim OCTAL(2) // Extended |ASCII| alpha will not appear. @d force_line OCTAL(3) // Extended |ASCII| beta will not appear. @d begin_comment0 HEX(FE) // Sent from |input_ln|. 
@d begin_comment1 HEX(FD) @d begin_comment OCTAL(11) // |ASCII| tab mark will not appear. @d compound_assignment OCTAL(17) // Things like `\.{*=}'. @% @d param OCTAL(177) // |ASCII| delete will not appear. /* Language codes. */ @d L_switch OCTAL(200) // The generic language switch \.{@@L$l$}. @d begin_C OCTAL(201) @d begin_RATFOR OCTAL(202) @d begin_FORTRAN OCTAL(203) @d begin_LITERAL OCTAL(204) @d begin_TEX OCTAL(205) @d begin_nuweb OCTAL(206) // Strictly speaking, not a language code. /* More two-byte combinations that couldn't be fitted below printable |ASCII|. */ @d dont_expand OCTAL(210) // Control code for `\.{\#!}'. @d auto_label OCTAL(211) // Control code for `\.{\#:}'. @d all_variable_args OCTAL(212) // Control code for `\.{\#.}'. @d macro_module_name OCTAL(213) // Control code for `\.{\#<\dots@@>}'. @d eq_gt OCTAL(214) // Control code for `\.{=>}'. @d colon_colon OCTAL(215) /* Control code for `\.{::}'. */ /* Control codes for \FWEB\ commands beginning with \.{@@}. */ /* The following two codes will be intercepted without confusion, because they're processed immediately after an \.{@@}, not returned from |next_control|. */ @d switch_math_flag OCTAL(175) @d underline OCTAL(176) @d xref_roman OCTAL(233) /* control code for `\.{@@\^}' */ @d xref_wildcard OCTAL(234) /* control code for `\.{@@9}' */ @d xref_typewriter OCTAL(235) /* control code for `\.{@@.}' */ @d TeX_string OCTAL(236) /* control code for `\.{@@t}' */ @d ascii_constant OCTAL(237) /* control code for `\.{@@'}' */ @d join OCTAL(240) /* control code for `\.{@@\&}' */ @d thin_space OCTAL(241) /* control code for `\.{@@,}' */ @d math_break OCTAL(242) /* control code for `\.{@@\char'174}' */ @d line_break OCTAL(243) /* control code for `\.{@@/}' */ @d big_line_break OCTAL(244) /* control code for `\.{@@\#}' */ @d no_line_break OCTAL(245) /* control code for `\.{@@~}' */ @d pseudo_semi OCTAL(246) /* control code for `\.{@@;}' */ @d defd_at OCTAL(247) // Control code for `\.{@@[}'. 
@d begin_meta OCTAL(250) /* Control code for |"@@("|. */ @d end_meta OCTAL(251) /* Control code for |"@@)"|. */ @d macro_space OCTAL(252) /* Space token during preprocessing. */ @d copy_mode OCTAL(253) /* Are we copying comments? */ @d toggle_output OCTAL(254) // Turns on and off Weave's output. @d turn_output_on OCTAL(254) // Appended to the scraps for code. @d turn_output_off OCTAL(255) @d Turn_output_on OCTAL(256) @d Turn_output_off OCTAL(257) /* 260 and 261 are elsewhere. */ @d compiler_directive OCTAL(262) // No longer used. @d Compiler_Directive OCTAL(263) /* Control code for `\.{@@?}' */ @d new_output_file OCTAL(264) // Control code for `\.{@@o}'. @d implicit_reserved OCTAL(265) // Control code for `\.{@@]}'. @d trace OCTAL(266) /* control code for `\.{@@0}', `\.{@@1}', and `\.{@@2}' */ @d invisible_cmnt OCTAL(271) /* Control code for `\.{@@\%}' */ @d pseudo_expr OCTAL(272) /* Control code for `\.{@@e}' */ @d pseudo_colon OCTAL(273) /* Control code for `\.{@@:}' */ @d begin_bp OCTAL(274) // Control code for `\.{@@\lb}'. @d insert_bp OCTAL(275) // Control code for `\.{@@b}'. @d no_index OCTAL(276) // Control code for `\.{@@-}'. @d yes_index OCTAL(277) // Control code for `\.{@@+}'. @d no_mac_expand OCTAL(300) // Control code for `\.{@@!}'. @d protect_code OCTAL(301) // Control code for `\.{@@p}'. @d set_line_info OCTAL(302) // Control code for `\.{@@q}'. /* Definition section begun by codes $\ge$~|formatt|. */ @d formatt OCTAL(310) /* control code for `\.{@@f}' */ @d limbo_text OCTAL(313) /* Control code for `\.{@@l}' */ @d op_def OCTAL(314) /* Control code for `\.{@@v}' */ @d macro_def OCTAL(315) // Control code for `\.{@@w}'. @d definition OCTAL(320) /* control code for `\.{@@d}' */ @d undefinition OCTAL(321) // Control code for `\.{@@u}'. 
@d WEB_definition OCTAL(322) /* Control code for `\.{@@M}' */ /* --- Preprocessor commands --- */ @d m_ifdef OCTAL(330) @d m_ifndef OCTAL(331) @d m_if OCTAL(332) @d m_else OCTAL(333) @d m_elif OCTAL(334) @d m_endif OCTAL(335) @d m_for OCTAL(336) @d m_endfor OCTAL(337) @d m_line OCTAL(340) @d m_undef OCTAL(341) /* --- Module names --- */ @d begin_code OCTAL(350) /* control code for `\.{@@a}' */ @d module_name OCTAL(351) /* control code for `\.{@@<}' */ /* --- Beginning of new module --- */ @d new_module OCTAL(352) /* control code for `\.{@@\ }' and `\.{@@*}' */ @ Control codes are converted from ASCII to \WEAVE's internal representation by means of the table |ccode|. Codes that are used only by \FTANGLE\ get the special code~|ignore| (see \.{typedefs.hweb}); these are just skipped. Codes that are used by neither processor are initialized to~|'0xFF'|; that can be used to trigger an error message. @<Global...@>= IN_STYLE eight_bits ccode[128]; /* Meaning of an |ASCII| char following '\.{@@}'. */ @ The control codes are set up in \.{style.web}. @m TANGLE_ONLY(d,c) INI_CCODE(d,USED_BY_OTHER) @m WEAVE_ONLY(d,c) INI_CCODE(d,c) @<Set ini...@>= zero_ccodes(); // See \.{style.web}. ccode[@'/'] = line_break; /* The commenting style is also fundamental, and for convenience the |line_break| command is also inviolate. */ @<Set the changable codes@>@; prn_codes(); @ Here are the default values for the things that are allowed to be changed. @<Set the changable...@>= { SAME_CCODE(" \t*",new_module); // Either space, tab, or asterisk. SAME_CCODE("aA",begin_code); SAME_CCODE("<",module_name); SAME_CCODE("dD",definition); SAME_CCODE("uU",undefinition); SAME_CCODE("mM",WEB_definition); SAME_CCODE("fF",formatt); WEAVE_ONLY("\001",toggle_output); // This command is for internal use only! 
SAME_CCODE("'\"",ascii_constant); REASSIGNABLE("=",verbatim); WEAVE_ONLY("\\",line_break); REASSIGNABLE("tT",TeX_string); SAME_CCODE("L",L_switch); SAME_CCODE("cC",begin_C); SAME_CCODE("rR",begin_RATFOR); SAME_CCODE("n",begin_FORTRAN); SAME_CCODE("N",begin_nuweb); SAME_CCODE("xX",begin_TEX); SAME_CCODE("&",join); WEAVE_ONLY("_",underline); WEAVE_ONLY("[",defd_at); WEAVE_ONLY("`]",implicit_reserved); SAME_CCODE("%",invisible_cmnt); SAME_CCODE("?",Compiler_Directive); WEAVE_ONLY("$",switch_math_flag); REASSIGNABLE("^",xref_roman); REASSIGNABLE(".",xref_typewriter); REASSIGNABLE("9",xref_wildcard); { char temp[3]; sprintf(temp,";%c",XCHR(interior_semi)); WEAVE_ONLY(temp,pseudo_semi); } WEAVE_ONLY("e",pseudo_expr); WEAVE_ONLY(":",pseudo_colon); SAME_CCODE("l",limbo_text); SAME_CCODE("vV",op_def); SAME_CCODE("wW",macro_def); WEAVE_ONLY(",",thin_space); WEAVE_ONLY("|",math_break); SAME_CCODE("#",big_line_break); WEAVE_ONLY("~",no_line_break); SAME_CCODE("(",begin_meta); SAME_CCODE(")",end_meta); SAME_CCODE("oO",new_output_file); SAME_CCODE("{",begin_bp); TANGLE_ONLY("}bB",insert_bp); SAME_CCODE("!",no_mac_expand); TANGLE_ONLY("q", set_line_info); WEAVE_ONLY("-",no_index); WEAVE_ONLY("+",yes_index); WEAVE_ONLY("p", protect_code); @<Special control codes allowed only when debugging@>@; } @ If \WEAVE\ is compiled with debugging commands, one can write~\.{@@2}, \.{@@1}, and~\.{@@0} to turn tracing fully on, partly on, and off, respectively. @<Special control codes...@>= #if(DEBUG) WEAVE_ONLY("012",trace); #endif /* |DEBUG| */ @ At this point |loc|~is positioned after a language command like~\.{@@c}, or on the~$l$ in~\.{@@L$l$}. @f @<Cases to set |language| and |break|@> case @<Cases to set |language| and |break|@>= @<Specific language cases@>: loc--; /* Position to letter after \.{@@}. Falls through to general case |L_switch|. 
*/ case L_switch: @<Set the |language| and maybe kill rest of line@>@; break; case begin_nuweb: nuweb_mode = !NUWEB_MODE; if(module_count == 0) global_params = params; break; @ @<Set the |language| and maybe kill...@>= { @<Set |language|@>@; if(module_count == 0) global_params = params; ini0_language(); @<Kill rest of line; no |auto_semi|@>@; } @ The |skip_limbo| routine is used on the first pass to skip through portions of the input that are not in any modules, i.e., that precede the first module. Language commands may be encountered at any time; these reset the current language from whatever was specified on the command line. When the first module is found, the global language is set to the current language. After this procedure has been called, the value of |input_has_ended| will tell whether or not a module has actually been found. @<Part 1@>=@[ SRTN skip_limbo(VOID) { WHILE() { if (loc>limit && !get_line()) return; *(limit+1)=@'@@'; // Guard character. /* Look for '@@', then skip two chars. */ while (*loc!=@'@@') loc++; /* |loc| now on the \.{@@}. */ if(loc++ <= limit) switch(ccode[*loc++]) { /* Process any language change commands; skip any other @@~commands. */ @<Cases to set |language| and |break|@>@:@; case invisible_cmnt: loc = limit + 1; break; case new_module: return; // End of limbo section. } @#if(0) // Old code. if (loc <=limit) if (ccode[*loc++]==new_module) return; @#endif } } @ The |skip_TeX| routine is used on the first pass to skip through the \TeX\ code at the beginning of a module. It returns the next control code or~`\v' found in the input. A |new_module| is assumed to exist at the very end of the file. @<Part 1@>=@[ eight_bits skip_TeX(VOID) { WHILE() { if (loc>limit && !get_line()) return new_module; *(limit+1)=@'@@'; /* Marker to curtail the scan. */ while (*loc!=@'@@' && *loc!=@'|') loc++; if (*loc++ ==@'|') return @'|'; // Have hit beginning of code mode. 
if (loc<=limit) { SET_CASE(*loc); return ccode[*(loc++)]; } } DUMMY_RETURN(0); } @* INPUTTING the NEXT TOKEN. As stated above, \.{WEAVE}'s most interesting lexical scanning routine is the |get_next| function that inputs the next token of \cee\ input. However, |get_next| is not especially complicated. The result of |get_next| is either an ASCII code for some special character, or it is a special code representing a pair of characters (e.g., `\.{!=}'), or it is the numeric value computed by the |ccode| table, or it is one of the following special codes: \yskip\hang |identifier|: In this case the global variables |id_first| and |id_loc| will have been set to the beginning and ending-plus-one locations in the buffer, as required by the |id_lookup| routine. \yskip\hang |string|: The string will have been copied into the array |mod_text|; |id_first| and |id_loc| are set as above (now they are pointers into |mod_text|). \yskip\hang |constant|: The constant is copied into |mod_text|, with slight modifications; |id_first| and |id_loc| are set. \yskip\noindent Furthermore, some of the control codes cause |get_next| to take additional actions: \yskip\hang |xref_roman|, |xref_wildcard|, |xref_typewriter|, |TeX_string|, |verbatim|: The values of |id_first| and |id_loc| will have been set to the beginning and ending-plus-one locations in the buffer. \yskip\hang |module_name|: In this case the global variable |cur_module| will point to the |byte_start| entry for the module name that has just been scanned. \yskip\noindent If |get_next| sees `\.{@@\_}' it sets |xref_switch| to |def_flag| and goes on to the next token. \yskip\noindent If |get_next| sees `\.{@@\$}' it sets |math_flag| to |!math_flag| and goes on to the next token. 
@d constant OCTAL(230) /* \cee\ numeric constant */
@d stringg OCTAL(231) /* \cee\ string or \.{WEB} precomputed string */
@d identifier OCTAL(232) /* \cee\ identifier or reserved word */

@<Global...@>=

EXTERN name_pointer cur_module; // Name of module just scanned.
EXTERN int math_flag SET(NO);
EXTERN boolean chk_end SET(YES); // Do we check for end of line?
EXTERN boolean last_was_cmnt SET(NO); /* Helps with interchanging semicolons
                                         and comments. */
EXTERN boolean lst_ampersand SET(NO); /* For continuations in free-form
                                         syntax \Fortran-90. */
EXTERN boolean eat_blank_lines SET(NO); // For Nuweb mode.
EXTERN ASCII c; // The current character for |get_next|.

@ As one might expect, |get_next| consists mostly of a big switch that
branches to the various special cases that can arise. This function has
been broken into multiple function calls to |prs_TeX_code| and
|prs_regular_code| in order to make it fit on personal computers.

Each trip through the loop fetches a line if the current one is exhausted,
reads the next character into~|c|, and hands control to the parser for the
current language. A parser that returns |MORE_PARSE| has found nothing to
report, and the loop goes around again; any other non-negative value is the
token, which is returned after |at_beginning| has been updated. Several of
the modules expanded inside the loop also return directly from |get_next|
(with |new_module| at end of input, or with |macro_space|).

@<Part 1@>=@[

eight_bits
get_next(VOID) /* produces the next input token */
{
boolean terminate = NO; /* Set when a continuation character ends the line;
                           see |@<Get next char...@>|. */
char terminator[2]; /* The two characters at |*limit| and |*(limit+1)|,
                       carried over to the next line while |terminate| is set. */
GOTO_CODE pcode; // Return from the parsing functions. 0~means |continue|.

WHILE()
    {
    @<Check if we're at the id part of a preprocessor command@>;
    @<Check if we're at the end of a preprocessor command@>;

    chk_end = YES;

    @<Get another line of input if necessary@>@;
    @<Get next character; skip blanks and tabs@>@;

/* Handle an (effectively) empty line. (Don't move this statement upwards.) */
    if(limit == cur_buffer || (at_beginning && loc > limit))
        return big_line_break;

    switch(language)
        {
        case TEX:
            if((pcode=prs_TeX_code()) == MORE_PARSE)
                break; /* Nothing yet; go around the loop again. */
            else if((int)pcode < 0)
                CONFUSION("prs_TeX_code","Negative pcode");
            else
                goto found_something;

        default:
            if((pcode=prs_regular_code(MORE_PARSE)) == MORE_PARSE)
                break; /* Nothing yet; go around the loop again. */
            else if((int)pcode < 0)
                CONFUSION("prs_regular_code", "Negative pcode");
            else
                goto found_something;
        }
    }

found_something:
/* We need the following stuff to handle the |INNER| parsing mode properly.
(|at_beginning| doesn't correspond to physical beginning of line, so can't
be reset by |get_line()|.) */
if(!preprocessing)
    switch((eight_bits)pcode)
        {
        case begin_language:
            break;

        default:
            at_beginning = NO;
            break;
        }

return (eight_bits)pcode;
}

@ Get another line of input if necessary. We raise the special flag
|at_beginning| to help us with statement labels and preprocessor commands.
Normally this flag is set when we get a new line. However, it must also be
set after we enter code mode by encountering vertical bars.

@<Get another line...@>=

if (loc>limit)
    {
    if(terminate)
        { /* Save the two characters at the end of the old line. */
        terminator[0] = *limit;
        terminator[1] = *(limit+1);
        }

    if(!get_line())
        return(new_module);

    if(eat_blank_lines)
        { /* Avoid empty stuff at end of module in Nuweb mode. */
        @<Skip blank lines@>@;
        eat_blank_lines = NO;
        }

    if(parsing_mode == OUTER)
        at_beginning = YES; // Start of new line.

    if(terminate)
        { /* Put the saved characters at the end of the new line. */
        *limit = terminator[0];
        *(limit+1) = terminator[1];
        terminate = NO;
        }
    }
else if(parsing_mode == OUTER)
    at_beginning = NO;

@ In Nuweb mode, blank lines at the end of the module are significant,
unless `\.{@@\%\%}' is used. That turns on |eat_blank_lines|.

@<Skip blank lines@>=
{
while(loc >= limit)
    if(!get_line())
        {
        eat_blank_lines = NO;
        return(new_module);
        }
}

@ Here we obtain the next character, advancing~|loc| in the process.
Depending on the situation, we also skip blanks and tabs.

@<Get next char...@>=

if(preprocessing)
    @<Compress string of blanks into one; if any found, return
        |macro_space|@>@;
else
    @<Skip white space at beginning of line@>@;

if(c==cont_char && loc==limit)
    {
    if(preprocessing || free_Fortran)
        loc--; /* IFFY */
    else
        loc++;

    terminate = YES;
    continue;
    }

@ Inside a preprocessor command, a run of blanks and tabs is reported as a
single |macro_space|. Only the blanks themselves are consumed; the character
that ends the run stays in the buffer for the next trip through |get_next|.
(The test must not be written as ``not a blank or not a tab'': that is
always true, so every blank would be reported separately and nothing would
be compressed.)

@<Compress string of blanks...@>=
{
c = *loc++;

if(c==@' ' || c==tab_mark)
    { /* Swallow the rest of the run, then report it as one token. */
    while(loc < limit && (*loc==@' ' || *loc==tab_mark))
        loc++;

    return macro_space;
    }
}

@ @<Skip white space at beg...@>=
{
if(language==TEX)
    c = *loc++;
else
    {
    ASCII HUGE *loc0 = loc; // Remember starting point for nuweb mode.

    do
        { /* Skip beginning white space. 
*/ c = *loc++; } while(loc<=limit && (c==@' ' || c==tab_mark) ); if(nuweb_mode) { if(!(c == @'@@' && *loc == @'#')) { /* Go back to beginning. */ loc = loc0; c = *loc++; if(phase == 1 && c == tab_mark) c = @' '; } } } } @ \TeX\ syntax differs significantly from that of the other languages. First of all, \TeX\ comments (beginning with~'\.\%') are always short. Next, in phase~1, we must look at the text identifier by identifier in order to make cross-references properly. In phase~2, however, we can absorb whole collections of identifiers, until a comment or control code comes along. In order to deal with changing category codes, we translate letters through the array~|TeX|, which contains the most up-to-date category codes. @<Part 1@>=@[ GOTO_CODE prs_TeX_code(VOID) { GOTO_CODE icode; // Return code from |get_control_code|. if(loc>limit) return @';'; if (c==@'@@') { // The next call takes care of a branch to |mistake|. if((icode=get_control_code()) == GOTO_MISTAKE) return prs_regular_code(GOTO_MISTAKE); else return icode; } else if(TeX[c] == TeX_comment) { long_comment = YES; // Since we may concatenate lines. return begin_comment; } else if(c == @'|' && parsing_mode == INNER) return @'|'; else if(phase==1) { if(TeX[c] == TeX_escape) @<Get \TeX\ identifier@>@; else return MORE_PARSE; } else @<Get \TeX\ string@>@; @% return MORE_PARSE; // This means to continue to top of |get_next|. } @ If the identifier doesn't begin with a letter, it's a single-character macro such as~`\.{\\<}'. @<Get \TeX\ identifier@>= { id_first = id_loc = mod_text + 1; *id_loc++ = *(loc-1); // The beginning backslash. if(TeX[*loc] != TeX_letter) { /* Single-character macro, such as~`\.{\\<}'. */ if(*loc == @'@@') { if(*(loc+1) != @'@@') ERR_PRINT(W,"You should say `\\@@@@'"); else loc++; } *id_loc++ = *loc++; // The single character. } else while(TeX[*loc] == TeX_letter) { /* Scan over the macro name. 
*/ if(*loc == @'@@') { if(*(loc+1) != @'@@') ERR_PRINT(W,"You should say `@@@@'"); else loc++; } *id_loc++ = *loc++; } return identifier; } @ \TeX\ strings are everything on a single line, up to a comment or, if we're inside vertical bars, up to a terminating bar. It looks nicer if we leave spaces alone instead of displaying them as~`\.{\ }'. @d ordinary_space 01 /* Inserted after ctrl sequences, to avoid many visible spcs. */ @<Get \TeX\ string@>= { loc--; id_first = id_loc = mod_text + 1; while(loc < limit) { if(*loc == @'@@') if(*(loc+1)==@'@@') *id_loc++ = *loc++; else break; // Scan ended by control code. if(TeX[*loc] == TeX_comment) break; if(*loc==@'|' && parsing_mode==INNER) break; // End of internal mode. if(TeX[*loc] == TeX_escape) { if(TeX[*(loc+1)] != TeX_letter) { // One-character control sequence. if(*(loc+1) == @'@@') if(*(loc+2) != @'@@') ERR_PRINT(W,"You should say \\@@@@"); else *id_loc++ = *loc++; *id_loc++ = *loc++; } else { // Ordinary control sequence. do *id_loc++ = *loc++; while (TeX[*loc] == TeX_letter); while (loc < limit) { if(TeX[*loc] != TeX_space) break; *id_loc++ = ordinary_space; loc++; } continue; } } *id_loc++ = *loc++; } return stringg; } @ Parse everything but \TeX. @<Part 1@>=@[ GOTO_CODE prs_regular_code FCN((iswitch)) GOTO_CODE iswitch C1("")@; { GOTO_CODE icode; // Return code from |get_control_code|. switch(iswitch) { case GOTO_MISTAKE: goto mistake; case GOTO_GET_IDENTIFIER: goto get_identifier; default: break; } /* --- ELLIPSIS: `\.{...}' --- */ if(c==@'.' && *loc==@'.' 
&& *(loc+1)==@'.') { ++loc; compress(ellipsis); } /* --- DOT CONSTANT: `\.{.FALSE.}' --- */ else if(FORTRAN_LIKE(language) && dot_constants && (c == wt_style.dot_delimiter.begin) && !isDigit(*loc)) @<Get a dot constant@>@; /* --- CONSTANT: `\.{123}', `\.{.1}', or `\.{\\135}' --- */ else if (isDigit(c) || c==@'\\' || c==@'.') @<Get a constant@>@; /* --- BOZ-CONSTANT --- */ else if (in_data && Fortran88 && (*loc==@'"' || *loc==@'\'') && (c==@'B' || c==@'O' || c==@'Z') ) return get_string(*loc++,c); /* --- IDENTIFIER --- */ else if (is_identifier(c)) @<Get an identifier@>@; /* --- STRING: `\.{"abc"}', `\.{'\\n'}', `\.{<file\_name>}' --- */ else if (c==@'\'' || c==@'"' || (sharp_include_line && !in_comment && (c==@'(' || (C_LIKE(language) && c==@'<') ) )) return get_string(c,'\0'); /* --- CONTROL CODE --- */ else if (c==@'@@') if((icode=get_control_code()) == GOTO_MISTAKE) goto mistake; else return icode; /* --- WHITE SPACE --- */ /* Blanks were skipped above. */ else if (c==@' ' || c==tab_mark) @#if(0) if(preprocessing) /* What is this statement for? */ { id_first = mod_text + 1; id_loc = id_first + 1; *id_first = c; return stringg; } else /* JAK to here */ @#endif if(nuweb_mode) return c; @%(c==tab_mark ? bell : c); else return MORE_PARSE; // Ignore spaces and tabs; continue. /* --- C PREPROCESSOR STATEMENT: `\.{\#include}' --- */ if (c==@'#' && at_beginning && C_LIKE(language)) @<Raise preprocessor flag@>@; /* If |'#'| is first character in line, it's a C~preprocessor statement. */ /* --- END A |@r format| STATEMENT: `\.{format(\dots);}' --- */ else if (in_format && c==@';') { /* End a |@r format| statement. */ in_format = NO; return end_format_stmt; } /* --- TWO-SYMBOL OPERATOR --- */ mistake: @<Compress two-symbol operator@>@; return (eight_bits)c; } @ For FORTRAN, we allow ``dot constants'', like ~\.{.true.}\ or~\.{.or.}. This routine scans between the dots, then looks up the identifier in a table to see if it's valid and to get its token translation. 
This procedure has a tendency to run away if an unexpected dot finds its way into the input (either because of a syntactical mistake, or because \Weave\ is missing the relevant rule). Thus, we limit the search to no more than |MAX_DOT_LENGTH == 31| characters, the maximum possible length of a dot constant. @<Get a dot constant@>= @{ ASCII HUGE *p0; int n; int dcode; ASCII dot_end = wt_style.dot_delimiter.end; @b /* At this point, |loc| is positioned to the first position after the dot. */ for(p0=loc, n=0; n<MAX_DOT_LENGTH; n++,loc++) if(*loc == dot_end || !isAlpha(*loc)) break; /* Found end of dot constant, or something not allowed. */ if(*loc != dot_end) /* Didn't find end. */ { loc = p0; /* Reset position back to beginning. */ goto mistake; } if((dcode=dot_code(dots,uppercase(p0,n),loc,dot_const)) != 0) compress(dcode); /* Search for match in table. */ /* Invalid dot constant. */ loc = p0; goto mistake; } @ Because preprocessor commands do not fit in with the rest of the syntax of C, we have to deal with them separately. One solution [Levy] is to enclose such commands between special markers. Thus, when a~'\.\#' is seen as the first character of a line, |get_next| returns a special code \\{left\_preproc} and raises a flag |preprocessing|. (Unfortunately, Levy's solution didn't work in certain situations, and when the preprocessor language was installed a different method was adopted. Thus, parts of the code are asymmetrical. This should eventually be improved, but it was considered more important to make it work at all.) @d left_preproc OCTAL(260) // Begins a preprocessor command. @d right_preproc OCTAL(261) // Ends a preprocessor command. @ @<Raise prep...@>= @{ IN_COMMON ASCII HUGE *pinclude, HUGE *ppragma; /* Strings for tokens |include| and |pragma|. 
*/ @b preprocessing = YES; @<Check if next token is |include| or |pragma|@>; return left_preproc; } @ An additional complication is the freakish use of~'\.<' and~'\.>' to delimit a file name in lines that start with \&{\#include}. We must treat this file name as a string, and use the flag |sharp_include_line| to help. Also, |#pragma|s can have arbitrary syntax, so we don't want to typeset it as usual. For those, we set |sharp_pragma_line|. (Not yet used for anything.) @<Check if next token is |include|...@>= { /* According to ANSI, white space may be skipped at beginning of line. */ while (*loc==@' ' || *loc==@'\t') loc++; if(STRNCMP(loc,pinclude,7)==0) sharp_include_line = YES; else if(STRNCMP(loc, ppragma, 6) == 0) sharp_pragma_line = YES; /* The token |pragma| is six letters long; a length of~7 would also compare the character that follows it, so the test would practically never succeed. */ } @ Since the preprocessor has different reserved words than C~itself, we include the preprocessor token with the identifier if it's first on a preprocessor line. @<Check if we're at the id...@>= if(preprocessing && at_beginning) { at_beginning = NO; /* Preprocessor directives can have white space between the '\.\#' and the name. */ for( ; loc < limit; loc++) if(!(*loc==@' ' || *loc==tab_mark)) break; *(loc-1) = @'#'; /* Now we're positioned on an identifier beginning with~|'#'|, with no intervening blanks. */ return (eight_bits)prs_regular_code(GOTO_GET_IDENTIFIER); } @ When we get to the end of a preprocessor line, we lower the flag and send a code \\{right\_preproc}, unless the last character was the continuation character~'\.\\'. @<Check if we're at the end...@>= chk_the_end: if(chk_end) { /* Continue to next line; also skip all lines that have continuation character in column~1. */ while (*loc==cont_char && loc==limit-1 && (preprocessing || free_Fortran)) if (!get_line()) return new_module; /* still in preprocessor mode */ /* Now we've gotten to the end of line, but it's not continued. 
*/ if (loc>=limit) if(preprocessing) { chk_end=preprocessing=sharp_include_line=sharp_pragma_line=NO; return right_preproc; } else if(Fortran88 && parsing_mode == OUTER && auto_semi && limit > cur_buffer && !(limit[0] == @'@@' && limit[1] == @'m')) { loc = limit + 1; chk_end = NO; if(last_was_cmnt) { // Comment has already been appended. last_was_cmnt = NO; if(lst_ampersand) { // Deal with continuation before comment. lst_ampersand = NO; chk_end = YES; if(!get_line()) { ERR_PRINT(W,"Section ended in middle of Fortran-90 continuation"); return new_module; } APP_STR("\\indent"); goto chk_the_end; } continue; } @% else return @'\n'; // or @';' ??? or nothing??? } } @ The following code assigns values to the combinations~\.{++}, \.{--}, \.{->}, \.{>=}, \.{<=}, \.{==}, \.{<<}, \.{>>}, \.{!=}, and~\.{\&\&}. (For FORTRAN, we also have~\.{//} and~\.{\^}.) The compound assignment operators in~C are indexed, all under the aegis of |compound_assignment|. @d compress(c) if (loc++<=limit) return (eight_bits)c@; @d COMPOUND(c,n) if(loc <= limit) {loc += n; assignment_token=c; return (eight_bits)compound_assignment;} @d CA_START OCTAL(100) /* The index into |op| is |CA_START + assignment_token|, where |assignment_token| is one of the following. See |valid_op()| for further details. */ @d plus_eq 0 @d minus_eq 01 @d star_eq 02 @d slash_eq 03 @d mod_eq 04 @d xor_eq 05 @d and_eq 06 @d or_eq 07 @d gt_gt_eq 010 @d lt_lt_eq 011 @d or_or_or 012 @<Glob...@>= EXTERN eight_bits assignment_token; /* The particular one of the above compound assignment tokens. */ @ @<Compress two...@>= switch(c) { case (ASCII)begin_comment0:// Comment sent from FORTRAN or Ratfor |input_ln|. long_comment = YES; return begin_comment; case (ASCII)begin_comment1: // As above, but short comment. long_comment = NO; return begin_comment; case @'\\': if(*loc==@'/' && !in_format && FORTRAN_LIKE(language)) { compress(slash_slash); // `\.{\\/}' $\to$ `|@r \/|'. 
} break; case @'/': @<Cases for \.{\slashstar}, \.{//}, \.{/)}, and~\.{/=}@>@; break; case @'(': if(*loc == @'/' && !in_format) compress(left_array); break; case @'+': if (*loc==@'+') {compress(plus_plus); // `\.{++}' $\to$ `|++|'. } else if(*loc==@'=') {COMPOUND(plus_eq,1); // `\.{+=}' $\to$ `|+=|'. } break; case @'-': if (*loc==@'-') {compress(minus_minus); // `\.{--}' $\to$ `|--|'. } else if (*loc==@'>') {compress(minus_gt); // `\.{->}' $\to$ `|->|'. } else if(*loc==@'=') {COMPOUND(minus_eq,1); // `\.{-=}' $\to$ `|-=|'. } break; case @'=': if (*loc==@'=') {compress(eq_eq); // `\.{==}' $\to$ `|==|'. } else if(*loc==@'>') {compress(eq_gt); // `\.{=>}' $\to$ `$\WPtr$'. } /* \FORTRAN-88's pointer assignment statement. */ break; case @'>': if (*loc==@'=') {compress(gt_eq); // `\.{>=}' $\to$ `|>=|'. } else if (*loc==@'>') if(*(loc+1)==@'=') {COMPOUND(gt_gt_eq,2); // `\.{>>=}' $\to$ `|>>=|'. } else {compress(gt_gt); // `\.{>>}' $\to$ `|>>|'. } break; case @'<': if (*loc==@'=') {compress(lt_eq); // `\.{<=}' $\to$ `|<=|'. } else if (*loc==@'<') if(*(loc+1)==@'=') {COMPOUND(lt_lt_eq,2); // `\.{<<=}' $\to$ `|<<=|'. } else {compress(lt_lt); // `\.{<<}' $\to$ `|<<|'. } else if(*loc==@'>') {compress(not_eq); // `\.{<>}' $\to$ `|!=|'. } /* \FORTRAN-88 */ break; case @'%': if(*loc==@'=') {COMPOUND(mod_eq,1); // `\.{\%=}' $\to$ `|%=|'. } break; case @'&': if (*loc==@'&') {compress(and_and); // `\.{\&\&}' $\to$ `|&&|'. } else if(*loc==@'=') { COMPOUND(and_eq,1); // `\.{\&=}' $\to$ `|&=|'. } break; case @'|': if (*loc==@'|') { if(*(loc+1)==@'|') { COMPOUND(or_or_or,2); // `\.{\vb\vb\vb}' $\to$ `|||||'. } else compress(or_or); // `\.{\vb\vb}' $\to$ `||| |'. } else if(*loc==@'=' && !FORTRAN_LIKE(language)) { COMPOUND(or_eq,1); // `\.{\vertbar=}' $\to$ `||=|'. } break; case @'!': if(!in_format && (point_comments || *loc == @'!') ) { if(*loc != @'!') loc--; long_comment = NO; compress(begin_comment); // \.{! Comment} or \.{!! Comment}. 
} else if (*loc==@'=') {compress(not_eq); // `\.{!=}' $\to$ `|!=|'. } break; case @'*': if(FORTRAN_LIKE(language) && (*loc == @'*') ) {compress(star_star); // `\.{x**y}' $\to$ `|@r x**y|'. } /* Exponentiation. */ else if(*loc==@'=') {COMPOUND(star_eq,1); // `\.{*=}' $\to$ `|*=|'. } break; case @'^': if(*loc == @'^') {compress(star_star);} else if(FORTRAN_LIKE(language) && (loc < limit) ) return star_star; // `\.{x\^y}' $\to$ `|@r x^y|'. else if(*loc==@'=') {COMPOUND(xor_eq,1); // `\.{\^=}' $\to$ `|^=|'. } break; case @':': if(*loc==@':') compress(colon_colon); // `\.{::}' $\to$ `|::|'. break; case @'#': @<Cases for \.{\#\#}, \.{\#!}, \.{\#:}, \.{\#.}, and~\.{\#<}@>@; break; } @ @<Cases for \.{\slashstar}...@>= if (*loc==@'*') { long_comment = YES; compress(begin_comment); // \.{\slashstar\dots/starslash} } else if(*loc == @'/') { if(C_LIKE(language) || language==TEX || (Cpp_comments && !in_format && FORTRAN_LIKE(language))) { /* Short comments are recognized in both~C and \Cpp, and also in |TEX|. */ long_comment = NO; /* \Cpp-style comment. */ compress(begin_comment); // \.{//\dots} } else if(!in_format) { compress(slash_slash); /* Concatenation operator~|@r \/|. Multiple slashes in |format| statements are just left alone. */ } } else if(*loc == @')' && !in_format) {compress(right_array); // `\.{/)}' $\to$ `$\SR$'. } else if(*loc == @'=') {COMPOUND(slash_eq,1); // `\.{/=}' $\to$ `|/=|'. } @ @<Cases for \.{\#\#}...@>= switch(*loc) { case @'#': compress(paste); // `\.{\#\#}' $\to$ token `\.{\#\#}'. break; case @'!': compress(dont_expand); // `\.{\#!}' $\to$ token `\.{\#!}'. break; case @':': compress(auto_label); // `\.{\#:}' $\to$ token `\.{\#:}'. break; case @'.': compress(all_variable_args); // `\.{\#.}' $\to$ token `\.{\#.}'. break; case @'<': loc++; mac_mod_name = YES; @<Scan the module name and make |cur_module| point to it@>; return macro_module_name; case @'\'': case @'"': if(phase == 1) loc++; // Skip over so string scanner doesn't complain. 
break; } @ Different conventions are followed by \TeX\ and \cee\ to express octal and hexadecimal numbers; it is reasonable to stick to each convention within its realm. Thus the \cee\ part of a \.{WEB} file has octals introduced by~\.0 and hexadecimals by~\.{0x}---e.g., \.{0377} or \.{0xFF}---but \.{WEAVE} will print in italics or typewriter font, respectively, and introduced by single or double quotes---e.g., |0377| or |0xFF|. \FWEB\ also adds binary constants, written as \.{0b10101} and printed as |0b10101|. In order to simplify the \TeX\ macro used to print such constants, we replace some of the characters. (If you don't like the way these constants look, you can easily change the macro; see \.{fwebmac.tex}.) Notice that in this section and the next, |id_first| and |id_loc| are pointers into the array |mod_text|, not into |cur_buffer|. The next definitions correspond to the macros in \.{fwebmac.tex}. @d BINARY_CODE @'&' /* `\.{0b10101}' $\to$ `|0b10101|' */ @d OCTAL_CODE @'~' /* `\.{0377}' $\to$ `|0377|' */ @d HEX_CODE @'`' /* `\.{0xabc}' $\to$ `|0xabc|' */ @d CONSTANT_CODE @'#' // Various kinds of constants. @d FLOAT_CODE @'0' // `\.{50000F}' $\to$ `|50000F|'. @d LONG_CODE @'1' /* `\.{50000L}' $\to$ `|50000L|' */ @d UNSIGNED_CODE @'2' // `\.{50000U}' $\to$ `|50000U|'. @d ULONG_CODE @'3' // `\.{50000UL}' $\to$ `|50000UL|'. @d EXP_CODE @'^' /* `\.{(x+y)\^(a+b)}' $\to$ `|@r (x+y)^(a+b)|' */ @d HOLLERITH_CODE @'%' /* `\.{5Hhello}' $\to$ `|@r 5Hhello|' */ @<Get a constant@>= @{ boolean decimal_point = NO; ASCII prec_char; @b id_first = id_loc = mod_text + 1; if (c==@'\\') { /* Probably octal---e.g., `\.{\\107}' */ ASCII *loc0; if(*loc == @'/') goto mistake; // It's really `\.{\\/}'. *id_loc++ = OCTAL_CODE; // \.{WEBMAC} control code for octal. loc0 = loc; while (isOdigit(*loc)) *id_loc++ = *loc++; if(loc == loc0) return (eight_bits)c; // Not octal! 
} else if (c==@'0') @<Get an octal, hex, or binary constant@>@; else @<Get a decimal or Hollerith constant@>@; @<Post-process constant@>@; if(!decimal_point && at_beginning && ((is_FORTRAN_(language) && !last_was_continued) || (is_RATFOR_(language) && *loc == @':'))) return stmt_label; return constant; } @ @<Get an octal, hex...@>= { if (*loc==@'x' || *loc==@'X') /* Hex---e.g., `\.{0xABC}' */ { *id_loc++ = HEX_CODE; /* \.{WEBMAC} code for hex. */ loc++; while (isXdigit(*loc)) *id_loc++ = *loc++; } else if(*loc==@'b' || *loc==@'B') /* Binary */ { *id_loc++ = BINARY_CODE; /* \.{WEBMAC} code for binary. */ loc++; while(isBdigit(*loc)) *id_loc++ = *loc++; } else if (isOdigit(*loc)) /* Octal---e.g., `\.{011}' */ { *id_loc++ = OCTAL_CODE; while (isOdigit(*loc)) *id_loc++=*loc++; } else goto dec; /* decimal constant */ } @ Decimal (\.{1.0e-5}) or \FORTRAN\ Hollerith constant (|@R 3Habc|). @<Get a decimal...@>= { if (c==@'.' && !isDigit(*loc)) goto mistake; /* Isn't a constant like~`|.1|'. */ dec: *id_loc++ = c; while (isDigit(*loc) || *loc==@'.') *id_loc++ = *loc++; /* Optimistically, we'll include the decimal point with the constant. However, in \Fortran\ we have to check for the possibility that it's an integer followed by a dot constant. We do this immediately below. */ decimal_point = BOOLEAN(*(loc-1) == @'.'); if(FORTRAN_LIKE(language)) if(decimal_point) /* Check for dot constant. */ { if(is_dot()) /* It's an integer constant followed by a dot constant. */ { id_loc--; loc--; return constant; } } else if(*loc == @'h' || *loc == @'H') @<Copy Hollerith constant@>; if(in_format) return constant; prec_char = *loc; if (prec_char==@'e' || prec_char==@'E' || (FORTRAN_LIKE(language) && (prec_char==@'d' || prec_char==@'D' || prec_char==@'q' || prec_char==@'Q'))) @<Get the exponent field@>@; } @ Process the exponent part of a floating-point constant such as \.{1.5e-10} |@e = 1.5e-10|. @<Get the expon...@>= { *id_loc++ = EXP_CODE; // Control character for WEB power of ten. 
*id_loc++ = A_TO_UPPER(prec_char); loc++; // Skip past the exponent character. if (*loc==@'+' || *loc==@'-') *id_loc++ = *loc++; while (isDigit(*loc)) *id_loc++ = *loc++; } @ Hollerith constants have the form \.{3Habc}. @<Copy Hol...@>= @{ int k,n; @b *id_loc = '\0'; /* Temporarily make a true terminated string. */ n = ATOI(id_first); /* Convert the string to an integer constant. */ *id_loc++ = HOLLERITH_CODE; /* Control character for WEB Hollerith macro. */ ++loc; /* Skip the |'H'|. */ for(k=0; k<n && loc<limit && id_loc<mod_end; ++k) /* Copy the actual string. The count~|n| comes from the input, so we never read past the end of the current line or write past the end of |mod_text|. */ *id_loc++ = *loc++; return constant; } @ We don't yet handle correctly things like~\.{50UL}; it comes out like~|50UL|. @<Post-process...@>= if (C_LIKE(language)) { switch(*loc) { case @'l': case @'L': *id_loc++ = CONSTANT_CODE; loc++; if(*loc == @'u' || *loc == @'U') { *id_loc++ = ULONG_CODE; loc++; } else *id_loc++ = LONG_CODE; break; case @'u': case @'U': *id_loc++ = CONSTANT_CODE; loc++; if(*loc == @'l' || *loc == @'L') { *id_loc++ = ULONG_CODE; loc++; } else *id_loc++ = UNSIGNED_CODE; break; case @'f': case @'F': *id_loc++ = CONSTANT_CODE; *id_loc++ = FLOAT_CODE; loc++; break; } } else if(Fortran88) @<Absorb optional kind-param@>@; @ In \Fortran-90, there can be optional kind parameters after a constant, started off by an underscore. Example: |@r 50_4|. @<Absorb optional kind-param@>= { if(*loc == @'_') while(is_kind(*loc)) *id_loc++ = *loc++; } @ Code strings and character constants, delimited by double and single quotes, respectively, can contain newlines or instances of their own delimiters if they are protected by a backslash (for~C) or if the delimiter is repeated (for \FORTRAN). We follow this convention, but do not allow the string to be longer than |longest_name|. Special codes are inserted every |NBREAK| characters so that \TeX\ can break the strings. (The count is restarted after commas, which are also treated as discretionary breaks.) @d discretionary_break OCTAL(177) @d NBREAK 25 // \bf Put into style file? 
@<Glob...@>= EXTERN boolean insert_breaks SET(YES); /* No breaks inserted during limbo text processing. */ @ Here we absorb a string. Examples: \.{"abc"}, \.{'\\n'}, or \.{<file\_name>}. @<Part 1@>=@[ eight_bits get_string FCN((c,boz)) ASCII c C0("What started the string")@; ASCII boz C1("The boz character, or 0.")@; { ASCII delim = c; /* what started the string */ ASCII right_delim = c; int level,kount; boolean equal_delims; id_first = mod_text + 1; id_loc = mod_text; /* ???? */ if (delim==@'\'' && *(loc-2)==@'@@') {*++id_loc=@'@@'; *++id_loc=@'@@';} *++id_loc=delim; @<Determine the right matching delimiter@>@; kount = 0; /* How far since last discretionary line break command. */ WHILE() { /* Scan for end of string. */ if (loc>=limit) @<Check for continued string@>@; if ((c=*loc++)==delim) @<Handle left-hand delimiter@>@; if(c==right_delim) if(--level == 0) { if (++id_loc<=mod_end) *id_loc=c; break; /* Found end of string for unequal delims. */ } /* Handle a final backslash. */ if ((c==cont_char) && (C_LIKE(language) || (is_FORTRAN_(language) && free_form_input))) if (loc>=limit) continue; else if (++id_loc<=mod_end) { *id_loc = c; c=*loc++; } /* Store the character. */ if (++id_loc<=mod_end) *id_loc=c; @<Insert discretionary line-break commands@>@; } /* End of \&{while}. */ if (id_loc>=mod_end) { SET_COLOR(error); printf("\n! String too long: "); @.String too long@> ASCII_write(mod_text+1,25); printf("..."); mark_error; } id_loc++; @<Check for boz constant@>@; return stringg; } @ @<Determine the right...@>= { switch(delim) { case @'<': right_delim=@'>'; // for file names in |#include| lines. break; case @'(': right_delim = @')'; // For m4 \&{include} or related functions. sharp_include_line = NO; break; case @'[': right_delim = @']'; // For auto insertions in macro definitions. break; } level = 1; // For searching for balanced delimiters. 
equal_delims = BOOLEAN(right_delim==delim); } @ @<Check for continued string@>= { if( (equal_delims || chk_ifelse) && *(limit-1)!=cont_char) { err_print(W,"String %s with '%s%c' didn't end", BTRANS, delim==@'\'' ? "\\" : "", XCHR(delim)); loc=limit; break; @.String didn't end@> } if(!get_line()) { err_print(W,"Input ended in middle of string beginning with \ '\\%c'",XCHR(delim)); loc=cur_buffer; break; @.Input ended in middle of string@> } else { /* Now the continuation of the string is in the buffer. If appropriate, skip over beginning white space and backslash. */ if(bslash_continued_strings) { for(; loc < limit; loc++) if(*loc != @' ' && *loc != tab_mark) break; if(*loc == cont_char) loc++; /* Move past the backslash. */ else err_print(W,"Inserted '\\%c' at beginning of continued \ string",XCHR(cont_char)); } } } @ @<Handle left-hand delim...@>= { level++; if (++id_loc<=mod_end) *id_loc=c; if(!equal_delims) continue; if(FORTRAN_LIKE(language) && (*loc == delim) ) ++loc; /* Copy over repeated delimiter. */ else break; /* Found end of string. */ } @ Insert discretionary line-break command every |NBREAK| characters. Since the string macro also inserts discretionary breaks after commas, we reset the counter to~0 after a comma. As one annoyance, we don't want to insert a break immediately after an~`\.{@@}', because the output routines would otherwise get confused. @<Insert discretionary line-break...@>= if(insert_breaks) if(c == @',') kount = 0; else if(++kount >= NBREAK && c != @'@@' && ++id_loc<=mod_end) { kount = 0; *id_loc = discretionary_break; } @ In \Fortran-90, we have \It{boz-constants}---binary, octal, or hexadecimal constants that look like~`\.{B'011'}', `\.{O'077'}', or~`\.{Z'FF'}'. (The single quotes may be replaced by double quotes.) These constants may appear only in |@r data| statements. 
@<Check for boz...@>= { if(FORTRAN_LIKE(language)) if(boz) @<Handle boz constant@>@; else @<Handle VAX extensions of hex or octal constants@>@; } @ At this point we already know we're dealing with a boz constant. @<Handle boz...@>= { switch(boz) { case @'B': *id_first = BINARY_CODE; break; case @'O': *id_first = OCTAL_CODE; break; case @'Z': *id_first = HEX_CODE; break; } id_loc--; return constant; } @ Handle the VAX extensions of hex or octal constants---e.g., \.{'abc'X} or \.{'123'O}. @<Handle VAX exten...@>= { if(*loc==@'X' || *loc==@'x') { *id_first = HEX_CODE; /* Overwrite opening delimiter. */ @<Finish VAX hex/octal constant.@>@; } else if(*loc==@'O' || *loc==@'o') { *id_first = OCTAL_CODE; /* Octal */ @<Finish VAX hex...@>@; } } @ @<Finish VAX hex...@>= loc++; /* Skip the ending signifier. */ id_loc--; /* Forget closing delimiter. */ return constant; @ @<Glob...@>= EXTERN boolean doing_cdir SET(NO); @ After an \.{@@}~sign has been scanned, the next character tells us whether there is more work to do. Note that lower- and upper-case control codes are generally treated as variants of the same fundamental code; to distinguish them, we set the |upper_case_code| flag. When the code is in upper case, it does not automatically issue an implicit~\.{@@[}, for example. @<Part 1@>=@[ GOTO_CODE get_control_code(VOID) { eight_bits cc; /* The |ccode| value. */ @b c = *loc++; SET_CASE(c); // Set the |upper_case_code| flag. /* Deflect a verbatim comment beginning with `\.{@@\slashstar}'. */ if( (c==@'/' && (*loc==@'*' || *loc==@'/')) || c==(ASCII)begin_comment0 || c==(ASCII)begin_comment1) return GOTO_MISTAKE; switch(cc = ccode[c]) { case no_index: index_flag = NO; return MORE_PARSE; case yes_index: INDEX_SHORT; return MORE_PARSE; case defd_at: if(mark_defined.generic_name) { defd_switch = YES; // `\.{@@[}'. defd_type = GENERIC_NAME; } // \bf NOTE: Falls through. 
case underline: xref_switch = def_flag; // `\.{@@\_}' return MORE_PARSE; case implicit_reserved: if(mark_defined.imp_reserved_name) { typd_switch = defd_switch = YES; // `\.{@@`}'. defd_type = IMPLICIT_RESERVED; xref_switch = def_flag; } return MORE_PARSE; case switch_math_flag: math_flag=!math_flag; // `\.{@@\$}' return MORE_PARSE; #ifdef DEBUG case trace: tracing=c-@'0'; // `\.{@@0}', `\.{@@1}', `\.{@@2}' return MORE_PARSE; #endif /* |DEBUG| */ /* For language switches, we set the |language|, then send back a single code |begin_language|. When we process this, we'll then append another 8-bit code with the language number itself. */ @<Specific language cases@>: loc--; // Falls through to general case below. case L_switch: { @<Set the |language|...@>@; return begin_language; // `\.{@@L$l$}' } case begin_nuweb: ERR_PRINT(W,"@@N ignored; must appear before beginning of code part"); return MORE_PARSE; case xref_roman: case xref_wildcard: case xref_typewriter: case TeX_string: @<Scan to the next \.{@@>}@>@; /* `\.{@@\^\dots@@>}', `\.{@@9\dots@@>}', `\.{@@.\dots@@>}', and `\.{@@t\dots@@>}'. */ case module_name: mac_mod_name = NO; // Used as a flag for macro processing. @<Scan the module name and make |cur_module| point to it@>@; return module_name; // `\.{@@<\dots@@>}' case new_output_file: @<Scan the output file name@>@; return cc; case invisible_cmnt: if(*loc == @'%') eat_blank_lines = YES; loc = limit + 1; // Skip the line. return MORE_PARSE; // `\.{@@\%} case compiler_directive: case Compiler_Directive: long_comment = NO; doing_cdir = YES; return begin_comment; // `\.{@@!}' or `\.{@@?}' case verbatim: @<Scan a verbatim string@>@; // `\.{@@=\dots@@>}' case ascii_constant: return get_string(c,'\0'); // `\.{@@'\dots'}' case big_line_break: // `\.{@@\#}' if(loc >= limit) return cc; @<Process possible pre...@>; // In \.{typedefs.web}. return cc; case begin_bp: return @'{'; // Ought to improve this, to mark the debugging locations. 
case USED_BY_NEITHER: if(phase==1) err_print(W,"Invalid `@@%c' ignored",XCHR(c)); return ignore; default: return cc; } } @ The occurrence of a module name sets |xref_switch| to zero, because the module name might (for example) follow \&{int}. @<Scan the module name...@>= @{ ASCII HUGE *k; // Pointer into |mod_text|. static ASCII ell[] = @"..."; static ASCII bad_mod_name[] = @"!!! {\\it Incompatible} !!!"; @b @<Put module name into |mod_text|@>@; if (k-mod_text > 3 && STRNCMP(k-2,ell,3)==0) cur_module = prefix_lookup(mod_text+1,k-3); else cur_module = mod_lookup(mod_text+1,k); if(!cur_module) cur_module = mod_lookup(bad_mod_name,bad_mod_name+STRLEN(bad_mod_name)-1); if(cur_module) { @#if 0 language = (LANGUAGE)cur_module->Language; @#endif params = cur_module->mod_info->params;// Restore state for this module. frz_params(); } xref_switch = NO; /* The actual return value can be either |module_name| or |macro_module_name| and is put in explicitly right after the use of this module in the code. */ } @ Module names are placed into the |mod_text| array with consecutive spaces, tabs, and carriage-returns replaced by single spaces. There will be no spaces at the beginning or the end. (We set |mod_text[0]=' '| to facilitate this, since the |mod_lookup| routine uses |mod_text[1]| as the first character of the name.) @<Set init...@>= mod_text[0] = @' '; @ Here we copy the text of the module name, stripping off white space from the front and back. Also, we convert any real semicolons into interior semis. This helps out with language switches between \Fortran\ and~C, for example. If the global language were~C, then a module name that should be read in \Fortran\ will first be absorbed in~C because the parser doesn't know yet which language it will be. @<Put module name...@>= { int mlevel = 1; // For nested module names. 
k = mod_text; WHILE() { if (loc>limit && !get_line()) { ERR_PRINT(W,"Input ended in section name"); @.Input ended in section name@> loc=cur_buffer+1; break; } c = *loc; @<If end of name, |break|@>; loc++; if (k<mod_end) k++; switch(c) { case @' ': case tab_mark: c=@' '; if (*(k-1)==@' ') k--; // Compress white space. break; case @';': c = interior_semi; break; } *k = c; } if (k>=mod_end) { SET_COLOR(warning); printf("\n! Section name too long: "); @.Section name too long@> ASCII_write(mod_text+1,25); printf("..."); mark_harmless; } if (*k==@' ' && k>mod_text) k--; // Trailing blanks. } @ @<If end of name,...@>= if (c==@'@@') { c = *(loc+1); if (c==@'>') { if(--mlevel == 0) { loc+=2; break; } } else if(c==@'<') mlevel++; if (ccode[c]==new_module) { ERR_PRINT(W,"Section name didn't end"); break; @.Section name didn't end@> } *(++k) = @'@@'; loc++; // Now |c==*loc| again. } @ This fragment is used for skipping over control text, such as `\.{@@t\dots@@>}'. @<Scan to the next...@>= { cc = ccode[*(loc-1)]; /* Is this statement redundant? */ id_first=loc; *(limit+1)=@'@@'; while (*loc!=@'@@') loc++; id_loc=loc; if (loc++>limit) { ERR_PRINT(W,"Control text didn't end"); loc=limit; return cc; @.Control text didn't end@> } if (*loc++!=@'>') ERR_PRINT(W,"Control codes are forbidden in control text"); @.Control codes are forbidden...@> return cc; } @ At the present point in the program we have |*(loc-1)=verbatim|; we set |id_first| to the beginning of the string itself, and |id_loc| to its ending-plus-one location in the buffer. We also set~|loc| to the position just after the ending delimiter. @<Scan a verbatim string@>= { id_first=loc++; *(limit+1)=@'@@'; *(limit+2)=@'>'; while (*loc!=@'@@' || *(loc+1)!=@'>') loc++; if (loc>=limit) ERR_PRINT(W,"Verbatim string didn't end"); @.Verbatim string didn't end@> id_loc=loc; loc+=2; return (verbatim); } @* PHASE ONE PROCESSING. 
We now have accumulated enough subroutines to make it possible to carry out \.{WEAVE}'s first pass over the source file. If everything works right, both phase one and phase two of \.{WEAVE} will assign the same numbers to modules, and these numbers will agree with what \.{TANGLE} does. The global variable |next_control| often contains the most recent output of |get_next|; in interesting cases, this will be the control code that ended a module or part of a module. @<Global...@>= EXTERN eight_bits next_control; /* control code waiting to be acted upon */ @ The overall processing strategy in phase one has the following straightforward outline. @<Part 1@>=@[ SRTN phase1(VOID) { /* Driver for the first pass: after calling |rst_input| and |skip_limbo|, it stores cross-reference data one module at a time until |input_has_ended|, then reports unused or undefined module names. */ LANGUAGE language0=language; phase = 1; the_part = LIMBO; rst_input(); reading(web_file_name,(boolean)(tex_file==stdout)); module_count = 0; skip_limbo(); // Skip stuff before any module (but process language commands). change_exists = NO; /* Remember the language to put into force at the beginning of each module. |language| may have been set from the command line, by default (nothing on the command line), or by explicit~\.{@@c}, \.{@@r}, \.{@@n}, or~\.{@@L$l$} commands during the limbo phase. */ chk_override(language0); fin_language(); /* Make sure all flags are initialized properly. */ global_params = params; while (!input_has_ended) @<Store cross-reference data for the current module@>; chngd_module[module_count]=change_exists; /* the index changes if anything does */ @<Print error messages about unused or undefined module names@>; } @ @<Store cross-reference data...@>= { the_part = TEX_; if (++module_count==(sixteen_bits)max_modules) OVERFLW("section numbers",ABBREV(max_modules)); chngd_module[module_count]=NO; // It will become |YES| if any line changes. progress(); /* All modules start off in the global language. 
*/ params = global_params; frz_params(); @<Store cross-references in the \TeX\ part of a module@>; @<Store cross-references in the definition part of a module@>; @<Store cross-references in the \cee\ part of a module@>; if(chngd_module[module_count]) change_exists=YES; typd_switch = defd_switch = NO; // Don't propagate beyond one module. } @ The |C_xref| subroutine stores references to identifiers in \cee\ text material beginning with the current value of |next_control| and continuing until |next_control| is~`\.\{' or~`\v', or until the next ``milestone'' is passed (i.e., |next_control>=formatt|). If |next_control>=formatt| when |C_xref| is called, nothing will happen; but if |next_control="|"| upon entry, the procedure assumes that this is the~`\v' preceding \cee\ text that is to be processed. The program uses the fact that our internal code numbers satisfy the relations |xref_roman=identifier+roman| and |xref_wildcard=identifier +wildcard| and |xref_typewriter=identifier+typewriter| and |normal=0|. @<Part 1@>=@[ SRTN C_xref FCN((part0,mode0)) PART part0 C0("")@; PARSING_MODE mode0 C1("")@; { PARAMS outer_params; PARSE_PARAMS parse_params0; name_pointer p; /* a referenced name */ parsing_mode = mode0; if(parsing_mode == INNER) { outer_params = params; /* Store whole structure. */ parse_params0 = parse_params; } if(language == LITERAL) next_control = begin_meta; do_inside = YES; while (next_control<formatt) { switch(next_control) { case begin_language: @<Handle a possible language switch in the middle of the module@>@; break; case toggle_output: @<Toggle output@>@; break; case begin_meta: @<Skip over meta-comment@>@; break; case identifier: case xref_roman: case xref_wildcard: case xref_typewriter: p=id_lookup(id_first,id_loc, (eight_bits)(next_control-identifier)); new_xref(part0,p); if(part0 == DEFINITION) defd_switch = NO; /* Prevent the implicit~\.{@@[} from propagating beyond the first identifier. 
*/
			if(next_control==identifier && C_LIKE(language)
					&& parsing_mode == OUTER)
				{
				if(p->ilk == typedef_like)
					@<Mark \&{typedef} variable@>@;
				else if(p->ilk == class_like)
					@<Mark \&{class} variable@>@;
				}
			break;

		case stringg:
			if(sharp_include_line && phase == 1 && read_iformats
					&& C_LIKE(language))
				get_iformats();
			break;
		}

	next_control=get_next();

	if ( next_control==@'|' || next_control==begin_comment)
		break;
	}

end_xref:
	if(parsing_mode==INNER)
		{
		params = outer_params;
		frz_params();
		parse_params = parse_params0;
		parsing_mode = OUTER;
		}
}

@ The names of the temporary files are built by |mktmp|, which appends a
period and an extension to whatever |tmpnam| or |tempnam| returns.  The
buffers must therefore be longer than |L_tmpnam|; we allow for a full
directory prefix as well.

@<Glob...@>=

IN_COMMON outer_char wbprefix[MAX_FILE_NAME_LENGTH];

EXTERN boolean do_inside; // Cross-reference stuff inside a \&{typedef}?
EXTERN boolean qtd_file; // Is the include file quoted?

#ifndef L_tmpnam
#define L_tmpnam 25
#endif

#define TEMP_NAME_LENGTH (MAX_FILE_NAME_LENGTH + L_tmpnam)

EXTERN outer_char temp_in[TEMP_NAME_LENGTH], temp_out[TEMP_NAME_LENGTH];
	// Names of temporary files used in |get_iformats|.

@ To scan an include file for |typedef| and/or |@c++ class| statements, we
use two temporary files whose names are |temp_in| and |temp_out|.  These
are created once, the first time |get_iformats| is called (so we don't call
|tmpnam| possibly many times).  The include command is written into
|temp_in|.  By means of issuing a |system| command, the C preprocessor
expands that command and writes its results to |temp_out|.  Then \FWEAVE\
parses that file, cross-referencing only the |typedef| and/or |@c++ class|
variables.

Presently, this only works for the \.{gcc} and \.{g++} compilers.

If the include file name is too long for the local buffer, or if no
temporary file name can be made, a warning is printed and the include
file is simply not scanned.

@<Part 1@>=@[

SRTN
get_iformats(VOID)
{
int n, new_depth;
outer_char file_name[256];
outer_char temp[MAX_FILE_NAME_LENGTH + 2*TEMP_NAME_LENGTH + 32];
	/* Holds the preprocessor command:  the fixed text is about 20
characters; the rest is |wbprefix| and the two temporary names. */
FILE *ftemp_in;
PART part0 = CODE;

/* The include file name, with its delimiters, lies between |id_first| and
|id_loc|.  Refuse it before touching any file if it won't fit. */
n = PTR_DIFF(int, id_loc, id_first);

if(n < 0 || n >= (int)sizeof(file_name))
	{
	printf("\n! Include file name is too long; file not scanned");
	mark_harmless;
	return;
	}

if(!temp_in[0])
	mktmp(temp_in,
		language==C ? wt_style.output_ext.C_ : wt_style.output_ext.Cpp_);

/* If |mktmp| failed, |temp_in| is empty and the open fails too. */
if((ftemp_in = FOPEN(temp_in, "w")) == NULL)
	{
	printf("\n! Can't open temporary file `%s'", temp_in);
	mark_harmless;
	read_iformats = NO;
	return;
	}

if(!temp_out[0])
	mktmp(temp_out, (outer_char *)"");

if(!temp_out[0])
	{ /* |mktmp| couldn't supply a usable name. */
	fclose(ftemp_in);
	printf("\n! Can't make name for temporary output file");
	mark_harmless;
	read_iformats = NO;
	return;
	}

/* We don't open the output file here, as \.{cpp} may not write into it if
it's open. */

preprocessing = sharp_include_line = NO;

/* Copy include file name, include delimiters. */
STRNCPY(file_name, id_first, n);
file_name[n] = '\0';
to_outer((ASCII HUGE *)file_name);
qtd_file = BOOLEAN(file_name[0] == '"');
	// Is this file name quoted (i.e., look locally)?

/* Write the include file command to temporary file, so the preprocessor
can read it. */
fprintf(ftemp_in, "#include %s\n", file_name);
fclose(ftemp_in);

/* Create a command to run the preprocessor.  We tell the preprocessor to
look first in the |wbprefix| directory, then in the current directory.
(Note the use of the \.{-I.} command of \.{gcc}, which looks in the
directory current when the compiler was invoked.) */
sprintf((char *)temp, "\n%s -E -P -I%s -I. -o %s %s",
	language==C ? "gcc" : "g++",
	*wbprefix ? (char *)wbprefix : ".",
	temp_out, temp_in);

if(!rmv_files)
	puts((char *)temp);
		// Echo the |system| command that runs the preprocessor.

system((CONST char *)temp);

@<Deflect the input file to be \.{temp\_out}@>@;

if(new_depth != incl_depth || !get_line())
	goto restore; // No file, or nothing in it.

do_inside = NO;
	// This flag says to not xref stuff inside braces of \&{typedef}.

next_control = get_next();

/* Parse the preprocessed include file until EOF is reached and the
|incl_depth| changes. */
while(new_depth == incl_depth)
	{
	name_pointer p;

	switch(next_control)
		{
		case identifier:
			p=id_lookup(id_first,id_loc,
				(eight_bits)(next_control-identifier));

			if(p->ilk == typedef_like)
				@<Mark \&{typedef} variable@>@;
			else if(p->ilk == class_like)
				@<Mark \&{class} variable@>@;

			break;
		}

	next_control=get_next();
	}

end_xref:
restore:
	preprocessing = sharp_include_line = YES;
}

@ The following commands are borrowed with slight modifications from
\.{common.web}.

@<Deflect...@>=
{
if(++incl_depth >= (int)max_include_depth)
	{
	incl_depth--;
	err_print(C, "Too many nested includes; %d allowed. \
Increase with `-yid'.", max_include_depth);
@.Too many nested includes@>
	goto restore;
	}

	{ /* No change file name specified; obtain it from the last level. */
	INPUT_PRMS *p_lower = &prms[incl_depth-1];
	INPUT_PRMS0 *p0_lower = &p_lower->change;

	STRCPY(change_file_name,p0_lower->File_name);
	change_file = p0_lower->File;
	change_params = p_lower->input_params;
	}

STRCPY(cur_file_name, temp_out);
new_depth = incl_depth;
	/* If the open below fails, |incl_depth| drops again and the caller
sees the mismatch. */

	{
	IN_COMMON INCL_PATHS incl;

	if(ini_input_prms(CUR_FILE, incl.list, NO))
		{
		if(cur_prms.change->File != prms[incl_depth-1].change.File)
			{}
		else
			*cur_prms.change = prms[incl_depth-1].change;
				// Still using the old change file.

		cur_line = 0;
		prn_where = YES;
/* Instead of printing the names of the temporary files, we print the
include file name itself. */
		CLR_PRINTF(include_file,(" (%s", file_name));
			/* Tell the terminal where we're reading from. */
		}
	else
		{ /* Failed to open include file. */
		incl_depth--;
		}
	}
}

@ The following is called from |wrap_up()| in \.{common.web}.  A temporary
file is removed whenever its name was actually made, even if
|read_iformats| was switched off later because of an error.

@<Part 1@>=@[

SRTN
cls_files(VOID)
{
if(rmv_files)
	{
	if(temp_in[0])
		remove((CONST char *)temp_in);

	if(temp_out[0])
		remove((CONST char *)temp_out);
	}
}

@ Make a temporary file name, and append an extension.  We use |tempnam| if
possible, because it gives more control over the directory.  Otherwise, we
use the ANSI |tmpnam|.

The result is written into |file_name|, which is also the return value.
If the library routine can't supply a name, or if the name plus extension
would not fit into |TEMP_NAME_LENGTH| characters, |file_name| is left
empty.  (The string returned by |tempnam| is not released; this routine is
called at most twice per run from |get_iformats|.)

@<Part 1@>=@[

outer_char *
mktmp FCN((file_name, ext))
	outer_char *file_name C0("")@;
	outer_char *ext C1("")@;
{
outer_char *buffer;
size_t len;

#if(HAVE_TEMPNAM)
	extern char *tempnam();

	if(!*wbprefix)
		STRCPY(wbprefix,"./");

	buffer = (outer_char *)tempnam((char *)wbprefix, "FTMP");
		// Non-|ANSI|, but more control over directory.
#else
	buffer = (outer_char *)tmpnam(NULL); // |ANSI| routine.
#endif

*file_name = '\0'; /* An empty name signals failure to the caller. */

if(buffer == NULL)
	return file_name;

len = STRLEN(buffer);

if(*ext)
	len += STRLEN(ext) + 1; /* Room for the period and the extension. */

if(len >= (size_t)TEMP_NAME_LENGTH)
	return file_name;

STRCPY(file_name, buffer);

if(*ext)
	{
	STRCAT(file_name, ".");
	STRCAT(file_name, ext);
	}

return file_name;
}

@ When an include line of the form |#include <test.h>| is sensed in C
or \Cpp, we would like to open the related file \.{test.H} and process it
for format commands.
(Processing \.{test.h} would format and cross-reference many variables that the user wouldn't care to know about.) See ``Push stack'' code in \.{common.web}. @d change_params prms[incl_depth].input_params @<Unused@>= SRTN get_iformats(VOID) { outer_char temp[100], HUGE *period; int n; int new_depth; preprocessing = sharp_include_line = NO; STRNCPY(temp, id_first+1, n=PTR_DIFF(int, id_loc, id_first)-2); temp[n] = '\0'; to_outer((ASCII HUGE *)temp); if(!(period = (outer_char HUGE *)STRRCHR(temp, '.'))) goto restore; period[1] = '\0'; STRCAT(temp, w_style.misc.include_ext); if(++incl_depth >= (int)max_include_depth) { incl_depth--; err_print(C, "Too many nested includes; %d allowed. \ Increase with `-yid'.", max_include_depth); @.Too many nested includes@> goto restore; } { /* No change file name specified; obtain it from the last level. */ INPUT_PRMS *p_lower = &prms[incl_depth-1]; INPUT_PRMS0 *p0_lower = &p_lower->change; STRCPY(change_file_name,p0_lower->File_name); change_file = p0_lower->File; change_params = p_lower->input_params; } STRCPY(cur_file_name, temp); new_depth = incl_depth; { IN_COMMON INCL_PATHS incl; if(ini_input_prms(CUR_FILE,incl.list,NO)) { if(cur_prms.change->File != prms[incl_depth-1].change.File) {} else *cur_prms.change = prms[incl_depth-1].change; // Still using the old change file. cur_line = 0; prn_where = YES; CLR_PRINTF(include_file,(" (%s", (char *)cur_file_name)); /* Tell the terminal where we're reading from. */ } else { /* Failed to open include file. 
*/ incl_depth--; } } if(new_depth != incl_depth || !get_line()) goto restore; next_control = get_next(); while(new_depth == incl_depth) { switch(next_control) { case formatt: pr_format(NO, NO); break; default: ERR_PRINT(W, "Invalid command in #include file"); break; } } restore: preprocessing = sharp_include_line = YES; } @ @<Skip over meta-comment@>= { WHILE() { if(!get_line()) if(language == LITERAL) { next_control = new_module; goto done_meta; } else { ERR_PRINT(W,"Input ended during meta-comment"); break; } if(*loc == @'@@') switch(*(loc+1)) { case @')': get_line(); case @'*': case @' ': next_control = new_module; goto done_meta; case @'<': next_control = module_name; goto done_meta; } } done_meta:; } @ For the forward-referencing facility, we need to format the variable of a \&{typedef} during phase~1. We mark the first variable we come to that isn't reserved and isn't enclosed by braces. (We must format identifiers even if they're inside braces.) @<Mark \&{typedef} variable@>= { int brace_level = 0; boolean typedefd_it = NO; /* First, we scan over a possible |struct|. */ while((next_control=get_next()) == identifier) if((p=id_lookup(id_first,id_loc,0))->ilk != struct_like) { new_xref(part0,p); // Structure name: ``|typedef struct s@;|''. next_control = get_next(); // Don't repeat the structure name. break; } while(next_control <=module_name) { switch(next_control) { case @'{': brace_level++; break; case @'}': if(brace_level-- == 0) { ERR_PRINT(W,"Extra '}' in typedef"); goto done; } break; case identifier: p = id_lookup(id_first,id_loc,0); if(brace_level == 0 && !typedefd_it) { if(is_reserved(p)) break; defd_switch = BOOLEAN(mark_defined.typedef_name); defd_type = TYPEDEF_NAME; typd_switch = YES; INDEX_SHORT; new_xref(part0,p); } else if(do_inside) new_xref(part0,p); if(brace_level == 0 && !typedefd_it) typedefd_it = YES; /* Don't do any more (e.g., array dimensions). (But this means one can't yet do |BB| in |typedef int AA, BB@;|.) 
*/ break; case formatt: case limbo_text: case op_def: case macro_def: case definition: case undefinition: case WEB_definition: case begin_code: case new_output_file: case protect_code: ERR_PRINT(W,"You can't do that inside a typedef"); break; case module_name: if(cur_module) new_mod_xref(cur_module); next_control = get_next(); if(next_control == @'=') { ERR_PRINT(W,"'=' not allowed after @@<...@@> \ inside typedef; check typedef syntax. Inserted ';'"); next_control = @';'; } continue; case @';': if(brace_level == 0) goto done; // End of |typedef|. break; case begin_comment: @<Handle a comment@>@; break; } next_control = get_next(); } done: defd_switch = typd_switch = NO; // Just in case we screwed up. if(next_control == new_module) { ERR_PRINT(W,"Module ended during typedef"); goto end_xref; } } @ Similarly, \&{class} variables should be formatted during phase~1. @<Mark \&{class}...@>= { if((next_control=get_next()) == identifier) { p = id_lookup(id_first,id_loc,0); defd_switch = BOOLEAN(mark_defined.typedef_name); defd_type = TYPEDEF_NAME; typd_switch = YES; INDEX_SHORT; new_xref(part0,p); typd_switch = NO; } } @ The |language| has already been set inside |get_next()| when we get to here. @<Handle a possible language switch...@>= switch(language) { case NO_LANGUAGE: CONFUSION("handle possible language switch", "Language isn't defined"); case FORTRAN: case FORTRAN_90: case RATFOR: case RATFOR_90: if(mode0 == OUTER && !free_form_input) @<Set up column mode@>@; break; case TEX: if(mode0 == OUTER) @<Set up col...@>@; break; case C: case C_PLUS_PLUS: case LITERAL: column_mode = NO; break; case NUWEB_OFF: case NUWEB_ON: CONFUSION("handle possible language switch","Invalid langage"); } @ The |outr_xref| subroutine is like |C_xref| but it begins with |next_control!='|'| and ends with |next_control>=formatt|. Thus, it handles \cee\ text with embedded comments. 
@<Part 1@>=@[

SRTN
outr_xref FCN((part0)) /* extension of |C_xref| */
	PART part0 C1("")@;
{
while (next_control<formatt)
	if (next_control!=begin_comment)
		C_xref(part0,OUTER);
	else
		@<Handle a comment@>@;
}

@ Deal with a comment inside C~text.

@<Handle a comment@>=
{
int bal; // Brace level in comment.

bal = copy_comment(1);
next_control = @'|';

doing_cdir = NO;

while (bal>0)
	{ /* Inside comment. */
	in_comment = YES;
	C_xref(part0,INNER);

	if (next_control==@'|')
		bal = copy_comment(bal);
	else
		bal = 0; // An error message will occur in phase 2.
	}
}

@ In the \TeX\ part of a module, cross-reference entries are made only for
the identifiers in \cee\ texts enclosed in~\Cb, or for control texts
enclosed in \.{@@\^}$\,\ldots\,$\.{@@>} or \.{@@.}$\,\ldots\,$\.{@@>}
or \.{@@9}$\,\ldots\,$\.{@@>}.

@<Store cross-references in the \T...@>=
{
the_part = TEX_;

WHILE()
	{
	switch (next_control=skip_TeX())
		{
		@<Specific language cases@>:
			loc--; // Falls through to general case below.

		case L_switch:
			{
			@<Set the |language|...@>;
			continue;
			}

		case begin_nuweb:
			nuweb_mode = !NUWEB_MODE;
			continue;

		case toggle_output:
			@<Toggle output@>@;
			continue;

		case underline:
			xref_switch = def_flag;
			continue;

#ifdef DEBUG
		case trace:
			tracing=next_control-@'0';
			continue;
#endif /* |DEBUG| */

		case @'|':
			while(next_control <= module_name)
				{
				C_xref(TEX_,INNER);

				if(next_control == @'|'
						|| next_control == new_module)
					break;

				next_control = get_next();

				if(next_control == @'|')
					break;
				}
			break;

		case xref_roman:
		case xref_wildcard:
		case xref_typewriter:
		case macro_module_name:
		case module_name:
			loc-=2;
			next_control=get_next(); // Scan to \.{@@>}.

			if( !(next_control==module_name
					|| next_control==macro_module_name) )
				new_xref(TEX_,id_lookup(id_first,id_loc,
					(eight_bits)(next_control-identifier)));
			break;

		case invisible_cmnt:
			loc = limit + 1;
			break;
		}

	if (next_control>=formatt)
		break;
	}
}

@ During the definition and \cee\ parts of a module, cross-references are
made for all identifiers except reserved words; however, the identifiers in
a format definition are referenced even if they are reserved. The \TeX\
code in comments is, of course, ignored, except for \cee\ portions enclosed
in~\Cb; the text of a module name is skipped entirely, even if it contains
\Cb~constructions.

The variables |lhs| and |rhs| point to the respective identifiers involved
in a format definition.

@<Global...@>=

EXTERN name_pointer lhs, rhs; /* pointers to |byte_start| for format identifiers */

@ When we get to the following code we have |next_control>=formatt|.

@d KILL_XREFS(name) no_xref |= !defn_mask.name
@d INDEX_SHORT index_short = index_flag = YES // Implicit \.{@@~}.

@<Store cross-references in the d...@>=
{
boolean no_xref0 = no_xref;
	/* |KILL_XREFS| may set |no_xref|; the old value is put back after
each command has been processed. */

the_part = DEFINITION;

while (next_control<begin_code)
	{ /* |formatt| or |definition| or |WEB_definition| or \.{@@\#...}
		command. */

/* First, set the switches that control marking and indexing of whatever
is being defined. */
	switch(next_control)
		{
		case WEB_definition:
			if(mark_defined.WEB_macro && lower_case_code)
				defd_switch = YES; // Implied \.{@@[}.

			xref_switch = def_flag; /* Implied \.{@@\_} */
			defd_type = M_MACRO;
			KILL_XREFS(macros);
			INDEX_SHORT;
			break;

		case m_undef:
			KILL_XREFS(macros);
			INDEX_SHORT;
			break;

		case definition:
/* This test used to read |mark_defined.outer_macro &&
mark_defined.outer_macro|; the second operand is now |lower_case_code|,
in parallel with the |WEB_definition| case above. */
			if(mark_defined.outer_macro && lower_case_code)
				defd_switch = YES; // Implied \.{@@[}.

			xref_switch = def_flag; /* Implied \.{@@\_} */
			defd_type = D_MACRO;
			KILL_XREFS(outer_macros);
			INDEX_SHORT;
			break;

		case undefinition:
			KILL_XREFS(outer_macros);
			INDEX_SHORT;
			break;

		case m_ifdef:
		case m_ifndef:
			INDEX_SHORT;
			break;
		}

/* Next, absorb the command itself. */
	switch(next_control)
		{
		case formatt:
			pr_format(YES, YES);
			break;

		case limbo_text:
			@<Absorb limbo text@>@;
			break;

		case op_def:
			@<Overload an operator@>@;
			break;

		case macro_def:
			@<Overload an identifier@>@;
			break;

		case invisible_cmnt:
			loc = limit + 1; // Skip the line.
			/* Falls through on purpose, to fetch the next token. */

		default:
			next_control=get_next();
			break;
		}

	outr_xref(DEFINITION);
	no_xref = no_xref0;
	}
}

@ The syntax of a format definition is ``\.{@@f\ new\_name\ old\_name}'' or
``\.{@@f\ `\{\ 10}''.  Error messages for improper format definitions of
the first kind will be issued in phase two; for the second kind, in phase
one. For the first kind, our job in phase one is to define the |ilk| of a
properly formatted identifier, and to fool the |new_xref| routine into
thinking that the identifier on the right-hand side of the format
definition is not a reserved word. For the second kind, we must actually
change the category code of a \TeX\ character, and that must be done in
phase one so future identifiers can be resolved properly.

@<Part 1@>=@[

SRTN
pr_format FCN((xref_lhs, xref_rhs))
	boolean xref_lhs C0("")@;
	boolean xref_rhs C1("")@;
{
eight_bits last_control,rhs_ilk;
LANGUAGE saved_language = language;

if(upper_case_code)
	KILL_XREFS(Formats);
else
	KILL_XREFS(formats);

INDEX_SHORT;

/* While the command is scanned, the \TeX\ language is replaced by~C; it
is put back at the end. */
if(language==TEX)
	language = C;

last_control = next_control = get_next();
	/* Identifier or module name to be formatted, or |ASCII| character. */

if (next_control==identifier || next_control==module_name)
	@<Process an identifier or module name@>@;
else if(next_control==@'`')
	@<Change a category code@>@;

if(saved_language==TEX)
	language = saved_language;
}

@ Here we deal with format commands of the form
``\.{@@f\ new\_name\ old\_name}''.
@<Process an identifier...@>= { if(next_control==identifier) { lhs=id_lookup(id_first, id_loc, normal); lhs->ilk=normal; if(xref_lhs) new_xref(DEFINITION,lhs); } else lhs = cur_module; next_control=get_next(); if (next_control==identifier) { /* Format the lhs like this one. */ rhs=id_lookup(id_first, id_loc,normal); if(lhs != NULL) { if(last_control==identifier) @<Format the left-hand side@>@; else lhs->mod_ilk = rhs->ilk; // We're formatting a module name. } /* Take care of the possibility that the rhs may not yet have been encountered. */ if(xref_rhs) { rhs_ilk = rhs->ilk; rhs->ilk=normal; new_xref(DEFINITION,rhs); rhs->ilk=rhs_ilk; } next_control=get_next(); } } @ Set the appropriate format bit. @<Format the left-hand side@>= { lhs->ilk = rhs->ilk; /* First turn off the old lhs bit (retaining all others), then add in the new bit for the current language. */ #define RST_BIT(field) lhs->field = BOOLEAN(lhs->field & ~(boolean)language)\ | (rhs->field & (boolean)language) RST_BIT(reserved_word); RST_BIT(Language); RST_BIT(intrinsic_word); RST_BIT(keyword); #undef RST_BIT } @ Here we consider format commands of the form ``\.{@@f\ `\{\ 10}''. |get_TeX|~leaves the (|outer_char|) constant string between [|id_first|,|id_loc|). @<Change a category code@>= { if((next_control = get_TeX()) != constant) ERR_PRINT(W,"Invalid @@f command: \ One of the representations `a, `\\a, or `^^M is required"); else { int c = TeX_char(); // Convert the |ASCII| code in |id_first|. next_control = get_next(); // Now expecting integer category code. if(next_control != constant) ERR_PRINT(W,"Invalid category code"); else { TeX_CATEGORY cat; TERMINATE(id_loc,0); cat = (TeX_CATEGORY)ATOI(id_first); // Numerical value of new cat code. if((int)cat < 0 || (int)cat > 15) ERR_PRINT(W,"Category code must be between 0 and 15"); else TeX[c] = cat; // Change the category code. 
next_control = get_next(); } } } @ We require a special routine to obtain an |ASCII| character in \TeX's representation after a~'\.`'. On entry, |loc|~is positioned after the~'\.`'. The possible representations are~`\.{a}', `\.{\\a}', or~`\.{\^\^M}'. @<Part 1@>=@[ eight_bits get_TeX(VOID) { if(loc >= limit) { ERR_PRINT(W,"@@f line ends prematurely"); return ignore; } id_first = id_loc = mod_text + 1; if(*loc == @'\\') *id_loc++ = *loc++; else if(*loc == @'^' && *(loc+1) == @'^') { // \TeX's way of representing control characters. *id_loc++ = *loc++; @~ *id_loc++ = *loc++; } if(*loc == @'@@') if(*(loc+1) == @'@@') loc++; else ERR_PRINT(W,"You should say `@@@@"); *id_loc++ = *loc++; // Position to next non-processed character. *id_loc = '\0'; id_first = esc_buf(id_loc+1,mod_end,id_first,YES); to_outer(id_first); return constant; } @ Here we convert the constant obtained in the previous routine into an |ASCII| character. @<Part 1@>=@[ int TeX_char(VOID) { int c; while(*id_first == @'\\') id_first++; if(*id_first == @'^' && *(id_first+1) == @'^') { c = *(id_first+2); if(c >= 64) c -= 64; else c += 64; } else c = *id_first; return c; } @ Limbo text commands have the form ``\.{@@l\ "abc\\ndef"}'', and must be absorbed during phase one so they can be dumped out at the beginning of phase two. @<Absorb limbo text@>= { LANGUAGE language0 = language; KILL_XREFS(limbo); if(language==TEX) language = C; // In order to absorb strings properly. insert_breaks = NO; // We want the string to be absorbed completely literally. if((next_control = get_next()) != stringg) ERR_PRINT(W,"String must follow @@l"); else { // Begin by stripping off delimiting quotes. for(id_first++,id_loc--; id_first<id_loc; ) { if(*id_first==@'@@') { if(*(id_first+1)==@'@@') id_first++; else ERR_PRINT(W,"Double @@ should be used in strings"); } /* Deal with escape sequences. */ if(*id_first == @'\\') { id_first++; /* Splitting the following line before |HUGE| led to compiler problem with VAX/VMS. 
*/ app_tok(esc_achar( (CONST ASCII HUGE*HUGE*)&id_first))@; } else app_tok(*id_first++); } freeze_text; /* We'll know we've collected stuff because |text_ptr| will be advanced. */ } insert_breaks = YES; language = language0; } @ The syntax of an operator-overloading command is ``\.{@@v\ .IN.\ "\\in"\ +}''. @<Overload an op...@>= { OPERATOR HUGE *p,HUGE *p1; KILL_XREFS(v); /* Look at the first field, which should be an operator or a dot-op. */ next_control = get_next(); if(next_control == identifier) ERR_PRINT(W,"For future compatibility, please use syntax `.NAME.' for \ overloading dot operators"); if(!(p=valid_op(next_control))) ERR_PRINT(W,"Operator after @@v is invalid"); else { if(get_next() != stringg) ERR_PRINT(W,"Second argument (replacement text) \ of @@v must be a quoted string"); else { int k = language_num; OP_INFO HUGE *q = p->info + k; int n = PTR_DIFF(int, id_loc, id_first) - 2; /* Don't count the string delimiters. */ outer_char HUGE *s; if(q->defn) FREE_MEM(q->defn,"q->defn",STRLEN(q->defn)+1, outer_char); q->defn = GET_MEM("q->defn",n+1,outer_char); *(id_loc-1) = '\0'; // Kill off terminating quote. for(s=q->defn,id_first++; *id_first; s++) if(*id_first == @'\\') { id_first++; *s = XCHR(esc_achar((CONST ASCII HUGE *HUGE*)&id_first)); } else *s = XCHR(*id_first++); overloaded[k] = q->overloaded = YES; /* There may be several representations with the same name. */ for(p1=op; p1<op_ptr; p1++) { if(p1==p || !p1->op_name) continue; if(STRCMP(p1->op_name,p->op_name) == 0) { OP_INFO HUGE *q1 = p1->info + k; if(q1->defn) FREE_MEM(q1->defn,"q1->defn", STRLEN(q1->defn)+1,outer_char); q1->defn = GET_MEM("q1->defn",n+1,outer_char); STRCPY(q1->defn,q->defn); q1->overloaded = YES; } } /* Get the new category and set it. If the last construction isn't recognized as a valid operator, the category is set to |expr|. */ p = valid_op(next_control=get_next()); q->cat = (p ? 
p->info[k].cat : (eight_bits)expr); } } } @ The syntax for overloading an identifier is ``\.{@@w\ \It{id}\ "\dots"}'', or the string replacement text can be replaced by~'\..', which means just prepend a backslash to make it into a macro name. @d QUICK_FORMAT @'.' // The shorthand for overloading like itself. @<Overload an id...@>= { if((next_control=get_next()) != identifier) ERR_PRINT(W,"Identifier must follow @@w"); else { name_pointer p = id_lookup(id_first,id_loc,normal); int n,offset; WV_MACRO HUGE *w; ASCII HUGE *s; ASCII HUGE *id_first0, HUGE *id_loc0; /* Index the identifier (but not defined). Force short identifiers to be indexed. */ KILL_XREFS(w); INDEX_SHORT; new_xref(DEFINITION, p); /* Remember the first identifier. */ id_first0 = id_first; id_loc0 = id_loc; switch(next_control=get_next()) { case @'\\': if((next_control = get_next()) != identifier) { ERR_PRINT(W,"Identifier must follow '\\'"); break; } next_control = ignore; /* We don't want to put the identifier into the index. */ goto quick_code; case QUICK_FORMAT: id_first = id_first0; id_loc = id_loc0; quick_code: offset = 1; n = PTR_DIFF(int, id_loc, id_first) + 1; *id_loc = '\0'; goto fmt_like_string; case stringg: { offset = 0; n = PTR_DIFF(int, id_loc, id_first) - 2; // Don't count quotes. *(id_loc-1) = '\0'; id_first++; // Skip over opening quote. fmt_like_string: p->wv_macro = w = GET_MEM("wv_macro",1,WV_MACRO); w->text = GET_MEM("w->text",n+1,outer_char); if(offset) *w->text = @'\\'; for(s=w->text + offset; *id_first; s++) if(*id_first == @'\\') { id_first++; *s = esc_achar((CONST ASCII HUGE *HUGE*)&id_first); } else *s = *id_first++; w->len = PTR_DIFF(unsigned, s, w->text); w->cat = (eight_bits)(upper_case_code ? 0 : expr); // Temporary } break; default: ERR_PRINT(W,"Second argument (replacement text) \ of @@w must be either a quoted string or '.' or have the form \\name"); break; } } } @ Finally, when the \TeX\ and definition parts have been treated, we have |next_control>=begin_code|. 
@<Glob...@>= EXTERN boolean unnamed_section SET(NO); @ @<Store cross-references in the \cee...@>= { the_part = CODE; if (next_control<=module_name) { /* |begin_code| or |module_name| */ boolean beginning_module = YES; if(next_control==begin_code) { boolean nuweb_mode0 = nuweb_mode; unnamed_section = YES; params = global_params; nuweb_mode = nuweb_mode0; frz_params(); mod_xref_switch = NO; if(mark_defined.fcn_name && lower_case_code) { defd_switch = YES; // Implicit \.{@@[}. defd_type = FUNCTION_NAME; } } else { unnamed_section = NO; mod_xref_switch = def_flag; } do { if (next_control==module_name && cur_module) new_mod_xref(cur_module); if(beginning_module) { if(mod_xref_switch) next_control = get_next(); else next_control = @'='; // For |begin_code|. if(next_control==@'=') if( !nuweb_mode && ((FORTRAN_LIKE(language) && !free_form_input) || (language==TEX)) ) @<Set up column mode@>@; beginning_module = NO; } else next_control = get_next(); outr_xref(CODE); } while (next_control<=module_name) ; // Hunt for new module. column_mode = NO; // Turn off the FORTRAN verbatim input mode. unnamed_section = NO; // Don't deflect cross-references. } } @ After phase one has looked at everything, we want to check that each module name was both defined and used. The variable |cur_xref| will point to cross-references for the current module name of interest. @<Global...@>= EXTERN xref_pointer cur_xref; /* temporary cross-reference pointer */ @ The following recursive procedure walks through the tree of module names and prints out anomalies. @^recursion@> @<Part 1@>=@[ SRTN mod_check FCN((p)) name_pointer p C1("Print anomalies in subtree |p|.")@; { if (p) { mod_check(p->llink); cur_xref = (xref_pointer)p->xref; if (cur_xref->num <def_flag) { SET_COLOR(warning); printf("\n! Never defined: <"); prn_id(p); putchar('>'); mark_harmless; @.Never defined: <section name>@> } while (cur_xref->num >= def_flag) cur_xref = cur_xref->xlink; if (cur_xref==xmem) { SET_COLOR(warning); printf("\n! 
Never used: <"); prn_id(p); putchar('>'); mark_harmless; @.Never used: <section name>@> } mod_check(p->rlink); } } @ Start off at the top of the tree. @<Print error messages about un...@>= mod_check(root) @* LOW-LEVEL OUTPUT ROUTINES. The \TeX\ output is supposed to appear in lines at most |line_length| characters long, so we place it into an output buffer. During the output process, |out_line| will hold the current line number of the line about to be output. @d CHECK_OPEN // This is defined differently in \FTANGLE. @<Global...@>= EXTERN BUF_SIZE line_length; EXTERN ASCII HUGE *out_buf; // Assembled characters. EXTERN ASCII HUGE *out_end; // End of |out_buf|. EXTERN ASCII HUGE *out_ptr; // Points to last character in |out_buf|. EXTERN LINE_NUMBER out_line; // number of next line to be output. @ @<Alloc...@>= ALLOC(ASCII,out_buf,ABBREV(line_length),line_length,1); /* assembled characters */ out_end = out_buf+line_length; /* end of |out_buf| */ @ The |flush_buffer| routine empties the buffer up to a given breakpoint, and moves any remaining characters to the beginning of the next line. If the |per_cent| parameter is |YES|, a |'%'|~is appended to the line that is being output; in this case the breakpoint~|b| should be strictly less than |out_end|. If the |per_cent| parameter is |NO|, trailing blanks are suppressed. The characters emptied from the buffer form a new line of output. The same caveat that applies to |ASCII_write| applies to |c_line_write|. (??) @d OUT_FILE tex_file @d C_LINE_WRITE(n) fflush(tex_file),FWRITE(out_buf+1,n,tex_file) @d ASCII_LINE_WRITE(n) fflush(tex_file),ASCII_file_write(tex_file,out_buf+1,(size_t)(n))@; @d TEX_PUTXCHAR(c) PUTC(c) // Send an |outer_char| to the \.{TEX} file. @d TEX_NEW_LINE PUTC('\n') // A newline to the \.{TEX} file. @d TEX_PRINTF(s) fprintf(tex_file,s) // A string to the \.{TEX} file. 
@<Part 1@>=@[

SRTN
flush_buffer FCN((b,per_cent))
	ASCII HUGE *b C0("")@;
	boolean per_cent C1("Outputs from |out_buf+1| to |b|, \
where |b<=out_ptr|.")@;
{
ASCII HUGE *j;
ASCII HUGE *out_start;

if(output_on)
	{
	out_start = out_buf + 1;
	j = b; // Pointer into |out_buf|.

/* Remove trailing blanks. */
	if(!per_cent)
		while (j>out_buf && *j==@' ')
			j--;

	ASCII_LINE_WRITE(j-out_buf);

	if (per_cent)
		TEX_PUTXCHAR('%');

	if(*b != @'\n')
		TEX_NEW_LINE; // Nuweb mode has explicit newlines.

	out_line++;

	if (b<out_ptr)
		{
		ASCII HUGE *src = b + 1; // First character not yet written.
		ASCII HUGE *dst;

/* If the buffer starts with a per-cent sign, it is left in place and
the remaining characters are moved in behind it. */
		if(*out_start == @'%')
			out_start++;

/* The source and destination overlap, so the characters are copied one
at a time from the low end instead of with |STRNCPY|.  That is safe
because the destination never lies above the source. */
		for(dst=out_start; src<=out_ptr; )
			*dst++ = *src++;
		}

	out_ptr -= b - out_start + 1;
	}
else
	out_ptr = out_buf; /* Output is off; just discard the buffer. */
}

@ When we are copying \TeX\ source material, we retain line breaks that
occur in the input, except that an empty line is not output when the \TeX\
source line was nonempty. For example, a line of the \TeX\ file that
contains only an index cross-reference entry will not be copied. The
|fin_line| routine is called just before |get_line| inputs a new line, and
just after a line break token has been emitted during the output of
translated \cee\ text.

@<Part 1@>=@[

SRTN
fin_line(VOID) /* do this at the end of a line */
{
ASCII HUGE *k; // Pointer into |cur_buffer|.

if (out_ptr>out_buf)
	flush_buffer(out_ptr,NO); // Something nontrivial in line.
else
	{ /* Don't output an empty line when \TeX\ source line is nonempty. */
	for (k=cur_buffer; k<=limit; k++)
		if (*k!=@' ' && *k!=tab_mark)
			return;

	flush_buffer(out_buf,NO); // Empty line.
	}
}

@ In particular, the |fin_line| procedure is called near the very
beginning of phase two. We initialize the output variables in a slightly
tricky way so that the first line of the output file will be `\.{\\input
fwebmac}'.  This is the default.  However, occasionally, one may need to
load other macro packages before \.{fwebmac}.  To prevent this first line
from being generated, use the command line option~``\.{-w}''.
To change the name of the default, say ``\.{-wnew\_name}''---for example,
``\.{-wfmac.sty}''.

@<Set init...@>=
{
out_ptr = out_buf;
out_line = 1;

if(input_macros)
	{
	TEX_PRINTF("\\input ");
	OUT_STR(*fwebmac ? fwebmac : w_style.misc.macros);
		/* The command line overrides the style file. */
	}
}

@ When the `\.{@@I}'~command is used in conjunction with the command-line
option `\.{-i}', we process the incoming text, but don't write it out. We
need an output flag to tell us when output is allowed.

@<Glob...@>=

EXTERN boolean output_on SET(YES);

@ When we wish to append one character~|c| to the output buffer, we write
`|out(c)|'; this will cause the buffer to be emptied if it was already
full. |c|~is assumed to be of type |ASCII|.  If we want to append more than
one character at once, we say |OUT_STR(s)|, where |s|~is a string
containing the characters, or |out_del_str(s,t)| (``output a delimited
string''), where~|s| and~|t| point to the same array of characters (stored
as 16-bit tokens); characters from~|s| to~|t-1|, inclusive, are output. The
|out_str| routine takes an |outer_char| string as an argument, since this
is typically used as a print statement from inside the code.

A line break will occur at a space or after a single-nonletter
\TeX\ control sequence.

@d out(c)
	{
	if(out_ptr >= out_end)
		break_out();

	*(++out_ptr) = (ASCII)(c);
	}

@d OUT_STR(s) out_str(OC(s))

@<Part 1@>=@[

SRTN
out_del_str FCN((s,t)) /* output |ASCII| characters from |s| to |t-1|. */
	token_pointer s C0("")@;
	token_pointer t C1("")@;
{
if(!output_on)
	return; // Skip output.

/* |out| expands to a braced block that may call |break_out|. */
while (s<t)
	out(*s++);
}

SRTN
out_str FCN((s)) /* output characters from |s| to end of string */
	CONST outer_char HUGE *s C1("")@;
{
if(!output_on)
	return; // Skip output.

/* Each character is passed through |XORD| before it goes to |out|. */
while (*s)
	out(XORD(*s++));
}

@ Here we write an |outer_char| file name.  We have to watch out for
special characters.
@<Part 1@>=@[ SRTN out_fname FCN((s)) CONST outer_char HUGE *s C1("File name to be written.")@; { ASCII a; while(*s) { a = XORD(*s++); switch(a) { @<Special string cases@>: out(@'\\'); break; } out(a); } } @ The |break_out| routine is called just before the output buffer is about to overflow. To make this routine a little faster, we initialize position~0 of the output buffer to~'\.\\'; this character isn't really output. @<Set init...@>= out_buf[0] = @'\\'; @ A long line is broken at a blank space or a newline (which may enter from a limbo string), or just before a backslash that isn't preceded by another backslash or a newline. In the latter case, a~|'%'| is output at the break. @<Part 1@>=@[ SRTN break_out(VOID) /* finds a way to break the output line */ { ASCII HUGE *k = out_ptr; /* pointer into |out_buf| */ boolean is_tex_comment = BOOLEAN(*(out_buf+1) == @'%'); if(nuweb_mode) WHILE() { if(k==out_buf) @<Print warning message, break the line, and |return|@>; if(*(k--) == @'\n') { flush_buffer(++k, NO); break; } } else WHILE() { if (k==out_buf) @<Print warning message, break the line, and |return|@>; if (*k==@' ') { flush_buffer(k,NO); break; } if (*k==@'\n' && k[-1] != @'\n') {/* Get the per-cent sign before the newline. */ *k = @'%'; flush_buffer(k,NO); // Kill off the newline. break; } if (*(k--)==@'\\' && *k!=@'\\' && *k != @'\n') { /* we've decreased |k| */ flush_buffer(k,YES); break; } } if(is_tex_comment) *(++out_ptr) = @'%'; } @ We get to this module only in unusual cases that the entire output line consists of a string of backslashes followed by a string of nonblank non-backslashes. In such cases it is almost always safe to break the line by putting a~|'%'| just before the last character. @<Print warning message...@>= { SET_COLOR(warning); printf("\n! Line had to be broken (output l. 
%u):\n",out_line); @.Line had to be broken@> ASCII_write(out_buf+1, out_ptr-out_buf-1); new_line; mark_harmless; flush_buffer(out_ptr-1,YES); return; } @ Here is a macro that outputs a module number in decimal notation. The number to be converted by |out_mod| is known to be less than |def_flag|, so it cannot have more than five decimal digits. If the module is changed, we output~`\.{\\*}' just after the number. @<Part 1@>=@[ SRTN out_mod FCN((n,encap)) sixteen_bits n C0("Module number.")@; boolean encap C1("Encapsulate?")@; { char s[100]; if(encap) sprintf(s,"%s%s%u%s", (char *)w_style.indx.encap_prefix, (char *)w_style.indx.encap_infix ,n , (char *)w_style.indx.encap_suffix); else sprintf(s,"%u",n); OUT_STR(s); if(chngd_module[n]) OUT_STR("\\*"); } @ The |out_name| procedure is used to output an identifier or index entry, enclosing it in braces. When we're outputting an identifier, we must escape the various special characters that may sneak in. Index entries are treated literally. @d IDENTIFIER YES @d INDEX_ENTRY NO @<Part 1@>=@[ SRTN out_name FCN((is_id,p)) boolean is_id C0("Flag to distinguish identifier/index entry.")@; name_pointer p C1("The name to be output.")@; { ASCII HUGE *k, HUGE *k_end=(p+1)->byte_start; // Pointers into |byte_mem|. boolean multi_char,non_TeX_macro; sixteen_bits mod_defined; if(!output_on) return; // Skip output. multi_char = BOOLEAN(k_end - p->byte_start > 1); if(multi_char) out(@'{');// Multiple-letter identifiers are enclosed in braces. non_TeX_macro = BOOLEAN(is_id && *p->byte_start == @'\\' && language != TEX); if(non_TeX_macro) out(@'$'); /* \Cpp\ macros (such as those like \.{\\Wcp} that would arise from |@c++ operator +=()|) must be in math mode. */ for (k=p->byte_start; k<k_end; k++) { if(is_id) switch(*k) { /* Escape the special characters in identifiers. */ case @'\\': case @'{': case @'}': /* A non-\TeX\ identifier can result from the translation of an operator name in \Cpp. 
For that, we shouldn't escape the opening backslash. We also assume that any braces following that macro should be interpreted literally. */ if(non_TeX_macro) break; @<Other string cases@>: out(@'\\'); } out(*k); } if(non_TeX_macro) out(@'$'); if(multi_char) out(@'}'); if(p->wv_macro) @<Output the overloaded translation@>@; /* Should do all languages here. (Sorted!). */ if(subscript_fcns && (mod_defined = p->defined_in(language))) { char temp[100]; if(output_protect) OUT_STR("\\protect"); sprintf(temp,"\\WIN%d{%d}",DEFINED_TYPE(p), mod_defined==module_count ? 0 : mod_defined); OUT_STR(temp); } } @ @<Output the overlo...@>= { WV_MACRO HUGE *w = p->wv_macro; ASCII HUGE *s = w->text; OUT_STR("\\WTeX{"); while(*s) out(*s++); out(@'}'); } @ The following can occur in identifiers recognized by \FWEB. @<Special identifier cases@>= case @'_': case @'$': case @'%': case @'#': case @'\\': out(@'\\')@; @* ROUTINES THAT COPY \TeX\ MATERIAL. During phase two, we use the subroutines |copy_limbo| and |copy_TeX| in place of the analogous |skip_limbo| and |skip_TeX| that were used in phase one. The routine |copy_comment| serves for both phases. The |copy_limbo| routine, for example, begins by outputting two kinds of \TeX\ code that it has constructed or collected. First, it writes out \TeX\ definitions for user-defined dot constants; second, it writes out any limbo text that it collected during phase one. Then it takes \TeX\ material that is not part of any module and transcribes it almost verbatim to the output file. No `\.{@@}'~signs should occur in such material except in `\.{@@@@}'~pairs; such pairs are replaced by singletons. 
@<Part 2@>=@[ SRTN copy_limbo(VOID) { ASCII c; @<Output default definitions for user-defined dot constants@>@; @<Output any limbo text definitions@>@; OUT_STR("\n% --- Beginning of user's limbo section ---"); flush_buffer(out_ptr,NO); WHILE() { if (loc>limit && (fin_line(), !get_line())) break; *(limit+1)=@'@@'; while (*loc!=@'@@') out(*(loc++)); // Copy verbatim to output. if (loc++<=limit) { c=*loc++; // Character after `\.{@@}'. if (ccode[c]==new_module) break; if (c!=@'z' && c!=@'Z') switch(ccode[c]) { @<Cases to set |language| and |break|@>@:@; case toggle_output: out_skip(); break; case invisible_cmnt: loc = limit + 1; // Skip entire rest of line. break; case @'@@': out(@'@@'); // $\.{@@@@} \to \.{@@}$. break; default: ERR_PRINT(W,"Double @@ required\ outside of sections"); @.Double \AT! required...@> } } } @<Output the end of limbo section@>@; } @ By the beginning of phase~2, we know about any user-defined operators in \Fortran-90 via the \.{@@v}~command. Here we output default (empty) definitions of the associated macros. The user can override these in his limbo section. @<Output default def...@>= { int k; OPERATOR *p; /* An extra blank line after \.{\\input fwebmac.sty}. */ for(k=0; k<NUM_LANGUAGES; k++) if(overloaded[k]) { flush_buffer(out_ptr,NO); break; } for(k=0; k<NUM_LANGUAGES; k++) if(overloaded[k]) { flush_buffer(out_ptr,NO); OUT_STR("% --- Overloaded operator definitions from @@v for '"); OUT_STR(lang_codes[k]); OUT_STR("' ---"); flush_buffer(out_ptr,NO); for(p=op; p<op_ptr; p++) { OP_INFO HUGE *q = p->info + k; if(q->overloaded) @<Define to \TeX\ an overloaded operator@>@; } flush_buffer(out_ptr,NO); } } @ This fragment produces output of the form ``\.{\\newbinop\{abc\}\{C\{def\}}''. See \.{fwebmac.web} to learn how such macros are defined. 
@<Define to \TeX\ ...@>= @{ #define TEMP_LEN 1000 outer_char temp[TEMP_LEN], outer_op_name[100]; OUT_STR("\\new"); switch(q->cat) { case unorbinop: case binop: OUT_STR("binop"); @~ break; case unop: OUT_STR("unop"); @~ break; default: OUT_STR("op"); @~ break; } STRCPY(outer_op_name,p->op_name); @~ to_outer((ASCII *)outer_op_name); SPRINTF(TEMP_LEN,temp,`"{%s}{%s}{%s} ",outer_op_name,lang_codes[k],q->defn`); OUT_STR(temp); #undef TEMP_LEN } @ Limbo text material is collected from all \.{@@l}~commands, then output verbatim here, at the beginning of phase two. We begin by writing out any default material from the style file entry \.{limbo}. @<Output any limbo text...@>= { text_pointer t = tok_start + 1; /* Default material. */ if(*w_style.misc.limbo_begin) { flush_buffer(out_ptr,NO); OUT_STR("% --- Limbo text from style-file parameter `limbo.begin' ---"); flush_buffer(out_ptr,NO); OUT_STR(w_style.misc.limbo_begin); flush_buffer(out_ptr,NO); } /* If there were any \.{@@l}~commands, they were stored in phase~1; output them now. */ if(text_ptr > t) { flush_buffer(out_ptr,NO); OUT_STR("% --- Limbo text from @@l ---"); // Header line. flush_buffer(out_ptr,NO); } /* Actual text. */ for(; t<text_ptr; t++) { out_del_str(*t,*(t+1)); flush_buffer(out_ptr,NO); } @<Initialize |tok_ptr|...@>@; } @ @<Output the end of limbo...@>= { if(*w_style.misc.limbo_end) { flush_buffer(out_ptr,NO); OUT_STR("% --- Limbo text from style-file parameter `limbo.end' ---"); flush_buffer(out_ptr,NO); OUT_STR(w_style.misc.limbo_end); flush_buffer(out_ptr,NO); } } @ @<Unused@>= if(Fortran88) { DOTS *d; flush_buffer(out_ptr,NO); for(d=dots + PREDEFINED_DOTS; d->code; d++) if(d->code == dot_const) fprintf(tex_file,"\\newdot{%s}{} ",d->symbol); if(d-dots > PREDEFINED_DOTS + 1) flush_buffer(out_ptr,NO); } @ A fragment that toggles the output switch. This is used in conjunction with the \.{@@i}~command, which is translated into a |toggle_output|. 
@<Glob...@>= EXTERN boolean strt_off SET(NO), ending_off SET(NO); @ @<Toggle output@>= { static int outer_include_depth; if(output_on) { if(phase==2) { flush_buffer(out_ptr,NO); } outer_include_depth = incl_depth; output_on = NO; } else if(incl_depth <= outer_include_depth) { output_on = YES; } } @ While appending code text, store the state of the output. @ @<Store the output switch@>= { if(output_on) app(Turn_output_on); else { app(force); /* If we don't do this, output is turned off before the contents of the last line are printed. */ app(turn_output_off); } app_scrap(ignore_scrap,no_math); } @ While appending code text, store the state of the output. @ @<Store output switch and \.{\\Wskipped}@>= { if(output_on) app(Turn_output_on); else { app(force); app(Turn_output_off); } app_scrap(ignore_scrap,no_math); } @ The |copy_TeX| routine processes the \TeX\ code at the beginning of a module; for example, the words you are now reading were copied in this way. It returns the next control code or~`\v' found in the input. Lines that consist of all spaces are made empty; spaces between the beginning of a line and an \.{@@}~command are stripped away. (Unlike the original design, we leave tab marks in, since some users use those as active characters.) This makes the test for empty lines in |fin_line| work. @<Part 2@>=@[ eight_bits copy_TeX(VOID) { ASCII c; // Current character being copied. WHILE() { if (loc>limit) { @<Delete run of spaces between beginning of line and present position@>@; fin_line(); if(!get_line()) return new_module; } *(limit+1)=@'@@'; scan: while ((c=*(loc++))!=@'|' && c!=@'@@') { if(c==interior_semi) c = @';'; out(c); // Copy \TeX\ verbatim to output. #if(0) if (out_ptr==out_buf+1 && (c==@' ' || c==tab_mark )) out_ptr--; #endif } if (c==@'|') return @'|'; // Beginning of code mode. if (loc<=limit) { /* Found an \.{@@}. 
*/ eight_bits cc; if(*loc == @'@@') { out(@'@@'); loc++; goto scan; } @<Delete run of spaces...@>@; SET_CASE(*loc); if( (cc = ccode[*(loc++)]) != big_line_break) return cc; if(loc >= limit) return cc; @<Process possible pre...@>; // An `\.{@@\#\dots}' command. return cc; // A |big_line_break| command. } } DUMMY_RETURN(ignore); } @ If there are only spaces between the beginning of the output buffer and the present position |out_ptr|, delete those spaces. @<Delete run of spaces...@>= { ASCII HUGE *b; for(b=out_buf+1; b<=out_ptr; b++) if(*b != @' ') break; if(b > out_ptr) out_ptr = out_buf; } @ A flag lets us know when we're processing a comment. @<Glob...@>= EXTERN boolean in_comment; @ The |copy_comment| function issues a warning if more braces are opened than closed, and in the case of a more serious error it supplies enough braces to keep \TeX\ from complaining about unbalanced braces. (Because of a bug inherited from \CWEB, this doesn't work right if there is a construction such as~`\.{\\\{}' in the comment.) Instead of copying the \TeX\ material into the output buffer, this function copies it into the token memory. The abbreviation |app_tok(t)| is used to append token~|t| to the current token list, and it also makes sure that it is possible to append at least one further token without overflow. @d app_tok(c) {if (tok_ptr+2>tok_m_end) OVERFLW("tokens",ABBREV(max_toks_w)); app(c);} @<Part 2@>=@[ int copy_comment FCN((bal)) /* copies \TeX\ code in comments */ int bal C1("Brace balance.")@; { ASCII c; //* Current character being copied. char terminator[2]; token_pointer tok_ptr0 = tok_ptr; in_comment = YES; terminator[0] = *limit; @~ terminator[1] = *(limit+1); *limit = @' '; /* Space to implement continued line. Short commands will be ended by this space. */ /* Especially when it comes to stars and asterisks, we need to know when we're copying \TeX. 
Since this is actually going into token memory instead of being transcribed directly to the output, we append the |copy_mode| flag to help us know where we are. For this to work properly, one must return only from the bottom of this function, because we append another |copy_mode| at the bottom. */ if(phase == 2) app_tok(copy_mode); WHILE() { if(loc > limit) @<Continue comment if necessary@>@; // Get the next character. Convert a run of tabs into one tab. if(language==TEX) c = *loc++; else do c = *(loc++); while(c == tab_mark); if (c==@'|') break; // Found beginning of code mode. if (c==@'*' && *loc==@'/' && long_comment) { loc++; // Position after `\.{\starslash}'. @<Finish comment and |break|@>; } /* It looks better in the \.{tex} file if tabs are replaced by spaces. Presumably this won't harm anything else. */ if (phase==2) @<Append comment text@>@; @<Copy special things when |c=='@@', '\\', '{', '}'|@>; } if(phase == 2) app_tok(copy_mode); // Negate the copying mode. *limit = terminator[0]; @~ *(limit+1) = terminator[1]; if(!long_comment && *limit == @'@@' && loc > limit) loc = limit; in_comment = NO; return bal; } @ @<Continue comment if nec...@>= { if(!(long_comment || language==TEX)) { // End of short comment. if(auto_semi && *(tok_ptr-2) == @';' && *(tok_ptr-1) == @' ') tok_ptr -= 2; /* Strip trailing spaces. */ while(*(tok_ptr-1) == @' ') tok_ptr--; /* If the last space happened to be escaped, kill the escape. */ if(*(tok_ptr-1) == @'\\' && *(tok_ptr-2) != @'\\') tok_ptr--; /* Kill the trailing end-of-comment. */ if(*(tok_ptr-2)==@'*' && *(tok_ptr-1)==@'/') tok_ptr -= 2; @<Finish comment and |break|@>@; } if (!get_line()) { if(language!=TEX) ERR_PRINT(W,"Input ended in mid-comment"); @.Input ended in mid-comment@> loc=cur_buffer+1; @<Clear |bal| and |break|@>; } /* For \TeX, we concatenate adjacent lines that all begin with comment characters. 
*/ if(language==TEX) { if(loc==limit) @<Finish comment...@>@; for(;loc <= limit; loc++) if(*loc!=@' ' && *loc!=tab_mark) break; if(loc > limit) continue; if(TeX[*loc] == TeX_comment) loc++; else { // Unskip the white space. loc = cur_buffer; @<Finish comment...@>@; } } } @ During phase~2, we must actually append the text character by character. That's essentially straightforward, but a few replacements are made. @<Append comment text@>= switch(c) { case tab_mark: if(language==TEX) APP_STR("\\quad"); else app_tok(@' '); break; case interior_semi: app_tok(@';'); break; case @'%': if(language==TEX) app_tok(@'\\'); app_tok(c); break; default: /* Basically, we just append the present character here. However, compiler directives need to be escaped. */ if(doing_cdir) switch(c) { @<Special string cases@>: app_tok(@'\\'); } app_tok(c); break; } @ This fragment finishes off a comment, ensuring that braces are properly balanced. @<Finish comment...@>= if(bal==1) { if (phase==2) { if(language==TEX) @<Check for a null \TeX\ comment@>@; app_tok(@'}'); } bal = 0; break; } else { ERR_PRINT(W,"Braces don't balance in comment"); @.Braces don't balance in comment@> @<Clear |bal| and |break|@>; } @ @<Check for a null ...@>= { token_pointer t; for(t=tok_ptr-1; t>tok_ptr0; t--) if(*t != @' ') break; if(t == tok_ptr0 && *(t-4)==@'\\' && *(t-3)==@'W' && *(t-2)==@'C' && *(t-1)==@'{') *(tok_ptr0-2) = @'x'; // Change \.{\\WC} to \.{\\Wx}. } @ @<Copy special things when |c=='@@'...@>= if (c==@'@@') { if (*(loc++)!=@'@@') { ERR_PRINT(W,"Illegal use of @@ in comment"); @.Illegal use of \AT!...@> loc-=2; if (phase==2) tok_ptr--; @<Clear |bal|...@>; } } else if (c==@'\\' && *loc!=@'@@' && phase==2) app_tok(*(loc++))@; else if (c==@'{') bal++; else if (c==@'}') bal--; @ When the comment has terminated abruptly due to an error, we output enough right braces to keep \TeX\ happy. 
@<Clear |bal|...@>= app_tok(@' '); /* this is done in case the previous character was~`\.\\' */ while (bal-- >0) app_tok(@'}'); bal = 0; break; @i scraps.hweb /* Declarations related to the scraps and productions. */ @ @<Alloc...@>= ALLOC(scrap,scrp_info,ABBREV(max_scraps),max_scraps,0); scrp_end=scrp_info+max_scraps -1; /* end of |scrp_info| */ @ @<Set init...@>= scrp_base=scrp_info+1; mx_scr_ptr=scrp_ptr=scrp_info; @* INITIALIZING the SCRAPS. If we are going to use the powerful production mechanism just developed, we must get the scraps set up in the first place, given a \cee\ text. A table of the initial scraps corresponding to \cee\ tokens appeared above in the section on parsing; our goal now is to implement that table. We shall do this by implementing a subroutine called |C_parse| that is analogous to the |C_xref| routine used during phase one. Like |C_xref|, the |C_parse| procedure starts with the current value of |next_control| and it uses the operation |next_control=get_next()| repeatedly to read \cee\ text until encountering the next~`\v' or comment, or until |next_control>=formatt|. The scraps corresponding to what it reads are appended into the |cat| and |trans| arrays, and |scrp_ptr| is advanced. @<Glob...@>= EXTERN boolean scanning_meta SET(NO); @ @<Part 2@>=@[ SRTN C_parse FCN((mode0)) /* Creates scraps from \cee\ tokens */ PARSING_MODE mode0 C1("")@; { name_pointer p; // Identifier designator. LANGUAGE language0 = language; // Save the incoming language. PARSE_PARAMS parse_params0; parse_params0 = parse_params; // Save parsing state. parsing_mode = mode0; if(parsing_mode == INNER) { // Start fresh for parsing interior code. 
at_beginning = YES; preprocessing = NO; } while (next_control<formatt) { if(nuweb_mode && parsing_mode == INNER) @<Append a verbatim scrap@>@; else { @<Append the scrap appropriate to |next_control|@>; next_control = get_next(); } if (next_control==@'|' || next_control==begin_comment) break; if(next_control == begin_language && !ok_to_define && parsing_mode == OUTER) return; } /* If the language has changed, append stuff to restore it. */ if(language != language0) { app_tok(begin_language); app(lan_num(language0)); app_scrap(ignore_scrap,no_math); } if(parsing_mode == INNER) parse_params = parse_params0; // Restore incoming values. } @ This fragment is a simple kludge; it doesn't handle various cases gracefully, such as `\.{||}'. @<Append a verbatim s...@>= { while(loc < limit) { if(*loc == @'|') { next_control = *loc++; break; } app(*loc++); } app_scrap(ignore_scrap, no_math); } @ The following macro is used to append a scrap whose tokens have just been appended. Note that mathness is stored in the form $4(\hbox{\it right boundary}) + \hbox{\it left boundary}$. Thus, noting that $5b = 4b + b$, we see that the construction~$5b$ makes the left- and right-hand boundaries equal. @d app_scrap(c,b)@/ (++scrp_ptr)->cat = (eight_bits)(c); scrp_ptr->trans = text_ptr; scrp_ptr->mathness = (eight_bits)(5*(b)); /* Make left and right boundaries equal. */ freeze_text@; @<Part 2@>=@[ SRTN set_language FCN((language0)) LANGUAGE language0 C1("")@; { char language_line[50]; language = language0; app_tok(begin_language); app(lan_num(language)); if(parsing_mode == OUTER) { sprintf(language_line,"\\LANGUAGE{%s}", (char *)LANGUAGE_CODE(language)); APP_STR(language_line); @.\\LANGUAGE@> } app_scrap(language_scrap,no_math); } @ Operator overloading. @<Glob...@>= EXTERN boolean overloaded[NUM_LANGUAGES]; EXTERN BUF_SIZE op_entries; /* Length for dynamic array. */ EXTERN OPERATOR HUGE *op, HUGE *op_end; /* Dynamic array of entries for operator overloading. 
*/ EXTERN OPERATOR HUGE *op_ptr; /* Next open position in |OP|. */ @ Initializing operators is conveniently handled by macros. /* Initialize an ordinary operator such as~`\.+'. */ @d INIT_OP(op_code,op_name,lang,op_macro,cat) init_op((eight_bits)(op_code),OC(op_name),(int)(lang),OC(op_macro), NO,cat,(CONST outer_char *)NULL) /* Initialize a compound assignment operator such as~`\.{+=}'. */ @d INIT_CA(ca_index,op_name,lang,op_macro,cat) assignment_token = ca_index; INIT_OP(compound_assignment,OC(op_name),(int)(lang),OC(op_macro),cat)@; /* Initialize a dot operator such as~`\.{.NE.}'. */ @d INIT_DOT(op_name,lang,op_macro,cat) init_op((eight_bits)identifier,OC(op_name),(int)(lang),OC(op_macro), NO,cat,(CONST outer_char *)NULL) @d ALL_LANGUAGES ((int)C | (int)C_PLUS_PLUS | (int)FORTRAN | (int)FORTRAN_90 | (int)(RATFOR) | (int)(RATFOR_90) | (int)LITERAL) @d ONLY_C_like ((int)C | (int)C_PLUS_PLUS) @d ALL_BUT_C_like (~ONLY_C_like) @d ALL_BUT_Cpp ((int)C | (int)FORTRAN | (int)FORTRAN_90 | (int)(RATFOR) | (int)(RATFOR_90) | (int)LITERAL) @<Alloc...@>= { int l; for(l=0; l<NUM_LANGUAGES; l++) overloaded[l] =NO; ALLOC(OPERATOR,op,ABBREV(op_entries),op_entries,0); op_end = op + op_entries; op_ptr = op + 128; /* The first 128 are for direct indexing. */ @<Initialize ordinary operators@>; @<Initialize compound assignment operators@>; } @ @<Initialize ordinary op...@>= INIT_OP(@'!',"NOT",ALL_LANGUAGES,"\\WR",unop); // `|!|' INIT_DOT("NOT",ALL_BUT_C_like,"\\WR",unop); @.\\WR@> @..NOT.@> INIT_OP(@'%',"MOD",ALL_LANGUAGES,"\\MOD",binop); // `|%|' @.\\MOD@> INIT_OP(@'&',"LAND",C,"\\amp",unorbinop); /* `|&|'. 
*/ INIT_OP(@'&',"LAND",C_PLUS_PLUS,"\\amp",reference); @.\\amp@> INIT_OP(@'&',"LAND",ALL_BUT_C_like,"\\AND",binop); // `|@r &|' @.\\AND@> INIT_OP(@'*',"STAR",ALL_LANGUAGES,"\\ast",unorbinop); // `|*|' @.\\ast@> INIT_OP(@'+',"PLUS",ALL_LANGUAGES,"+",unorbinop); // `|+|' INIT_OP(@'-',"MINUS",ALL_LANGUAGES,"-",unorbinop); // `|-|' INIT_OP(@'/',"SLASH",ALL_LANGUAGES,"/",binop); // `|/|' INIT_OP(@'<',"LT",ALL_BUT_Cpp,"<",binop); // `|<|' INIT_OP(@'<',"LT",C_PLUS_PLUS,"<",langle); // `|<|' INIT_DOT("LT",ALL_BUT_C_like,"<",binop); @..LT.@> INIT_OP(@'=',"EQUALS",ALL_LANGUAGES,"=",binop); // `|=|' INIT_OP(@'>',"GT",ALL_BUT_Cpp,">",binop); // `|>|' INIT_OP(@'>',"GT",C_PLUS_PLUS,">",rangle); // `|>|' INIT_DOT("GT",ALL_BUT_C_like,">",binop); @..GT.@> INIT_OP(@'?',"QUESTION",ONLY_C_like,"\\?",question); // `|?|' @.\\?@> INIT_OP(@'^',"CARET",ALL_LANGUAGES,"\\^",binop); // `|x^y|' @.\\\^@> INIT_OP(@'|',"OR",ALL_LANGUAGES,"\\OR",binop); // `$\OR$' @.\\OR@> INIT_OP(@'~',"TILDE",ONLY_C_like,"\\TLD",unop); @.\\TL@> INIT_OP(not_eq,"NE",ALL_LANGUAGES,"\\WI",binop); /* `|!=|' */ INIT_DOT("NE",ALL_BUT_C_like,"\\WI",binop); @.\\WI@> @..NE.@> INIT_OP(lt_eq,"LE",ALL_LANGUAGES,"\\WL",binop); /* `|<=|' */ INIT_DOT("LE",ALL_BUT_C_like,"\\WL",binop); @.\\WL@> @..LE.@> INIT_OP(gt_eq,"GE",ALL_LANGUAGES,"\\WG",binop); /* `|>=|' */ INIT_DOT("GE",ALL_BUT_C_like,"\\WG",binop); @.\\WG@> INIT_OP(eq_eq,"EQ",ALL_LANGUAGES,"\\WS",binop); /* `|==|' */ INIT_DOT("EQ",ALL_BUT_C_like,"\\WS",binop); @.\\WS@> @..EQ.@> INIT_OP(and_and,"AND",ALL_LANGUAGES,"\\WW",binop); /* `|&&|' */ INIT_DOT("AND",ALL_BUT_C_like,"\\WW",binop); @.\\WW@> @..AND.@> INIT_OP(or_or,"OR",ALL_LANGUAGES,"\\WV",binop); /* `||| |' */ INIT_DOT("OR",ALL_BUT_C_like,"\\OR",binop); @.\\WV@> @..OR.@> INIT_OP(plus_plus,"PP",ALL_LANGUAGES,"\\PP",unop); // `|++|' @.\\PP@> INIT_OP(minus_minus,"MM",ALL_LANGUAGES,"\\MM",unop); // `|--|' @.\\MM@> INIT_OP(minus_gt,"EQV",ONLY_C_like,"\\MG",binop); /* `|->|' */ @.\\MG@> 
INIT_OP(minus_gt,"EQV",ALL_BUT_C_like,"\\EQV",binop); /* `|@r .eqv.|' */ INIT_DOT("EQV",ALL_BUT_C_like,"\\EQV",binop); @.\\EQV@> @..EQV.@> INIT_OP(gt_gt, "RSHIFT",ONLY_C_like,"\\GG",binop); // `|>>|' @.\\GG@> INIT_OP(lt_lt,"LSHIFT",ONLY_C_like,"\\LL",binop); // `|<<|' @.\\LL@> INIT_OP(star_star,"EE",ALL_LANGUAGES,"\\EE",exp_op); /* `\.{**}' */ @.\\EE@> INIT_OP(slash_slash,"SlSl",ALL_BUT_C_like,"\\SlSl",binop); /* `|@r \/|' */ @.\\SlSl@> INIT_OP(ellipsis,"NEQV",ALL_BUT_C_like,"\\NEQV",binop); // `|@r .NEQV.|' INIT_DOT("NEQV",ALL_BUT_C_like,"\\NEQV",binop); INIT_DOT("XOR",ALL_BUT_C_like,"\\NEQV",binop); @..NEQV.@> @..XOR.@> INIT_DOT("FALSE",ALL_BUT_C_like,"\\FALSE",expr); // `|@r .false.|' @..FALSE.@> INIT_DOT("TRUE",ALL_BUT_C_like,"\\TRUE",expr)@; // `|@r .true.|' @..TRUE.@> @ @<Initialize compound...@>= INIT_CA(plus_eq,"Wcp",ALL_LANGUAGES,"\\Wcp",binop); // `|+=|' @.\\Wcp@> INIT_CA(minus_eq,"Wcm",ALL_LANGUAGES,"\\Wcm",binop); // `|-=|' @.\\Wcm@> INIT_CA(star_eq,"Wcs",ALL_LANGUAGES,"\\Wcs",binop); // `|*=|' @.\\Wcs@> INIT_CA(slash_eq,"Wcv",ALL_LANGUAGES,"\\Wcv",binop); // `|/=|' @.\\Wcv@> INIT_CA(mod_eq,"Wcd",ONLY_C_like,"\\Wcd",binop); // `|%=|' @.\\Wcd@> INIT_CA(xor_eq,"Wcx",ONLY_C_like,"\\Wcx",binop); // `|^=|' @.\\Wcx@> INIT_CA(and_eq,"Wca",ONLY_C_like,"\\Wca",binop); // `|&=|' @.\\Wca@> INIT_CA(or_eq,"Wco",ONLY_C_like,"\\Wco",binop); // `||=|' @.\\Wco@> INIT_CA(gt_gt_eq,"Wcg",ONLY_C_like,"\\Wcg",binop); // `|>>=|' @.\\Wcg@> INIT_CA(lt_lt_eq,"Wcl",ONLY_C_like,"\\Wcl",binop)@; // `|<<=|' @.\\Wcl@> @ Initializing an operator involves several possibilities. If the operator's code is less than~128, the info is put directly into the corresponding table position. Otherwise, as for a new dot constant, we search through the positions $>= 128$ and insert it at the first available slot. 
@<Part 3@>=@[ SRTN init_op FCN((op_code,op_name,lang,op_macro,overload,cat,defn)) eight_bits op_code C0("The operator")@; CONST outer_char op_name[] C0("Fortran-like name of the operator")@; int lang C0("Union of all allowable languages for this def")@; CONST outer_char op_macro[] C0("Default macro expansion")@; boolean overload C0("Do we overload?")@; eight_bits cat C0("Category code")@; CONST outer_char defn[] C1("Replacement text for overloaded macro")@; { OPERATOR HUGE *p; int k,l; /* The dot constants won't be in the table yet. Just put them there. */ if(op_code == identifier) p = op_ptr++; // Next free position for a dot op. else if(!(p=valid_op(op_code))) { err_print(W,"Invalid op code %d",op_code); return; } p->op_name = GET_MEM("op name",STRLEN(op_name)+1,ASCII); STRCPY(p->op_name,op_name); to_ASCII((outer_char *)p->op_name); /* Access the languages by bit-shifting with~|l|. */ for(k=0,l=1; k<NUM_LANGUAGES; k++,l<<=1) if(lang & l) { OP_INFO HUGE *q = p->info + k; q->op_macro = op_macro; overloaded[k] |= (q->overloaded = overload); q->cat = cat; if(defn) q->defn = (outer_char HUGE *)defn; } } @ A storage variable. @<Glob...@>= EXTERN eight_bits last_control; @ Here we translate |next_control| into text characters, which are stored in memory. @<Append the scrap appropriate to |next_control|@>= { room_for(6,4,4); // Is there enough room? (Check and justify these numbers!!!) 
if(next_control) lst_ampersand = NO; switch (next_control) { case macro_module_name: @<Append a module name@>@; break; case stmt_label: case stringg: case constant: case verbatim: @<Append a string or constant@>; break; case begin_format_stmt: in_format = YES; case identifier: @<Append an identifier scrap@>; break; case TeX_string: @<Append a \TeX\ string scrap@>; break; case begin_language: @<Append scraps for |begin_language|@>; break; case new_output_file: @<Append the output file name@>@; break; case toggle_output: @<Toggle output@>@; @<Store output switch and \.{\\Wskipped}@>@; break; case macro_space: app(@' '); app_scrap(space,maybe_math); break; @<Cases involving single ASCII characters@>@:@; @<Cases involving nonstandard ASCII characters@>@:@; @<Cases involving special \WEB\ commands@>@:@; default: app(next_control); app_scrap(ignore_scrap,maybe_math); break; } } @ Check against possible overflow. @<Part 3@>=@[ SRTN room_for FCN((ntokens,ntexts,nscraps)) int ntokens C0("")@; int ntexts C0("")@; int nscraps C1("")@; { if(tok_ptr+ntokens>tok_m_end) { if (tok_ptr>mx_tok_ptr) mx_tok_ptr=tok_ptr; OVERFLW("tokens",ABBREV(max_toks_w)); } if(text_ptr+ntexts>tok_end) { if (text_ptr>mx_text_ptr) mx_text_ptr=text_ptr; OVERFLW("texts",ABBREV(max_texts)); } if (scrp_ptr+nscraps>scrp_end) { if (scrp_ptr>mx_scr_ptr) mx_scr_ptr=scrp_ptr; OVERFLW("scraps",ABBREV(max_scraps)); } } @ Some nonstandard ASCII characters may have entered \.{WEAVE} by means of standard ones. They are converted to \TeX\ control sequences so that it is possible to keep \.{WEAVE} from stepping beyond standard ASCII. @<Cases involving nonstandard...@>= /* Overloaded operators can be defined dynamically in \FORTRAN-88. These are generically labelled by |dot_const|. The |dot_code| routine fills the structure |dot_op| with the macro name and category corresponding to the operator. 
*/ case dot_const: next_control = identifier; id_first = dot_op.name + 1; id_loc = id_first + STRLEN(id_first); app_overload(); break; case eq_gt: APP_STR("\\WPtr"); /* `$\WPtr$' */ app_scrap(binop,yes_math); break; @.\\WPtr@> case ellipsis: if(C_LIKE(language)) { APP_STR("\\dots"); /* `|...|' */ @.\\dots@> app_scrap(int_like,maybe_math); } else app_overload(); break; case not_eq: case lt_eq: case gt_eq: case eq_eq: case and_and: case or_or: case plus_plus: case minus_minus: case minus_gt: case gt_gt: case lt_lt: case star_star: case slash_slash: case compound_assignment: app_overload(); @~ break; case paste: APP_STR("\\NN"); /* `|##|' */ app_scrap(ignore_scrap,maybe_math); break; @.\\NN@> case dont_expand: APP_STR("\\NP"); /* `|#!|' */ app_scrap(ignore_scrap,maybe_math); break; @.\\NP@> case auto_label: APP_STR("\\NC"); /* `|#:|' */ app_scrap(ignore_scrap,maybe_math); break; @.\\NC@> case all_variable_args: APP_STR("\\ND"); // `|#.| app_scrap(expr,maybe_math); break; @.\\ND@> case colon_colon: if(C_LIKE(language)) { @.\\CC@> APP_STR("\\CC"); // `|a::b|' app_scrap(unop,yes_math); } else { APP_STR("\\CF"); // `|@r a::b|' @.\\CF@> app_scrap(binop,yes_math); } break; case left_array: APP_STR("\\LS"); // `|@r (/|' @.\\LS@> app_scrap(lpar,yes_math); break; case right_array: APP_STR("\\SR"); // `|@r /)|' @.\\SR@> app_scrap(rpar,yes_math); break; @ @<Cases involving special...@>= case force_line: APP_STR("\\]"); app_scrap(ignore_scrap,yes_math); break; case thin_space: APP_STR("\\,"); app_scrap(ignore_scrap,yes_math); break; case math_break: app(opt); @~ APP_STR("0"); app_scrap(ignore_scrap,yes_math); break; case line_break: app(force); app_scrap(ignore_scrap,no_math); break; case left_preproc: app(force); if(parsing_mode==OUTER) APP_STR("\\4"); // Backspace for beauty. 
app_scrap(lproc,no_math); break; case right_preproc: app(force); app_scrap(rproc,no_math); break; case no_mac_expand: APP_STR("\\WTLD"); app_scrap(expr,maybe_math); break; case begin_meta: @<Process |begin_meta|@>@; break; case end_meta: if( !nuweb_mode && ((FORTRAN_LIKE(language) && !free_form_input) || (language==TEX)) ) @<Set up column mode@>@; get_line(); APP_STR(w_style.misc.meta.code.end); app(force); app_scrap(ignore_scrap,no_math); scanning_meta = NO; break; @.\\WBM@> @.\\WEM@> case big_line_break: app(big_force); app_scrap(ignore_scrap,no_math); break; case no_line_break: app(big_cancel); @~ APP_STR("\\ ");@~ app(big_cancel); app_scrap(ignore_scrap,no_math); break; case pseudo_expr: app_scrap(expr,maybe_math); @~ break; case pseudo_semi: app_scrap(semi,maybe_math); @~ break; case pseudo_colon: app_scrap(colon,maybe_math); @~ break; case join: APP_STR("\\WJ"); app_scrap(ignore_scrap,no_math); break; @.\\WJ@> case protect_code: ERR_PRINT(W,"You can't do that in code text"); @.You can't do that...@> break; @ @<Process |begin_meta|@>= { if(!nuweb_mode) app(force); app(toggle_meta); APP_STR(w_style.misc.meta.code.begin); column_mode = NO; scanning_meta = YES; WHILE() { if(loc >= limit) // !!!!! 
{ app(@'\n'); if(!get_line()) break; } while(loc < limit) { if(*loc == @'@@') @<Check for end of meta-comment and |goto done_meta@;| if necessary@>@; app(*loc++); } } done_meta: APP_STR(w_style.misc.meta.code.end); app(toggle_meta); if(!nuweb_mode) app(force); app_scrap(ignore_scrap,no_math); scanning_meta = NO; } @ @<Check for end of meta-comment...@>= { switch(ccode[*(loc+1)]) { case @'@@': loc++; break; case end_meta: if( !nuweb_mode && ((FORTRAN_LIKE(language) && !free_form_input) || (language==TEX)) ) @<Set up column mode@>@; get_line(); goto done_meta; case invisible_cmnt: if(*(loc+2) == @'%') eat_blank_lines = YES; app(@'\n'); get_line(); if(eat_blank_lines) { eat_blank_lines = NO; while(loc >= limit) if(!get_line()) goto done_meta; } continue; case new_module: goto done_meta; // !!!!! case line_break: if(loc[2] == @'*' || loc[2] == @'/') { /* Verbatim comment. */ loc++; break; } /* Falls through! */ case thin_space: case no_line_break: case join: case pseudo_semi: case pseudo_expr: case pseudo_colon: case compiler_directive: case Compiler_Directive: case no_index: case yes_index: case begin_bp: case insert_bp: loc += 2; continue; case big_line_break: break; // To handle preprocessor statements easily. default: if(nuweb_mode) goto done_meta; // !!!!! 
break; } } @ @<Cases involving single...@>= case @'\\': APP_STR("\\ttBS"); app_scrap(ignore_scrap,no_math); break; case @'\n': app(@' '); app_scrap(newline,maybe_math); break; case @'/': if(in_format) { app(next_control); app_scrap(expr,no_math); /* ``|@r format(//e10.5/f5.2)|'' */ } else if(in_data) { app(@'{'); @~ app(next_control); @~ app(@'}'); app_scrap(slash_like,maybe_math); } else { app_overload(); /* ``|a/b|'' */ } break; case @'.': app(next_control); app_scrap(binop,yes_math); break; case @'+':/* Handle \FORTRAN's |@r +1.0|; now also ANSI~C: ``|x = +2.5;|'' */ case @'<': case @'>': case @'=': case @'%': case @'!': case @'~': case @'-': case @'*': case @'|': case @'?': case @'^': app_overload(); @~ break; case @'&': lst_ampersand = YES; app_overload(); @~ break; case @'#': switch(*loc) { case @'\'': APP_STR("\\Nq"); loc++; break; case @'"': APP_STR("\\NQ"); loc++; break; default: APP_STR("\\#"); break; } app_scrap(expr,maybe_math); break; case ignore: case xref_roman: case xref_wildcard: case xref_typewriter: break; case @'(': app(next_control); app_scrap(lpar,maybe_math); break; case @')': app(next_control); app_scrap(rpar,maybe_math); break; case @'[': app(next_control); app_scrap(lbracket,yes_math); break; case @']': app(next_control); app_scrap(rbracket,yes_math); break; case @'{': APP_STR("\\{"); app_scrap(lbrace,yes_math); break; case @'}': APP_STR("\\}"); app_scrap(rbrace,yes_math); break; case @',': app(@','); app_scrap(comma,maybe_math); break; case end_format_stmt: in_format = NO; /* Falls through to the next case, which appends the semi. */ case interior_semi: in_data = NO; app(@';'); app_scrap(semi,maybe_math); break; case @';': in_data = NO; if(!is_FORTRAN_(language) || prn_semis) app(@';'); app_scrap(semi,maybe_math); break; case @':': app(@':'); app_scrap(colon,maybe_math); break; case @'`': @#if 0 if(!ok_to_define) { APP_STR("\\LA"); app_scrap(expr,maybe_math); } else { q_protected = BOOLEAN(!q_protected); app(q_protected ? 
@'{' : @'}'); app_scrap(expr,yes_math); } @#endif APP_STR("\\LA"); app_scrap(expr,maybe_math); break; @ @<Append scraps for |begin_language|@>= switch(language) { case NO_LANGUAGE: CONFUSION("append scraps for begin_language", "Language isn't defined"); case C: case C_PLUS_PLUS: case LITERAL: column_mode = NO; @~ break; case FORTRAN: case FORTRAN_90: case RATFOR: case RATFOR_90: if(mode0==OUTER && !free_form_input) @<Set up column mode@>@; break; case TEX: if(mode0==OUTER) @<Set up col...@>@; break; case NUWEB_OFF: case NUWEB_ON: CONFUSION("append scraps for begin_language","Invalid language"); } set_language(language); break@; @ The following function returns a pointer to an |OPERATOR| structure, or |NULL| if there's something invalid about the operator. Identifiers must be searched for explicitly. If an identifier isn't there, it's put into the table. @<Part 3@>=@[ OPERATOR HUGE *valid_op FCN((op_code)) eight_bits op_code C1("")@; { int n = 0; OPERATOR HUGE *p; switch(op_code) { case @'/': case @'+': case @'<': case @'>': case @'=': case @'%': case @'!': case @'~': case @'-': case @'*': case @'&': case @'|': case @'?': case @'^': case ellipsis: case not_eq: case lt_eq: case gt_eq: case eq_eq: case and_and: case or_or: case plus_plus: case minus_minus: case minus_gt: case gt_gt: case lt_lt: case star_star: case slash_slash: p = op + op_code; if(p >= op + 128) CONFUSION("valid_op", "Operator out of range"); return p; case compound_assignment: if(assignment_token==or_or_or) return op + @'|'; p = op + CA_START + assignment_token; if(p >= op + 128) CONFUSION("valid_op", "Compound assignment operator out of range"); return p; case dot_const: if(!FORTRAN_LIKE(language)) return NULL; id_first = dot_op.name + 1; id_loc = id_first + STRLEN(id_first); case identifier: if(!FORTRAN_LIKE(language)) return NULL; /* Can do names only in \Fortran. 
*/ @<Add an operator to the table, if necessary, and |return p@;|@>@; } return NULL; } @ @<Add an operator...@>= { ASCII id[255]; /* Scratch copy of the name found in [|id_first|,|id_loc|). */ n = PTR_DIFF(int,id_loc,id_first); if(n < 0 || n >= (int)(sizeof(id)/sizeof(id[0]))) { /* The name can't fit in |id|; don't let the copy below run past its end. */ CONFUSION("valid_op", "Operator name too long"); n = (n < 0) ? 0 : (int)(sizeof(id)/sizeof(id[0])) - 1; /* Clamp, in case |CONFUSION| returns. */ } STRNCPY(id,id_first,n); id[n] = '\0'; /* Make into proper string. */ /* Look for the name among the entries from |op+128| up to |op_ptr|. */ for(p=op+128; p<op_ptr; p++) if(STRCMP(p->op_name,id) == 0) return p; /* Not found; now |p==op_ptr|, so the new entry goes there. */ if(op_ptr >= op_end) OVERFLW("op table","op"); p->op_name = GET_MEM("op name",n+1,ASCII); STRCPY(p->op_name,id); op_ptr++; return p; } @ The form in which operators are appended depends on whether they have been overloaded with an \.{@@v}~command or not. If they have not, they are appended as a straight macro name, such as the translation of `\.{.FALSE.}' into `\.{\\FALSE}'. If they have been overloaded, they are appended instead as a construction such as `\.{\\Wop\{FALSE\}\{N\}}'; the output limbo section will then contain an automatically generated definition such as `\.{\\newop\{FALSE\}\{N\}\{\\\{.FALSE.\}\}}'. This defines the macro \.{\\\_FALSE\_N} to have the definition specified in the \.{@@v}~command. @<Part 3@>=@[ SRTN app_overload(VOID) { int ln = language_num; OPERATOR HUGE *p = valid_op(next_control); /* NOTE(review): |valid_op| can return |NULL|, and |p| is dereferenced unchecked below; this assumes |NULL| never comes back for the operator codes passed here---confirm. */ OP_INFO HUGE *q = p->info + ln; char temp[10]; if(overload_ops && q->overloaded) { switch(q->cat) { case unorbinop: case binop: APP_STR("\\Wb{"); @~ break; case unop: APP_STR("\\Wu{"); @~ break; default: APP_STR(" \\Wop{"); @~ break; } app_ASCII_str(p->op_name); sprintf(temp,"}{%s}",lang_codes[ln]); APP_STR(temp); } else if(q->op_macro) APP_STR(q->op_macro); else { err_print(W,"Unidentifiable dot constant in language %s. Missing @@v?", languages[ln]); APP_STR("\\Wunknown{"); app(wt_style.dot_delimiter.begin); app_ASCII_str(p->op_name); app(wt_style.dot_delimiter.end); app(@'}'); app_scrap(binop,yes_math); return; } app_scrap(q->cat,yes_math); } @ The following code must use |app_tok| instead of |app| in order to protect against overflow. Note that |tok_ptr+1<=max_toks| after |app_tok| has been used, so another |app| is legitimate before testing again. 
Many of the special characters in a string must be prefixed by '\.\\' so that \TeX\ will print them properly. @^special string characters@> @<Append a string or...@>= if(next_control == stmt_label && !isDigit(*id_first)) /* Identifier as statement label. */ { p = id_lookup(id_first,id_loc,normal); APP_FLAG(id,p,name_dir); app_scrap(label,no_math); } else { if (next_control==constant || next_control==stmt_label) APP_STR("\\WO{"); @.\\WO@> else if (next_control==stringg) @<Append commands for beginning of string@>@; @.\\.@> else APP_STR("\\={"); @.\\=@> @<Append the basic string@>@; if(next_control==stmt_label) {app_scrap(label,no_math);} else {app_scrap(expr,yes_math);} } @ @<Append commands for beginning of string@>= { APP_STR(pfmt->typewritr); app_tok(@'{'); } @ Here we append the string material within [|id_first|,|id_loc|). This is basically straightforward; however, commas are replaced by~`\.{\\1}' (which will be treated as a comma followed by a discretionary break), the |discretionary_break| code is replaced by~`\.{\\0}' (which will be treated as a discretionary break), the |ordinary_space| code is replaced by~`\.{\\2}' (which is treated as an ordinary space, not~`\.{\ }'), and the |tab_mark| code (which will be present only in \TeX\ mode) is replaced by~`\.{\\3}', which is defined in \.{fwebmac.web} to be several spaces. @<Append the basic str...@>= { while (id_first<id_loc) { switch (*id_first) { case @',': *id_first = @'1'; app(@'\\'); break; case ordinary_space: *id_first = @'2'; app(@'\\'); break; case tab_mark: *id_first = @'3'; app(@'\\'); break; case discretionary_break: *id_first = @'0'; // Falls through! @<Special string cases@>: app(@'\\'); break; case @'@@': if (*(id_first+1)==@'@@') id_first++; else ERR_PRINT(W,"Double @@ should be used in strings"); @.Double \AT! should be used...@> } app_tok(*id_first++); } /* End the macro. */ app(@'}'); } @ Here are the characters that are special to \TeX\ and therefore need to be escaped within a string. 
@f @<Special string cases@> default @f @<Special \TeX\ cases@> default @f @<Other string cases@> default @<Special string cases@>= @<Special \TeX\ cases@>: @<Other string cases@>@: @; @ @<Special \TeX\ cases@>= case @'\\':case @'{': case @'}'@: @; @ @<Other string cases@>= case @' ':case @'#':case @'%':case @'$':case @'^':case @'`': case @'~': case @'&': case @'_'@: @; @ This fragment appends the text collected inside `\.{@@t\dots@@>}'. That text is placed inside an \.{\\hbox} and treated as an ordinary expression. @<Append a \TeX\ string scrap@>= APP_STR("\\hbox{"); while (id_first<id_loc) app_tok(*id_first++); app(@'}'); app_scrap(expr,maybe_math); @ Ordinary identifiers are just treated as expressions. @<Append an identifier scrap@>= p = id_lookup(id_first, id_loc,normal); @#if 0 if (p->ilk==normal || !(p->reserved_word & (boolean)language) ) { APP_FLAG(id,p,name_dir); app_scrap(expr,maybe_math); /* not a reserved word */ } else { APP_FLAG(res,p,name_dir); app_scrap(p->ilk,maybe_math); } @#endif if(p->wv_macro) { WV_MACRO HUGE *w = p->wv_macro; ASCII HUGE *s = w->text; if(w->cat) { APP_STR(pfmt->id); app(@'{'); } while(*s) app_tok(*s++); if(w->cat) app(@'}'); app_scrap(p->ilk ? p->ilk : expr, w->cat ? maybe_math : yes_math); } else if (p->reserved_word & (boolean)language) { APP_FLAG(res,p,name_dir); app_scrap(p->ilk == normal ? expr : p->ilk,maybe_math); /* See the inverse construction in \.{reserved}:|save_words|. */ } else { APP_FLAG(id,p,name_dir); app_scrap(expr,maybe_math); // Not a reserved word. } @ @<Append the output file...@>= { APP_STR(upper_case_code ? "\\WOut{" : "\\Wout{"); /* Terminate the name at |id_loc|; |esc_buf| is then given the |mod_text| area to write into, and its result replaces |id_first|. */ *id_loc = '\0'; id_first = esc_buf(mod_text+1, mod_end, id_first, YES); /* The address of |params.OUTPUT_FILE_NAME| is passed here, just as the address of |global_params.OUTPUT_FILE_NAME| is passed in the upper-case call below. */ was_opened(id_first, upper_case_code, &params.OUTPUT_FILE_NAME, NULL); if(upper_case_code) was_opened(id_first, upper_case_code, &global_params.OUTPUT_FILE_NAME, NULL); while(*id_first) app_tok(*id_first++); app(@'}'); @%if(nuweb_mode) app(force); app_scrap(ignore_scrap,no_math); if(nuweb_mode) { /* !!!!! 
*/ next_control = begin_meta; continue; } } @ When the~`\ttv' that introduces \cee\ text is sensed, a call on |C_translate| will return a pointer to the \TeX\ translation of that text. If scraps exist in |scrp_info|, they are unaffected by this translation process. @<Part 2@>=@[ text_pointer C_translate(VOID) { text_pointer p; // Points to the translation. scrap_pointer save_base; // Holds original value of |scrp_base|. PARAMS outer_params; PARSE_PARAMS parse_params0; outer_params = params; parse_params0 = parse_params; save_base = scrp_base; scrp_base = scrp_ptr+1; // Empty work space after last existing scrap. /* We enclose code fragments with the \TeX\ macro~\.{\\WCD\{\dots\}}. */ if(output_protect) APP_STR("\\protect"); APP_STR("\\WCD{"); app_scrap(ignore_scrap,no_math); if(nuweb_mode) { APP_STR("{\\tt "); app_scrap(ignore_scrap, no_math); } while(next_control <= module_name) { C_parse(INNER); // Get the scraps together. if(next_control == @'|') break; @<Emit the scrap for a module name if present@>; if(next_control == @'|') break; } app_tok(cancel); app_scrap(ignore_scrap,maybe_math); // Place a |cancel| token as a final ``comment''. if(nuweb_mode) app(@'}'); #if 0 else app_scrap(semi, maybe_math); /* Append a pseudo-semicolon to try to force the code fragments to reduce to full statements. */ #endif if(output_protect) { app(protect_code); app_scrap(ignore_scrap, no_math); } app(@'}'); app_scrap(ignore_scrap,no_math); if (next_control != @'|') ERR_PRINT(W,"Missing '|' after code text"); @.Missing '|'...@> p = translate(INNER); // Make the translation. if (scrp_ptr>mx_scr_ptr) mx_scr_ptr=scrp_ptr; scrp_ptr = scrp_base-1; // Restore old |scrp_ptr|. scrp_base = save_base; // Scrap the scraps. params = outer_params; frz_params(); parse_params = parse_params0; return p; } @ The |outr_parse| routine is to |C_parse| as |outr_xref| is to |C_xref|: it constructs a sequence of scraps for \cee\ text until |next_control>=formatt|. Thus, it takes care of embedded comments. 
@<Part 2@>=@[ SRTN outr_parse(VOID) /* makes scraps from \cee\ tokens and comments */ { int bal; // Brace level in comment. text_pointer p, q; // Partial comments. |p|: Stuff before `\Cb'; |q|: `\Cb'. while (next_control<formatt) { if (next_control != begin_comment) C_parse(OUTER); else @<Append a comment or compiler directive@>@; } } @ @<Append a comment...@>= { // Append a comment/compiler directive. if(doing_cdir) @<Begin a compiler directive@>@; else @<Append a regular comment@>@; bal = copy_comment(1); // Closing brace is inserted here. next_control = ignore; if(doing_cdir && bal > 0) ERR_PRINT(W,"Can't have vertical bars in @@! compiler directives"); doing_cdir = NO; /* Handle code mode inside comments. */ while (bal > 0) { in_comment = YES; p=text_ptr; freeze_text; q = C_translate(); /* at this point we have |tok_ptr+7<=max_toks| */ APP_FLAG(tok,p,tok_start); APP_FLAG(inner_tok,q,tok_start); if (next_control==@'|') { bal = copy_comment(bal); next_control = ignore; } else bal = 0; // An error has been reported. } app(force); app_scrap(ignore_scrap,no_math); /* the full comment becomes a scrap */ } @ Compiler directives are begun by the style-file text \.{cdir.start}. For example, `\.{@@!abc}' $\to$ `\.{\#pragma\ abc}'. @<Begin a compiler dir...@>= { outer_char HUGE *s = t_style.cdir_start[language_num]; int n = 2*STRLEN(s) + 1; /* The factor of~2 counts possible escapes, and the 1 takes care of |'\0'|. */ ASCII HUGE *temp = GET_MEM("temp_cdir",n,ASCII); ASCII HUGE *start = GET_MEM("start_cdir",n,ASCII); STRCPY(start,s); to_ASCII((outer_char HUGE *)start); room_for(9+n,3,1); /* Tokens: */ app(force); APP_STR("\\WCDIR{"); esc_buf(temp,temp+n,start,YES); @~ APP_STR(to_outer(temp)); @.\\WCDIR@> FREE_MEM(temp,"temp_cdir",n,ASCII); FREE_MEM(start,"start_cdir",n,ASCII); } @ @<Append a regular comment@>= { room_for(8,3,1); /* Tokens: `\.{;{ }\ { }\\{ }W{ }C\{{ }\}{ }\It{force}}'. 
*/ if(Fortran88) { if(free_Fortran && lst_ampersand) { scrp_ptr--; // Kill off the \.{\&}. } else if(!at_beginning && auto_semi) { app(@';'); } last_was_cmnt = YES; } app(break_space); APP_STR(long_comment ? "\\WC{" : "\\Wc{"); // Long/short comment. @.\\WC@> @.\\Wc@> } @* OUTPUT of TOKENS. So far our programs have only built up multi-layered token lists in \.{WEAVE}'s internal memory; we have to figure out how to get them into the desired final form. The job of converting token lists to characters in the \TeX\ output file is not difficult, although it is an implicitly recursive process. Four main considerations had to be kept in mind when this part of \.{WEAVE} was designed: (a)~There are two modes of output, |outer| mode that translates tokens like |force| into line-breaking control sequences, and |inner| mode, intended for code between~\Cb, that ignores them except that blank spaces take the place of line breaks. (b)~The |cancel| instruction applies to adjacent token or tokens that are output, and this cuts across levels of recursion since `|cancel|' occurs at the beginning or end of a token list on one level. (c)~The \TeX\ output file will be semi-readable if line breaks are inserted after the result of tokens like |break_space| and |force|. (d)~The final line break should be suppressed, and there should be no |force| token output immediately after `\.{\\WY\\WP}'. @i output.hweb @ The output process uses a stack to keep track of what is going on at different ``levels'' as the token lists are being written out. Entries on this stack have three parts: \yskip\hang |end_field| is the |tok_mem| location where the token list of a particular level will end; \yskip\hang |tok_field| is the |tok_mem| location from which the next token on a particular level will be read; \yskip\hang |mode_field| is the current mode, either |inner| or |outer|. 
\yskip\noindent The current values of these quantities are referred to quite frequently, so they are stored in a separate place instead of in the |stack| array. We call the current values |cur_end|, |cur_tok|, and |cur_mode|. The global variable |stck_ptr| tells how many levels of output are currently in progress. The end of output occurs when an |end_translation| token is found, so the stack is never empty except when we first begin the output process. @d inner 0 /* Value of |mode| for \cee\ texts within \TeX\ texts */ @d outer 1 /* Value of |mode| for \cee\ texts in modules */ @<Typed...@>= typedef int mode; typedef struct { token_pointer end_field; /* Ending location of token list */ token_pointer tok_field; /* Present location within token list */ boolean mode_field; /* Interpretation of control tokens */ } output_state; typedef output_state HUGE *stack_pointer; @ @d cur_end cur_state.end_field /* Current ending location in |tok_mem| */ @d cur_tok cur_state.tok_field /* Location of next output token in |tok_mem| */ @d cur_mode cur_state.mode_field /* Current mode of interpretation */ @d ini_stack stck_ptr=stack;cur_mode=outer@; /* Initialize the stack */ @<Global...@>= EXTERN output_state cur_state; /* |cur_end|, |cur_tok|, |cur_mode| */ EXTERN BUF_SIZE stck_size; EXTERN output_state HUGE *stack; /* Dynamic array of info for non-current levels */ EXTERN stack_pointer stck_end; /* End of |stack| */ EXTERN stack_pointer stck_ptr; /* First unused location in the output state stack */ EXTERN stack_pointer mx_stck_ptr; /* Largest value assumed by |stck_ptr| */ @ @<Alloc...@>= ALLOC(output_state,stack,ABBREV(stck_size_w),stck_size,0); stck_end=stack+stck_size-1; /* End of |stack| */ @ @<Set init...@>= mx_stck_ptr=stack; @ To insert token-list |p| into the output, the |push_level| subroutine is called; it saves the old level of output and gets a new one going. The value of |cur_mode| is not changed. 
@<Part 2@>=@[ SRTN push_level FCN((p)) /* Suspends the current level */ text_pointer p C1("")@; { if (stck_ptr==stck_end) OVERFLW("stack levels",ABBREV(stck_size_w)); if (stck_ptr>stack) { /* save current state */ stck_ptr->end_field=cur_end; stck_ptr->tok_field=cur_tok; stck_ptr->mode_field=cur_mode; } stck_ptr++; if (stck_ptr>mx_stck_ptr) mx_stck_ptr=stck_ptr; cur_tok=*p; cur_end=*(p+1); } @ Conversely, the |pop_level| routine restores the conditions that were in force when the current level was begun. This subroutine will never be called when |stck_ptr=1|. @<Part 2@>=@[ SRTN pop_level(VOID) { cur_end=(--stck_ptr)->end_field; cur_tok=stck_ptr->tok_field; cur_mode=stck_ptr->mode_field; } @ The |get_output| function returns the next byte of output that is not a reference to a token list. It returns the values |identifier| or |res_word| or |mod_name| if the next token is to be an identifier (typeset in italics), a reserved word (typeset in boldface) or a module name (typeset by a complex routine that might generate additional levels of output). In these cases |cur_name| points to the identifier or module name in question. @<Global...@>= EXTERN name_pointer cur_name; @ @d res_word OCTAL(201) /* Returned by |get_output| for reserved words */ @d mod_name OCTAL(200) /* Returned by |get_output| for module names */ @<Part 2@>=@[ eight_bits get_output(VOID) /* Returns the next token of output */ { sixteen_bits a; /* Current item read from |tok_mem| */ restart: while (cur_tok==cur_end) pop_level(); /* Get back to unfinished level. 
*/ a=*(cur_tok++); if (a>=0400) { cur_name=a % id_flag + name_dir; switch (a / id_flag) { case 2: return res_word; /* |a==res_flag+cur_name| */ case 3: return mod_name; /* |a==mod_flag+cur_name| */ case 4: push_level(a % id_flag + tok_start); goto restart; /* |a==tok_flag+cur_name| */ case 5: push_level(a % id_flag + tok_start); cur_mode=inner; goto restart; /* |a==inner_tok_flag+cur_name| */ default: return identifier; /* |a==id_flag+cur_name| */ } } /* If we get here, it's a single-byte token. */ return (eight_bits)a; } @ The real work associated with token output is done by |make_output|. This procedure appends an |end_translation| token to the current token list, and then it repeatedly calls |get_output| and feeds characters to the output buffer until reaching the |end_translation| sentinel. It is possible for |make_output| to be called recursively, since a module name may include embedded \cee\ text; however, the depth of recursion never exceeds one level, since module names cannot be inside of module names. A procedure called |output_C| does the scanning, translation, and output of \cee\ text within `\Cb'~brackets, and this procedure uses |make_output| to output the current token list. Thus, the recursive call of |make_output| actually occurs when |make_output| calls |output_C| while outputting the name of a module. 
@^recursion@> @<Part 2@>=@[ SRTN output_C(VOID) /* Outputs the current token list */ { token_pointer save_tok_ptr; text_pointer save_text_ptr; eight_bits save_next_control; /* Values to be restored */ text_pointer p; /* Translation of the \cee\ text */ save_tok_ptr=tok_ptr; save_text_ptr=text_ptr; save_next_control=next_control; next_control=ignore; p=C_translate(); APP_FLAG(inner_tok,p,tok_start); scanning_meta = NO; make_output(); /* output the list */ if (text_ptr>mx_text_ptr) mx_text_ptr=text_ptr; if (tok_ptr>mx_tok_ptr) mx_tok_ptr=tok_ptr; text_ptr=save_text_ptr; tok_ptr=save_tok_ptr; /* Forget the tokens */ next_control=save_next_control; /* Restore |next_control| to original state */ } @ Here is \.{WEAVE}'s major output handler. @<Part 3@>=@[ SRTN make_output(VOID) /* outputs the equivalents of tokens */ { eight_bits a; // Current output byte. eight_bits b; // Next output byte. int c; // Count of |indent| and |outdent| tokens. boolean copying = NO; // Are we copying the \TeX\ part of a comment? app(end_translation); // Append a sentinel. freeze_text; push_level(text_ptr-1); WHILE() { a = get_output(); reswitch: switch(a) { case ignore: continue; // In case a null sneaks in. case begin_language: language = lan_enum(get_output()); /* The byte after |begin_language| contains the language number. 
*/ continue; @<Cases for turning output on or off@>@:@; case end_translation: return; case identifier: case res_word: if(output_on) @<Output an identifier@>@; break; case mod_name: if(output_on) @<Output a module name@>@; @~ break; case math_bin: case math_rel: @<Output a \.{\\math} operator@>; @~ break; case toggle_meta: scanning_meta = BOOLEAN(!scanning_meta); break; case cancel: c=0; while ((a=get_output())>=indent && a<=big_force) { if (a==indent) c++; if (a==outdent) c--; } @<Output saved |indent| or |outdent| tokens@>; goto reswitch; case big_cancel: c=0; while (((a=get_output())>=indent || a==@' ') && a<=big_force) { if (a==indent) c++; if (a==outdent) c--; } @<Output saved...@>; goto reswitch; case indent: case outdent: case opt: case backup: case break_space: case force: case big_force: @<Output a control, look ahead in case of line breaks, possibly |goto reswitch@;|@>; break; case interior_semi: if(output_on) out(';'); break; case @'*': if(!(copying || nuweb_mode)) { OUT_STR("\\ast "); // Special macro for asterisks in code mode. @.\\ast@> break; } /* If |copying|, the asterisk case falls through to the default. */ default: if(output_on) { out(a); // Otherwise |a| is an |ASCII| character. if(scanning_meta && a=='\n') flush_buffer(out_ptr, NO); } } } } @ @<Cases for turning output on...@>= case protect_code: output_protect = BOOLEAN(!output_protect); @~ break; case copy_mode: copying = BOOLEAN(!copying); @~ break; case turn_output_off: @% OUT_STR("OFF"); // For debugging. output_on = NO; break; case turn_output_on: @% OUT_STR("ON"); // For debugging. output_on = YES; break; case Turn_output_off: skip_file(); strt_off = YES; @% OUT_STR("OFF"); // For debugging. output_on = NO; break; case Turn_output_on: strt_off = NO; @% OUT_STR("ON"); // For debugging. 
output_on = YES; break; @ @<Part 3@>=@[ SRTN skip_file(VOID) { #define TEMP_LEN (MAX_FILE_NAME_LENGTH + 11) outer_char temp[TEMP_LEN],temp1[TEMP_LEN]; esc_file_name(temp1,TEMP_LEN,prms[1].web.File_name); SPRINTF(TEMP_LEN,temp,`"\\Wskipped{%s}",temp1`); OUT_STR(temp); fin_line(); #undef TEMP_LEN } @ @<Part 3@>=@[ SRTN out_skip(VOID) { @<Toggle output@>; if(!output_on) { output_on = YES; OUT_STR("\\WY\\WP"); skip_file(); output_on = NO; } } @ @<Output an identifier@>= { if(nuweb_mode) { ASCII HUGE *k; for(k=cur_name->byte_start; k<(cur_name+1)->byte_start; k++) { out(*k); } } else @<Format and output an identifier@>@; } @ An identifier of length one does not have to be enclosed in braces, and it looks slightly better if set in a math-italic font instead of a (slightly narrower) text-italic font. Thus we output `\.{\\\char'174a}' but `\.{\\]\{aa\}}'. @d ALL_UC (all_uc && length(cur_name) > 1) @<Format and output an id...@>= { boolean all_uc = cur_name->info.upper_case; if(output_protect) OUT_STR("\\protect"); if (a==identifier) { if(is_intrinsic(cur_name)) OUT_STR(pfmt->intrinsic); /* Intrinsic function---e.g., |fopen|. */ @.\\\AT!@> else if(is_keyword(cur_name)) OUT_STR(ALL_UC ? pfmt->KEYWORD : pfmt->keyword); /* Fortran keyword---e.g., |@r BLOCKSIZE|. */ @.\\.@> else if (length(cur_name)==1) OUT_STR(pfmt->short_id); /* One-character identifier---e.g., |a|. */ @.\\|@> else @<Output the appropriate identifier prefix@>@; } else OUT_STR(ALL_UC ? pfmt->RESERVED : pfmt->reserved); /* Reserved word---e.g., |float|. */ @.\\\&@> out_name(IDENTIFIER,cur_name); } @ Some people prefer macros to be formatted differently from ordinary identifiers. @<Output the appro...@>= switch(DEFINED_TYPE(cur_name)) { case D_MACRO: OUT_STR(ALL_UC ? pfmt->ID_OUTER : pfmt->id_outer); // E.g., |NON_TEX_MACRO|. break; case M_MACRO: OUT_STR(ALL_UC ? pfmt->ID_INNER : pfmt->id_inner); // E.g., |_FWEAVE_|. break; default: OUT_STR(ALL_UC ? 
pfmt->ID: pfmt->id); // Longer ordinary identifier---e.g., |out|. break; @.\\\\@> } @ Here |a|~will only be |math_bin| or |math_rel|. @<Output a \....@>= OUT_STR(a==math_bin ? "\\mathbin{" : "\\mathrel{"); @.\\mathbin@> @.\\mathrel@> @ The current mode does not affect the behavior of \.{WEAVE}'s output routine except when we are outputting control tokens. @<Output a control...@>= if (a<break_space) { if (cur_mode==outer) { if(output_on) { out(@'\\'); @~ out(a-cancel+@'0'); /* As an example, $|backup| = |0345| - |0341| + \.{'0'} = \.{'4'} \to \.{\\4}$. */ } if (a==opt) if(output_on) {out(get_output());} /* |opt| is followed by a digit. */ else get_output(); } else if (a==opt) b=get_output(); // Ignore digit following |opt|. } else @<Look ahead for strongest line break, |goto reswitch@;|@>; /* Here $a \in \{|break_space|,|force|,|big_force|\}$. */ @ If several of the tokens |break_space|, |force|, |big_force| occur in a row, possibly mixed with blank spaces (which are ignored), the largest one is used. A line break also occurs in the output file, except at the very end of the translation. The very first line break is suppressed (i.e., a line break that follows `\.{\\WY\\WP}'). @<Look ahead for st...@>= { boolean save_mode; /* value of |cur_mode| before a sequence of breaks */ b=a; save_mode=cur_mode; c=0; WHILE() { a = get_output(); if (a==cancel || a==big_cancel) { @<Output saved |indent| or |outdent| tokens@>; goto reswitch; // |cancel| overrides everything. } if ((a!=@' ' && a<indent) || a==backup || a>big_force) { // Time to output something. 
if (save_mode==outer) { if (out_ptr>out_buf+5 && STRNCMP(out_ptr-5,"\\WY\\WP",6)==0) goto reswitch; @<Output saved |indent| or |outdent| tokens@>; if(output_on) if(strt_off) { /* As with the \.{\\WY\\WP} test above, look back only when the buffer holds enough characters; |out_ptr| is reset to |out_buf| just below, so |out_ptr-2| could otherwise point in front of the buffer. */ if(out_ptr>=out_buf+2 && STRNCMP(out_ptr-2,"\\WP",3)==0) { out_ptr = out_buf; goto reswitch; } } else { out(@'\\'); @~ out(b-cancel+@'0'); } if (a!=end_translation) fin_line(); } else if (a!=end_translation && cur_mode==inner) if(output_on) out(@' '); goto reswitch; } if (a==indent) c++; else if (a==outdent) c--; else /* Use only the largest. */ if (a>b) b=a; /* if |a==' '| we have |a<b| */ else if(a==opt) get_output(); /* Throw away digit after |opt|. */ } } @ While we're removing unwanted or duplicate tokens, we don't want to lose track of the indent level. So we count the |indent|s and |outdent|s, and write out the net here. @<Output saved...@>= for (;c>0;c--) OUT_STR("\\1"); for (;c<0;c++) OUT_STR("\\2"); @ The remaining part of |make_output| is somewhat more complicated. When we output a module name, we may need to enter the parsing and translation routines, since the name may contain code embedded in \Cb\~constructions. This code is placed at the end of the active input buffer and the translation process uses the end of the active |tok_mem| area. @<Output a module name@>= #if FCN_CALLS out_md_name(); #else @<Code to output a module name@>@; #endif @ @<Part 3@>= #if FCN_CALLS @[SRTN out_md_name(VOID) { @<Code to output a module name@>@; } #endif @ @<Code to output a module name@>= { name_pointer cur_mod_name; /* name of module being output */ OUT_STR("\\WX"); @.\\WX@> cur_xref = (xref_pointer)cur_name->xref; /* Output the module number, or zero if it was undefined */ if (cur_xref->num>=def_flag) { out_mod(cur_xref->num-def_flag,ENCAP); if (phase==3) { cur_xref=cur_xref->xlink; while (cur_xref->num>=def_flag) { OUT_STR(", "); out_mod(cur_xref->num-def_flag,ENCAP); cur_xref=cur_xref->xlink; } } } else out(@'0'); out(@':'); /* End the module number. 
*/ @<Output the text of the module name@>; OUT_STR("\\X "); /* End the text. (Can't use a colon here, because there may be colons in the text.) */ OUT_STR(cur_xref->num >= def_flag ? language_symbol((LANGUAGE)cur_mod_name->mod_info->language) : (CONST outer_char *)""); OUT_STR("\\X"); /* End the language marker. */ } @ In most situations, we only want to output a language marker if we're in a language different from the global language. @d language_name_ptr(l) languages[lan_num(l)] /* Points to the full language name. */ @d language_symbol(l) (l!=global_language ? LANGUAGE_CODE(l) : (CONST outer_char *)"") @ @<Output the text...@>= { ASCII HUGE *k, HUGE *k_limit; /* indices into |byte_mem| */ ASCII HUGE *j; /* index into |cur_buffer| */ ASCII HUGE *save_loc, HUGE *save_limit; // |loc| and |limit| to be restored. eight_bits b; k=cur_name->byte_start; k_limit=(cur_name+1)->byte_start; cur_mod_name=cur_name; while (k<k_limit) { b=*(k++); if (b==@'@@') @<Skip next character, give error if not `\.{@@}'@>; if (b!=@'|') out(b)@; else { @<Copy the \cee\ text into the |cur_buffer| array@>; save_loc=loc; save_limit=limit; loc=limit+2; limit=j+1; *limit=@'|'; output_C(); loc=save_loc; limit=save_limit; } } } @ @<Skip next char...@>= if (*k++!=@'@@') { SET_COLOR(error); printf("\n! Illegal control code in section name: <"); @.Illegal control code...@> prn_id(cur_mod_name); printf("> "); mark_error; } @ The \cee\ text enclosed in~\Cb\ should not contain `\vertbar'~characters, except within strings. We put a~`\vertbar' at the front of the buffer, so that an error message that displays the whole buffer will look a little bit sensible. The variable |delim| is zero outside of strings, otherwise it equals the delimiter that began the string being copied. @<Copy the \cee\ text into...@>= { ASCII delim; /* first and last character of string being copied */ j=limit+1; *j=@'|'; delim=0; WHILE() { if (k>=k_limit) { SET_COLOR(error); printf("\n! 
C text in section name didn't end: <"); @.C text...didn't end@> prn_id(cur_mod_name); printf("> "); mark_error; break; } b=*(k++); if (b==@'@@') @<Copy a control code into the buffer@>@; else { if (b==@'\'' || b==@'"') if (delim==0) delim=b; else if ((eight_bits)delim == b) delim=0; if (b!=@'|' || delim!=0) { if (j>cur_buffer+buf_size-2) OVERFLW("buffer",""); *(++j)=b; } else break; } } } @ @<Copy a control code into the buffer@>= { if (j>cur_buffer+buf_size-3) OVERFLW("buffer",""); *(++j)=@'@@'; *(++j)=*(k++); } @* PHASE TWO PROCESSING. We have assembled enough pieces of the puzzle in order to be ready to specify the processing in \.{WEAVE}'s main pass over the source file. Phase two is analogous to phase one, except that more work is involved because we must actually output the \TeX\ material instead of merely looking at the \.{WEB} specifications. @<Part 2@>=@[ SRTN phase2(VOID) { extern outer_char wbflnm0[]; IN_COMMON int num_ifiles; phase = 2; // Prepare for second phase. the_part = LIMBO; params = global_params; frz_params(); rst_input(); strt_off = ending_off = NO; writing(YES,tex_fname); @~ if(tex_file==stdout) putchar('\n'); fin_line(); // Write out the ``\.{\\input\ fwebmac.sty}''. @<Issue the \.{\\Wbegin} command that sets up the beginning of the document@>@; module_count = 0; num_ifiles = 0; copy_limbo(); flush_buffer(out_buf,NO); /* Insert a blank line---it looks nice. */ math_flag = NO; while (!input_has_ended) @<Translate the current module@>@; } @ After the macros have been read in, we are ready to actually begin the document. The command has the form ``\.{\\Wbegin[\It{options}]\{\It{style}\}\{\It{TeXindent}\}\{\It{codeindent}\} \{\It{contents}\} \{\{\It{reserved}\}\{\It{short identifier}\}\{\It{identifier}\} \{\It{UPPERCASE identifier}\} \{\It{outer macro}\}\{\It{inner macro}\} \{\It{intrinsic}\}\{\It{keyword}\}\{\It{typewriter}\}\{\It{modtrans}\}\}}.'' The \It{options} and \It{style} field are used only by \LaTeX. 
@<Issue the \.{\\Wbegin} command...@>= { #define TEMP_LEN (MAX_FILE_NAME_LENGTH + 100) #define ARGS \ w_style.misc.LaTeX.class.options, w_style.misc.LaTeX.package.options,@\ \ w_style.misc.LaTeX.class.file, w_style.misc.LaTeX.package.file,@\ \ w_style.misc.TeXindent,@\ \ w_style.misc.codeindent,@\ \ w_style.contents.tex,@\ \ pfmt->reserved, pfmt->RESERVED,@\ \ pfmt->short_id,@\ \ pfmt->id, pfmt->ID,@\ \ pfmt->id_outer, pfmt->ID_OUTER,@\ \ pfmt->id_inner, pfmt->ID_INNER,@\ \ pfmt->intrinsic,@\ \ pfmt->keyword, pfmt->KEYWORD,@\ \ pfmt->typewritr,@\ \ w_style.indx.encap_prefix,@\ \ w_style.misc.doc_preamble, w_style.misc.doc_postamble,@\ \ w_style.indx.name outer_char temp0[TEMP_LEN]; outer_char HUGE *temp1 = GET_MEM("temp1",TEMP_LEN,outer_char); SPRINTF(TEMP_LEN,temp0, `"\n\\Wbegin[%s;%s]{%s;%s} {%s} {%s} {%s}\n\ {{%s%s} {%s} {%s%s} {%s%s} {%s%s} {%s} {%s%s} {%s}}\n\ {%s} {%s;%s} {%s}", ARGS`); OUT_STR(xpn_name(&temp1,TEMP_LEN,temp0,wbflnm0)); FREE(temp1); fin_line(); #undef TEMP_LEN #undef ARGS } @ The output file will contain the control sequence~\.{\\WY} between non-null sections of a module, e.g., between the \TeX\ and definition parts if both are nonempty. This puts a little white space between the parts when they are printed. However, we don't want \.{\\WY} to occur between two definitions within a single module. The variables |out_line| or |out_ptr| will change if a section is non-null, so the following macros `|save_position|' and `|emit_space_if_needed|' are able to handle the situation: @d save_position save_line=out_line; save_place=out_ptr@; @d emit_space_if_needed if (save_line!=out_line || save_place!=out_ptr) { OUT_STR("\\WY"); @.\\WY@> yskipped = YES; } @<Global...@>= EXTERN LINE_NUMBER save_line; // Former value of |out_line|. EXTERN ASCII HUGE *save_place; // Former value of |out_ptr|. EXTERN boolean in_module SET(NO); // Between \.{\\WN} and \.{\\fi}? EXTERN boolean yskipped SET(NO); // Did we skip between parts? 
@ @<Translate the current module@>=
{
the_part = TEX_;

/* Again, all modules start off in the global language. */
params = global_params;
frz_params();
scanning_meta = NO; // For safety.
module_count++;

@<Output the code for the beginning of a new module@>;
save_position;

/* The three parts of a module, in order. */
trns_TeX();
trns_defn();
trns_code();

@<Show cross-references to this module@>;
@<Output the code for the end of a module@>;
}

@ Modules beginning with the \.{WEB} control sequence~`\.{@@\ }' start in
the output with the \TeX\ control sequence~`\.{\\WM}', followed by the
module number. Similarly, `\.{@@*}'~modules lead to the control
sequence~`\.{\\WN}'. If this is a changed module, we put~\.{*} just before
the module number.

@<Output the code for the beginning...@>=
{
@<Output the include file name if necessary@>;

if(!in_module && output_on)
    {
    /* The character before |loc| tells whether the module is starred. */
    OUT_STR(*(loc-1) == @'*' ? "\\WN" : "\\WM");
@.\\WM@>
@.\\WN@>
    in_module = YES;

    out_mod(module_count,NO_ENCAP);
    OUT_STR(". ");
    }

progress(); // Progress report to terminal.
}

@ These variables remember the last and current name of the include file.

@<Glob...@>=

IN_COMMON outer_char last_include_file[],this_include_file[];

@ @<Output the include file name...@>=

if(STRCMP(last_include_file,this_include_file) != 0)
    {
    STRCPY(last_include_file,this_include_file);
    OUT_STR("\\WIF{"); @~ out_fname(this_include_file); @~ OUT_STR("}");
    fin_line();
    }

@ In the \TeX\ part of a module, we simply copy the source text, except that
index entries are not copied and \cee\ text within \Cb\ is translated.

@<Part 2@>=@[
SRTN
trns_TeX(VOID)
{
the_part = TEX_;
parsing_mode = OUTER;

/* Copy until a control code at or beyond |formatt| ends the \TeX\ part. */
do
    {
    next_control = copy_TeX();

    switch(next_control)
        {
        @<Cases to set |language| and |break|@>@:@;

        case toggle_output:
            out_skip();
            break;

        case @'|':
            ini_stack;
            output_C();
            break;

        case math_break:
            out(@'|'); // Literal vertical bar.
            break;

        case @'@@':
            out(@'@@'); // Literal '\.{@@}'.
            break;

        case invisible_cmnt:
            loc = limit + 1;
            break;

        case begin_meta:
            OUT_STR(w_style.misc.meta.TeX.begin);
            break;

        case end_meta:
            OUT_STR(w_style.misc.meta.TeX.end);
            break;

        case TeX_string:
        case xref_roman:
        case xref_wildcard:
        case xref_typewriter:
        case macro_module_name:
        case module_name:
            loc-=2;
            next_control=get_next(); /* skip to \.{@@>} */

            if (next_control==TeX_string)
                ERR_PRINT(W,"TeX string should be in code text only");

            break;
@.TeX string should be...@>

        case thin_space:
        case line_break:
        case big_line_break:
        case no_line_break:
        case join:
        case pseudo_semi:
        case pseudo_expr:
        case pseudo_colon:
        case compiler_directive:
        case Compiler_Directive:
        case no_index:
        case begin_bp:
        case insert_bp:
            ERR_PRINT(W,"You can't do that in TeX text");
            break;
@.You can't do that...@>

        case protect_code:
            if(*loc != @'|')
                ERR_PRINT(W, "@@p should be immediately followed by '|'");

            output_protect = YES;
            break;

        case USED_BY_NEITHER:
            err_print(W, "Invalid `@@%c' ignored", XCHR(*(loc-1)));
            break;
        }
    }
while (next_control<formatt);

output_protect = NO;
}

@ We need a flag to suppress phase~2 declarations of stuff recognized during
macro definitions. Some other flags are useful too.

@<Glob...@>=

EXTERN boolean ok_to_define SET(YES);
EXTERN boolean q_protected SET(NO); // For protecting with quotes.
EXTERN boolean suppress_defn SET(NO); // For masking out formats, etc.
EXTERN boolean output_protect SET(NO); // For writing \.{\\protect}.

@ When we get to the following code we have |next_control>=formatt|, and the
token memory is in its initial empty state.
@d SUPPRESS(name) if(!defn_mask.name) suppress_defn = YES@; @<Part 2@>=@[ SRTN trns_defn(VOID) { boolean overload_ops0 = overload_ops; the_part = DEFINITION; parsing_mode = OUTER; if (next_control<begin_code) { /* definition part non-empty */ emit_space_if_needed; save_position; @<Store the output switch@>@; @% @<Append \.{\\WP}@>@; } while (next_control<begin_code) @<Translate a |definition|, |formatt|, etc.@>@; } @ Now deal with a |formatt|, |definition|, |undefinition|, |WEB_definition|, |limbo_text|, |op_def|, |macro_def|, or \.{@@\#...} command. @<Translate a |definition|...@>= { eight_bits last_control = next_control; boolean nuweb_mode0; ini_stack; switch(next_control) { case begin_comment: case invisible_cmnt: break; default: @<Store the output switch@>@; break; } nuweb_mode0 = nuweb_mode; nuweb_mode = NO; switch(next_control) { case formatt: @<Start a format definition@>@; break; case limbo_text: @<Start a limbo text definition@>@; break; case op_def: @<Start an overloaded operator definition@>@; break; case macro_def: @<Start an overloaded identifier definition@>@; break; case begin_comment: doing_cdir = NO; break; case invisible_cmnt: loc = limit + 1; // Skip the line. /* Skip any other extraneous material that doesn't belong in the definition section. */ while((next_control=get_next()) < formatt && next_control!=begin_comment); continue; default: @<Start a macro definition@>@; break; } ok_to_define = NO; nuweb_mode = nuweb_mode0; outr_parse(); // Scan the definition or whatever. if(auto_app_semi && last_control==WEB_definition) {app_scrap(semi,maybe_math);} overload_ops = overload_ops0; fin_C(); // Finish up the definition or whatever. ok_to_define = YES; } @ The switch into code mode is appended rather than just written directly out in order to deal with the |output_on| status properly. 
@<Append \.{\\WP}@>=
{
APP_STR("\\WP");
@.\\WP@>
}

@ The |fin_C| procedure outputs the translation of the current scraps,
preceded by the control sequence~`\.{\\WP}' and followed by the control
sequence~`\.{\\par}'. It also restores the token and scrap memories to
their initial empty state.

A |force| token is appended to the current scraps before translation takes
place, so that the translation will normally end with~\.{\\6} or~\.{\\7}
(the \TeX\ macros for |force| and |big_force|). This~\.{\\6} or~\.{\\7} is
replaced by the concluding \.{\\par} or by \.{\\WY\\par}.

@<Part 2@>=@[
SRTN
fin_C(VOID) // Finishes a definition or a \cee\ part.
{
text_pointer p; // Translation of the scraps.
boolean current_output_state = output_on; // Put back at the very end.

if(!suppress_defn)
    {
/* NOTE(review): the extent of the next commented-out line is reconstructed;
confirm that |column_mode = NO| is meant to be live. */
@%  output_on = YES;
    column_mode = NO;

    app_tok(force); // Last thing in the translation.
    app_scrap(ignore_scrap,no_math); // The last stuff doesn't count for syntax.

/* We've accumulated all the stuff for one part. Translate it, then print
it. */
    p = translate(OUTER);
    APP_FLAG(tok,p,tok_start);
    make_output(); // Output the list.

    if (out_ptr>out_buf+1)
        @<Tidy up the end of the part@>@;

    OUT_STR("\\par");
    fin_line();

/* Accumulate statistics. */
    if (text_ptr>mx_text_ptr)
        mx_text_ptr=text_ptr;

    if (tok_ptr>mx_tok_ptr)
        mx_tok_ptr=tok_ptr;

    if (scrp_ptr>mx_scr_ptr)
        mx_scr_ptr=scrp_ptr;
    }
else
    suppress_defn = NO; // The suppression covers one definition only.

/* Forget the tokens and the scraps. */
tok_ptr=tok_mem+1;
text_ptr=tok_start+1;
scrp_ptr=scrp_info;

#if(0)
if(strt_off)
    output_on = strt_off = ending_off = NO;

if(ending_off)
    {
    strt_off = ending_off = NO;
    output_on = YES;
    }
#endif

output_on = current_output_state;
}

@ @<Tidy up...@>=
{
/* Look at the last two characters that were output. */
if (*(out_ptr-1)==@'\\')
    {
@.\\6@>
@.\\7@>
@.\\WY@>
    if (*out_ptr==@'6')
        out_ptr -= 2; // Throw away the \.{\\6}.
    else if (*out_ptr==@'7')
        {
        out_ptr -= 2; // Throw away the \.{\\7}\dots
        OUT_STR("\\WY"); // and replace it with \.{\\WY}.
        }
    }
}

@ Here is a nucleus that writes out the appropriate macro for the
preprocessor command.
@d APP_TEMP(letter,arg) app_temp(OC(letter),OC(arg)) @<Part 2@>=@[ SRTN app_temp FCN((letter,arg)) CONST outer_char letter[] C0("")@; CONST outer_char arg[] C1("")@; { char temp[50]; sprintf(temp,"\\W%s:%s:", (char *)letter, (char *)arg); APP_STR(temp); } @ This nucleus appends stuff for the preprocessor commands, macro definitions, formats, etc. @<Part 2@>=@[ SRTN app_proc FCN((next_control)) eight_bits next_control C1("")@; { if(the_part == DEFINITION) { @<Append \.{\\WP}@>@; if(yskipped) { @<Append the scrap header for the definition part@>@; yskipped = NO; } } switch(next_control) { case WEB_definition: // ``\.{@@m}'' APP_STR(upper_case_code ? "\\WMD" : "\\WMd"); @~ break; case undefinition: // ``\.{@@u}'' APP_LANG("Ud"); @~ break; case definition: // ``\.{@@d}'' APP_LANG(upper_case_code ? "D" : "d"); @~ break; case formatt: // ``\.{@@f}'' APP_LANG(upper_case_code ? "F" : "f"); @~ break; case limbo_text: // ``\.{@@l}'' APP_LANG("l"); @~ break; case op_def: // ``\.{@@v}'' APP_LANG("v"); @~ break; case macro_def: // `\.{@@w}'. APP_LANG(upper_case_code ? "WW" : "w"); @~ break; case m_ifdef: APP_TEMP("E","ifdef"); @~ break; case m_ifndef: APP_TEMP("E","ifndef"); @~ break; case m_line: APP_TEMP("E","line"); @~ break; case m_undef: APP_TEMP("E","undef"); @~ break; case m_if: APP_TEMP("E","if"); @~ break; case m_elif: APP_TEMP("E","elif"); @~ break; case m_else: APP_TEMP("E","else"); app_scrap(ignore_scrap,no_math); break; case m_for: APP_TEMP("E","for"); @~ break; case m_endfor: APP_TEMP("E","endfor"); app_scrap(ignore_scrap,no_math); break; case m_endif: APP_TEMP("E","endif"); app_scrap(ignore_scrap,no_math); break; } @.\\WD@> @.\\WMD@> @.\\WE@> } @ This function helps keep the code short. @d APP_LANG(suffix) app_lang(OC(suffix)) @<Part 2@>=@[ SRTN app_lang FCN((suffix)) CONST outer_char *suffix C1("")@; { APP_TEMP(suffix,(CONST outer_char *)(language_symbol(language))); } @ Macro definitions have the syntax `\.{@@m\ A\ b}' or `\.{@@m\ A(x)\ y}'. 
Keeping in line with the conventions of the C and~\.{WEB} preprocessors (and
otherwise contrary to the rules of \.{WEB}) we distinguish here between the
case that `\.('~immediately follows an identifier and the case that the two
are separated by a space. In the latter case, and if the identifier is not
followed by~`\.(' at all, the replacement text starts immediately after the
identifier. In the former case, it starts after we scan the
matching~`\.)'.

@<Start a macro...@>=
{
LANGUAGE saved_language = language;

if(next_control == definition)
    SUPPRESS(outer_macros);

if(next_control == WEB_definition)
    SUPPRESS(macros);

app_proc(next_control);

/* \TeX\ is scanned as \cee\ here; the language is put back at the end. */
if(language==TEX)
    language = C;

if( ((C_LIKE(language) || language==LITERAL) && next_control<=WEB_definition)
        || next_control==WEB_definition || next_control==m_ifdef
        || next_control==m_ifndef || next_control==m_undef)
    {
    if( (next_control=get_next())!=identifier && next_control != @'[')
        {
        ERR_PRINT(W,"Improper macro definition: \
expected identifier");
@.Improper macro definition@>
        }
    else
        {
        if(next_control == @'[')
            @<Format auto insertion@>@;

        app(@'$');
        APP_ID;

        if (*loc==@'(')
            @<Append argument of \WEB\ macro@>@;
        else
            { /* Id not followed by parenthesis. */
            next_control = get_next();
            }

        app(@'$');
        app(break_space);
        app_scrap(ignore_scrap,no_math); /* scrap won't take part in the parsing */
        }
    }
else
    next_control = get_next();

if(saved_language == TEX)
    language = saved_language;
}

@ @<Format auto insert...@>=
{
APP_STR("\\Wauto");
get_string(@'[','\0');
*id_loc = '\0'; // Terminate, so the text can be appended as a string.
app_ASCII_str(id_first);
next_control = get_next();
}

@ @<Append argument of \WEB\ macro@>=
{
reswitch:
    next_control = get_next();

the_switch:
    switch(next_control)
        {
        case @'(':
            app(next_control);
            next_control = get_next();

            if(next_control == @')')
                {
                b_app(@'\\'); @~ b_app(@','); // Extra thinspace for beauty.
                goto done_arg;
                }
            else
                goto the_switch;

        case @',':
            app(next_control);
            goto reswitch;

        case identifier:
            APP_ID;
            goto reswitch;

        case ellipsis:
            APP_STR("\\dots");

            if( (next_control=get_next()) != @')')
                {
                ERR_PRINT(M,"Improper macro \
definition: expected ')' after ellipsis");
                break;
                }
            /* Otherwise fall through to the right parenthesis. */

        case @')':
done_arg:
            app(next_control);
            app(@'~');
            next_control=get_next();
            break;

        default:
            ERR_PRINT(M,"Improper macro definition: \
unrecognized token in argument list");
            break;
        }
}

@ Here we append a format command, which has the two possible forms
``\.{@@f\ a\ b}'' or ``\.{@@f\ `\{\ 11}''.

@<Start a format...@>=
{
LANGUAGE saved_language = language;
scrap_pointer scrp_ptr0;

if(upper_case_code)
    {
    SUPPRESS(Formats);
    }
else
    {
    SUPPRESS(formats);
    }

/* Mark formats that are not in the global language. */
app_proc(next_control); // |formatt|.
scrp_ptr0 = scrp_ptr; // Save to help check valid format.
app_scrap(expr,maybe_math); /* this will produce `\&{format}'. The macro
        inserts a blank after \&{format}. */
@.\\WF@>

if(language==TEX)
    language = C; // This kludge ought to be removed!

next_control=get_next();

/* First field: identifier, module name, or~'\.`'. */
if (next_control==identifier || next_control==module_name)
    @<Format an identifier or module name@>@;
else if(next_control==@'`')
    @<Format a category code@>@;

/* A valid format leaves exactly three scraps beyond |scrp_ptr0|. */
if (scrp_ptr!=scrp_ptr0+3)
    ERR_PRINT(W,"Improper format definition");
@.Improper format definition@>

/* The following doesn't work right if the format command is immediately
followed by a language-changing command. */
if(saved_language == TEX)
    language = saved_language;
}

@ @<Format an identifier or mod...@>=
{
if(next_control==identifier)
    APP_ID;
else
    APP_FLAG(mod,cur_module,name_dir);

APP_STR("\\ ");

next_control=get_next();

/* Second field: identifier. */
if (next_control==identifier)
    {
    APP_ID;

    @<Finish appending format definition@>@;
    }
}

@ @<Finish appending format...@>=
{
app_scrap(expr,maybe_math);
app_scrap(semi,maybe_math); // Pseudo-semi.
sharp_include_line = NO;

next_control=get_next();
}

@ Here we typeset a format command that changes a category code, such as
``\.{@@f\ `a\ 10}''.

@<Format a cat...@>=
{
@<Append commands for beginning of string@>@;
app(@'`');

if( (next_control = get_TeX()) == constant)
    APP_STR((outer_char *)id_first);

app(@'}');

APP_STR("\\ ");

next_control = get_next(); // Integer category code.

if(next_control == constant)
    {
    APP_STR("\\WO{");

    while(id_first < id_loc)
        app_tok(*id_first++);

    app(@'}');

    @<Finish appending format...@>@;
    }
}

@ Here we append a limbo text definition of the form ``\.{@@l\ "text"}''.

@<Start a limbo...@>=
{
SUPPRESS(limbo);

app_proc(next_control);
app_scrap(expr,maybe_math);

/* First field: String. */
if((next_control = get_next()) != stringg)
    ERR_PRINT(W,"A string must follow @@l");
}

@ Here we append an operator-overload command, of the form
``\.{@@v\ .IN.\ "\\\\in"\ +}''.

@<Start an overloaded op...@>=
{
SUPPRESS(v);

overload_ops = NO;

app_proc(next_control);
app_scrap(expr,maybe_math);

/* First field: The operator to be overloaded. */
if(valid_op(next_control = get_next()))
    {
    @<Append an operator name@>@;

    app(@' '); @~ app_scrap(expr,no_math);

/* Second field: Replacement text. */
    if((next_control = get_next()) == stringg)
        {
        @<Append commands for beginning of string@>@;
        @<Append the basic str...@>@;
        app_scrap(expr,yes_math);

/* Third field: Cat of this operator. */
        if(valid_op(next_control=get_next()))
            {
            app(@' '); @~ app_scrap(expr,no_math);
            @<Append an operator...@>@;

            next_control = get_next();
            }
        }
    }
}

@ The last field of an \.{@@v}~command can be either an operator like~`\.+'
or an identifier like~`\.{.IN.}'.

@<Append an operator...@>=
{
switch(next_control)
    {
    case identifier:
        ERR_PRINT(W,"For future compatibility, please use syntax .NAME. for \
overloading dot operators");
        APP_ID;
        break;

    case dot_const:
        @<Append commands for beginning of string@>@;
        app(wt_style.dot_delimiter.begin);
        app_ASCII_str(dot_op.name + 1);
        app(wt_style.dot_delimiter.end);
        app(@'}');
        break;

    default:
        app(@'{');
        app_overload();
        app(@'}');
        break;
    }

app_scrap(expr,yes_math);
}

@ @<Start an overloaded id...@>=
{
SUPPRESS(w);

app_proc(next_control);
app_scrap(expr,maybe_math);

/* First field: The identifier to be overloaded. */
if((next_control = get_next()) == identifier)
    {
    ASCII HUGE *id_first0, HUGE *id_loc0;

/* Remember first identifier. */
    id_first0 = id_first;
    id_loc0 = id_loc;

    APP_ID;
    app(@' '); @~ app_scrap(expr,no_math);

/* Second field: Replacement text. */
    switch(next_control = get_next())
        {
        case @'\\':
            if((next_control=get_next()) != identifier)
                break;

            goto quick_code1;

        case QUICK_FORMAT:
            /* Here the replacement is the first identifier itself. */
            id_first = id_first0;
            id_loc = id_loc0;

quick_code1:
            @<Append commands for beginning of string@>@;
            APP_STR("\\\\");
            *id_loc = '\0'; // Make name into string.
            app_ASCII_str(id_first);
            app(@'}');
            app_scrap(expr,yes_math);

            next_control = get_next();
            break;

        case stringg:
            @<Append commands for beginning of string@>@;
            @<Append the basic str...@>@;
            app_scrap(expr,yes_math);

            next_control = get_next();
            break;
        }
    }
}

@ Finally, when the \TeX\ and definition parts have been treated, we have
|next_control>=begin_code|. We will make the global variable |this_module|
point to the current module name, if it has a name; otherwise, it will be
equal to |name_dir|.

@<Global...@>=

EXTERN name_pointer this_module; // The current module name, or zero.
EXTERN name_pointer the_module; /* The module we're working on; equal to
        |cur_module| at the beginning of the entire module. */

@ @<Part 2@>=@[
SRTN
trns_code(VOID)
{
the_part = CODE;
this_module = name_dir;
parsing_mode = OUTER;

if (next_control<=module_name)
    {
/* NOTE(review): the extent of the next commented-out line is reconstructed;
confirm that the \.{\\WY} output is meant to be live. */
@%  emit_space_if_needed;
    OUT_STR("\\WY");
    ini_stack;
    @<Store the output switch@>@;
    @<Append \.{\\WP}@>@;

    if (next_control==begin_code)
        { /* We've hit an \.{@@a}. */
        boolean nuweb_mode0 = nuweb_mode;

        unnamed_section = YES;
        params = global_params;// Unnamed module is in global language.
        nuweb_mode = nuweb_mode0; // Kept across the reset of |params|.
        frz_params();
        the_module = NULL;
        @<Maybe start column mode.@>@;
        @<Append the scrap header for code@>@; // !!!!!
        }
    else
        { /* Named module. */
        unnamed_section = NO;

        if(cur_module != NULL)
            {
            params = cur_module->mod_info->params; // Restore state for this module.
            frz_params();
            this_module = cur_module;
            }

        the_module = cur_module;
        @<Check that |=| or |==| follows this module name, and emit the scraps to start the module definition@>;
        }

/* Now scan the whole module. */
    while (next_control<=module_name)
        {
        outr_parse();
        @<Emit the scrap for a module name if present@>;
        }

    @<Reset the language before translation@>@;
    fin_C();
    unnamed_section = NO;
    }
}

@ @<Append the scrap header for the definition part@>=
{
app_hdr("defs");
}

@ @<Append the scrap header for code@>=
{
app_hdr("code");
}

@ The scrap header needs the file name as argument to \.{\\Wunnamed}; it
must be escaped. We use a local buffer as a scratch area.
@<Part 2@>=@[
SRTN
app_hdr FCN((section_part))
    CONST char *section_part C1("Either \"code\" or \"defs\"")@;
{
outer_char temp[1000], *temp_end = temp + 1000, *t_first, *t_loc;

/* The buffer |temp| holds three strings in a row: the raw file name, its
escaped form, and the finished header. */
t_first = temp;
STRCPY(t_first, params.OUT_FILE_NAME);
to_ASCII(t_first);

/* The escaped copy starts just past the raw name's terminator. */
t_first = esc_buf((ASCII HUGE *)t_first+STRLEN(t_first)+1,
        (ASCII HUGE *)temp_end, (CONST ASCII HUGE *)t_first, YES);
to_outer((ASCII HUGE *)t_first);

t_loc = t_first + STRLEN(t_first) + 1;
sprintf((char *)t_loc, " \\Wunnamed{%s}{%s}%%\n",
        section_part, (char *)t_first);
APP_STR(t_loc);

app_scrap(ignore_scrap,no_math);
}

@ @<Check that |=|...@>=
{
LANGUAGE saved_language = language;

if(language==TEX)
    language = C;

/* Allow optional `\.{+=}'. */
do
    next_control=get_next();
while (next_control==@'+');

language = saved_language;

switch(next_control)
    {
    case compound_assignment:
        if(assignment_token != plus_eq)
            {
            ERR_PRINT(W,"Invalid compound assignment after section \
name; please use one of `=', `==', or `+='");
@.Invalid compound assignment...@>
            break;
            }

/* The |plus_eq| falls through to the next case. */

    case @'=':
    case eq_eq:
        @<Maybe start column mode.@>@; // Positioned after `\.{@@<\dots@@>=}'.
        break;

    default:
        ERR_PRINT(W,"You need an = sign after the section name");
@.You need an = sign...@>
        break;
    }

#if(0)
if (out_ptr>out_buf+2 && STRNCMP(out_ptr-2,"\\WY",3)==0)
#endif
    {
    app(backup); /* The module name will be flush left */
    app(backup);
    }
@.\\WY@>

APP_FLAG(mod,this_module,name_dir);
cur_xref = (xref_pointer)this_module->xref;

APP_STR("${}");

if(cur_xref->num != module_count+def_flag)
    {
    APP_STR("\\PQ"); // Module name is multiply defined,
@.\\PQ@>
    this_module=name_dir; // so we won't give cross-reference info here.
    }
else
    APP_STR("\\WSQ"); // Output the equivalence sign~`$\equiv$'.
@.\\WSQ@>

APP_STR("{}$");
app_misc(w_style.misc.named_preamble); // Optional stuff from style file.
app(force); // This forces a line break unless `\.{@@~}' follows.
app_scrap(ignore_scrap,no_math);
}

@ Because the language may have changed in the middle of a module, we must
reset it before we perform the translation of the scraps that have just
been collected.

@<Reset the language...@>=
{
boolean nuweb_mode0 = nuweb_mode;

params = (the_module == NULL ? global_params : the_module->mod_info->params);
nuweb_mode = nuweb_mode0; // Kept across the reset of |params|.
frz_params();
}

@ When we append miscellaneous stuff from the style file, we must be a bit
clever. If the stuff contains something like~`\.{\\7}' and we just appended
it raw, it wouldn't be subject to the later output mechanism that takes the
maximum of adjacent |force| and |big_force| tokens. Thus, we will translate
the macros~`\.{\\1}' to~`\.{\\8}' into their internal tokens before
appending them. Other text in the miscellaneous string is just left alone.

@<Part 2@>=@[
SRTN
app_misc FCN((s))
    outer_char *s C1("")@;
{
outer_char *s0; // Start of the text not yet appended.

for(s0=s; *s; )
    if(*s++ == '\\')
        {
        if(isdigit(*s) && *s != '0' && *s != '8' && *s != '9')
            {
            *(s-1) = '\0'; // Terminate for |app_str|.
            APP_STR(s0);

            switch(*s)
                {
                case '1': app(indent); @~ break;
                case '2': app(outdent); @~ break;
                case '3': app(opt); @~ break;
                case '4': app(backup); @~ break;
                case '5': app(break_space); @~ break;
                case '6': app(force); @~ break;
                case '7': app(big_force); @~ break;
                }

            *(s-1) = '\\'; // Put it back for the next time.
            s0 = ++s; // Skip the digit.
            }
        }

APP_STR(s0);
}

@ @<Maybe start column mode.@>=
{
if(!nuweb_mode && ((FORTRAN_LIKE(language) && !free_form_input)
        || (language==TEX)) )
    {
    @<Set up column mode@>@;
    next_control = ignore;
    }
else
    {
    @<Kill rest of line; no |auto_semi|@>@;
    next_control = (nuweb_mode ? begin_meta : get_next()); // !!!!!
    }
}

@ @<Kill rest of line; no...@>=

if(Fortran88 && auto_semi)
    {
    loc = limit + 1;
    chk_end = NO;
    }

@ When shifting into \FORTRAN\ mode, we skip any stuff on the same line as
the~\.{@@n}, because surely that text isn't in the appropriate columns.

@<Set up col...@>=
{
loc = limit + 1; // Skip rest of line.
chk_end = NO;
column_mode = YES;
}

@ @<Emit the scrap...@>=

if (next_control<module_name)
    {
    switch(next_control)
        {
        case m_if:
        case m_ifdef:
        case m_ifndef:
        case m_undef:
        case m_else:
        case m_elif:
        case m_endif:
        case m_for:
        case m_endfor:
        case m_line:
        case WEB_definition:
            pre_scrap(next_control);
            break;

        default:
            ERR_PRINT(W,"You can't do that in code text");
@.You can't do that...@>
            break;
        }

    next_control=get_next();
    }
else if (next_control==module_name)
    {
    @<Append a module name@>@;
    next_control = (nuweb_mode ? begin_meta : get_next()); // !!!!!
    }

@ Tack on the representation of a module name.

@<Append a mod...@>=
{
if(cur_module)
    APP_FLAG(mod,cur_module,name_dir);

app_scrap(cur_module != NULL ? cur_module->mod_ilk : expr,maybe_math);
}

@ Build a preprocessor scrap.

@<Part 2@>=@[
SRTN
pre_scrap FCN((last_control))
    eight_bits last_control C1("")@;
{
scrap_pointer save_base;
text_pointer p,q;
LANGUAGE saved_language = language;

app(force);
app_proc(last_control);

switch(last_control)
    {
    case WEB_definition:
        @<Start a deferred macro definition@>;
        break;
    }

p = text_ptr;
freeze_text;

/* The command is parsed on a scrap base of its own. */
save_base = scrp_base;
scrp_base = scrp_ptr + 1;

*limit = @'@@'; @~ *(limit+1) = @'m'; /* Stop the |outr_parse|. */
next_control = ignore;

if(language==TEX)
    language = C;

outr_parse();

language = saved_language;

if(last_control==WEB_definition)
    {app_scrap(semi,maybe_math);}

q = translate(OUTER);

/* Back to the scrap base that was in force on entry. */
scrp_ptr = scrp_base - 1;
scrp_base = save_base;

APP_FLAG(tok,p,tok_start);
APP_FLAG(tok,q,tok_start);
APP_STR("\\WPs");
app(force); // Terminate preprocessor command.
app_scrap(ignore_scrap,no_math);
}

@ @<Start a deferred macro...@>=
{
if( (next_control=get_next())!=identifier)
    ERR_PRINT(M,"Improper deferred macro definition: \
expected identifier");
@.Improper macro definition@>
else
    {
    app(@'$');
    APP_ID;

    if (*loc==@'(')
        {
reswitch:
        switch (next_control=get_next())
            {
            case @'(':
            case @',':
                app(next_control);
                goto reswitch;

            case identifier:
                APP_ID;
                goto reswitch;

            case ellipsis:
                APP_STR("\\dots");

                if( (next_control=get_next()) != @')')
                    {
                    ERR_PRINT(M,"Improper deferred macro \
definition: expected ')' after ellipsis");
                    break;
                    }
                /* Otherwise fall through to the right parenthesis. */

            case @')':
                app(next_control);
                app(@' ');
                break;

            default:
                ERR_PRINT(M,"Improper deferred macro definition: \
unrecognized token within argument list");
                break;
            }
        }

    app(@'$');
    app(break_space);
    app_scrap(ignore_scrap,no_math); /* scrap won't take part in the parsing */
    }
}

@ Cross references relating to a named module are given after the module
ends.

@<Show cross...@>=

if (this_module>name_dir)
    {
    @<Rearrange the list pointed to by |cur_xref|@>;
    footnote(def_flag);
    footnote(0);
    }

@ To rearrange the order of the linked list of cross-references, we need
four more variables that point to cross-reference entries. We'll end up
with a list pointed to by |cur_xref|.

@<Global...@>=

EXTERN xref_pointer next_xref, this_xref, first_xref, mid_xref;
        /* Pointer variables for rearranging a list */

@ We want to rearrange the cross-reference list so that all the entries with
|def_flag| come first, in ascending order; then come all the other entries,
in ascending order. There may be no entries in either one or both of these
categories.
@<Rearrange the list...@>=

first_xref = (xref_pointer)this_module->xref;
this_xref=first_xref->xlink; /* Bypass current module number */

/* First reverse the leading run of entries whose |num| exceeds |def_flag|. */
if (this_xref->num>def_flag)
    {
    mid_xref=this_xref;
    cur_xref=0; /* This value doesn't matter */

    do
        {
        next_xref=this_xref->xlink;
        this_xref->xlink=cur_xref;
        cur_xref=this_xref;
        this_xref=next_xref;
        }
    while (this_xref->num>defflag_guard_never_used ? 0 : this_xref->num>def_flag);

    first_xref->xlink=cur_xref;
    }
else
    mid_xref=xmem; /* First list null */

/* Then reverse whatever remains, ending at |xmem|. */
cur_xref=xmem;

while (this_xref!=xmem)
    {
    next_xref=this_xref->xlink;
    this_xref->xlink=cur_xref;
    cur_xref=this_xref;
    this_xref=next_xref;
    }

/* Join the second list to the tail of the first, if there was a first. */
if (mid_xref>xmem)
    mid_xref->xlink=cur_xref;
else
    first_xref->xlink=cur_xref;

cur_xref=first_xref->xlink;

@ The |footnote| procedure gives cross-reference information about multiply
defined module names (if the |flag| parameter is |def_flag|), or about the
uses of a module name (if the |flag| parameter is zero). It assumes that
|cur_xref| points to the first cross-reference entry of interest, and it
leaves |cur_xref| pointing to the first element not printed. Typical
outputs: `\.{\\WA\ section 101.}'; `\.{\\WU\ sections 370 and 1009.}';
`\.{\\WA\ sections 8, 27\\*, and 64.}'.

@<Part 3@>=@[
SRTN
footnote FCN((flag)) /* Outputs module cross-references */
    sixteen_bits flag C1("")@;
{
xref_pointer q; /* Cross-reference pointer variable */

if (cur_xref->num<=flag)
    return; // Nothing to print for this |flag|.

fin_line();
OUT_STR("\\W");
@.\\WA@>
@.\\WU@>
out( flag==0 ? @'U' : @'A');
OUT_STR(" section");

@<Output all the module numbers on the reference list |cur_xref|@>;

out(@'.');
fin_line();
}

@ The following code distinguishes three cases, according as the number of
cross-references is one, two, or more than two. Variable~|q| points to the
first cross-reference, and the last link is a zero.

@<Output all the module numbers...@>=

q=cur_xref;

if (q->xlink->num>flag)
    out(@'s'); // Pluralize.

out(@'~');

WHILE()
    {
    out_mod(cur_xref->num-flag,ENCAP);
    cur_xref=cur_xref->xlink; /* Point to the next cross-reference to output */

    if (cur_xref->num<=flag)
        break;

    if (cur_xref->xlink->num>flag || cur_xref!=q->xlink)
        out(@','); /* Not the last of two */

    out(@' ');

    if (cur_xref->xlink->num<=flag)
        OUT_STR("and~"); /* the last */
    }

@ @<Output the code for the end of a module@>=
{
if(in_module && output_on)
    {
    outer_char temp[100];

    SPRINTF(100,temp,`"\\fi %% End of %s", MOD_TRANS(module_count)`);
    OUT_STR(temp); @~ fin_line();
@.\\fi@>
    mfree();
    in_module = NO;

    flush_buffer(out_buf,NO); // Insert a blank line for beauty.
    }
}

@* PHASE THREE PROCESSING. We are nearly finished! \.{WEAVE}'s only
remaining task is to write out the index and module list, after sorting the
identifiers and index entries. The index and module list are written into
separate files, by default \.{INDEX.tex} and \.{MODULES.tex}.

If the user has set the |no_xref| flag (the \.{-x} option on the command
line), just finish off the page, omitting the index, module name list, and
table of contents. (Fix this up.)

@d NEW_TeX(file_name) if(tex_file != stdout)
        {
        fclose(tex_file);

        if((tex_file=FOPEN(file_name,"w"))==NULL)
            FATAL(W, "! Can't open output file ",file_name);
        }

@<Part 3@>=@[
SRTN
phase3(VOID)
{
language = global_language;

if (no_xref && !prn_contents)
    {
    fin_line();
    @<Finish off |phase3|@>@;
    }
else
    { // Print cross-reference information.
    outer_char HUGE *temp_ndx,HUGE *temp_mds;
    IN_COMMON outer_char wbflnm0[];

/* Buffers handed to |xpn_name| for the two file names. */
    temp_ndx = GET_MEM("temp_ndx",MAX_FILE_NAME_LENGTH,outer_char);
    temp_mds = GET_MEM("temp_mds",MAX_FILE_NAME_LENGTH,outer_char);

    phase = 3;
    nuweb_mode = NO; // Force full output of identifiers.

    if(prn_index)
        {
        OUT_STR("\\input ");
        OUT_STR(xpn_name(&temp_ndx,MAX_FILE_NAME_LENGTH,
                w_style.indx.tex,wbflnm0));
        fin_line();
        }

    if(prn_modules)
        {
        OUT_STR("\\input ");
        OUT_STR(xpn_name(&temp_mds,MAX_FILE_NAME_LENGTH,
                w_style.modules.tex,wbflnm0));
        fin_line();

        fin_line();
        @<Print the command line, etc.@>;
@.\\Winfo@>
        }

    if(prn_contents)
        {
        outer_char temp[20];

        OUT_STR(w_style.contents.preamble);
        SPRINTF(20, temp, `"{%i}", module_count`);
        OUT_STR(temp);
        OUT_STR(w_style.contents.postamble);
        fin_line();
@.\\Wcon@>
        }
    else
        @<Finish off |phase3|@>@;

    if(prn_index)
        @<Output the index@>@;

    if(prn_modules)
        @<Output all the module names@>@;

    if(tex_file != stdout)
        fclose(tex_file);
    }

CLR_PRINTF(info,("\nDone."));
chk_complete(); /* Was all of the change file used? */
}

@ @<Finish off |phase3|@>=
{
OUT_STR("\\vfill\\FWEBend"); @~ fin_line();
}

@
@d N_CMD 1000

@<Print the command line...@>=
@{
outer_char HUGE *temp;
@b
temp = GET_MEM("temp",N_CMD,outer_char);

OUT_STR(w_style.modules.info);
OUT_STR(cmd_ln_buf); @~ fin_line();

/* Print a message identifying the global language. */
SPRINTF(N_CMD,temp,`" {%s}",language_name_ptr(global_language)`);
OUT_STR(temp); @~ fin_line();

FREE_MEM(temp,"temp",N_CMD,outer_char);
}

@ Here we escape an |ASCII| string into another buffer. We return the
beginning of the output buffer.

@d TO_TEMP(val) if(temp < temp_end) *temp++ = val; else OVERFLW("Esc_buf:temp","")@;

@<Part 3@>=@[
ASCII HUGE *
esc_buf FCN((temp,temp_end,buf,all_cases))
    ASCII HUGE *temp C0("Put it into here.")@;
    CONST ASCII HUGE *temp_end C0("End of |temp|.")@;
    CONST ASCII HUGE *buf C0("Translate from here.")@;
    boolean all_cases C1("")@;
{
ASCII HUGE *temp0 = temp;

while(*buf != '\0')
    {
    switch(*buf)
        {
        @<Special \TeX\ cases@>:
            if(!all_cases)
                break; // These are escaped only when |all_cases| is set.

        @<Other string cases@>:
            TO_TEMP(@'\\');
            break;
        }

    TO_TEMP(*buf++); // The character itself is always copied.
    }

TO_TEMP('\0');

return temp0; // Return the beginning of the output buffer.
}

@ Just before the index comes a list of all the changed modules, including
the index module itself.
@<Global...@>=

EXTERN sixteen_bits k_module; /* Runs through the modules */

@ @<Tell about changed modules@>=
{ /* Remember that the index is already marked as changed */
k_module=0;

/* Find the first changed module. */
while (!chngd_module[++k_module])
    ;

OUT_STR("\\Wch ");
@.\\Wch@>
out_mod(k_module,ENCAP);

while (k_module < module_count)
    {
    while (!chngd_module[++k_module])
        ; /* Skip over unchanged modules. */

    OUT_STR(", ");
    out_mod(k_module,ENCAP);
    }

out(@'.');
}

@ A left-to-right radix sorting method is used, since this makes it easy to
adjust the collating sequence and since the running time will be at worst
proportional to the total length of all entries in the index. We put the
identifiers into different lists based on their first characters.
(Uppercase letters are put into the same list as the corresponding
lowercase letters, since we want to have `$t<\\{TeX}<\&{to}$'.) The list
for character~|c| begins at location |bucket[c]| and continues through the
|blink| array.

@<Global...@>=

EXTERN name_pointer bucket[128]; // One for each standard |ASCII char|.
EXTERN name_pointer next_name; /* Successor of |cur_name| when sorting */
IN_COMMON hash_pointer h; /* Index into |hash| */

IN_COMMON BUF_SIZE max_names; /* number of identifiers, strings, module names;
        must be less than~10240 */
EXTERN name_pointer HUGE *blink; /* Links in the buckets */

EXTERN ASCII last_letter SET('\0'); /* Used for separating groups in the
        index. */

@ @<Alloc...@>=

ALLOC(name_pointer,blink,ABBREV(max_names),max_names,0);

@ To begin the sorting, we go through all the hash lists and put each entry
having a nonempty cross-reference list into the proper bucket.
@<Do the first pass of sorting@>=
@{
int c;
@b
for (c=0; c<=127; c++)
    bucket[c]=NULL; /* Start with all 128 buckets empty. */

/* Visit every hash chain from |hash| through |hash_end|. */
for (h=hash; h<=hash_end; h++)
    {
    next_name=*h;

    while (next_name)
        {
        cur_name=next_name;
        next_name=cur_name->link; /* Fetched first, before |cur_name| is linked into a bucket. */

        /* An |xref| field equal to |xmem| means that the cross-reference list is empty; such names are left out of the index. */
        if ((xref_pointer)cur_name->xref != xmem)
            {
            c=(cur_name->byte_start)[0]; /* The bucket is chosen by the first character of the name, */
            c = A_TO_LOWER(c); /* folded by |A_TO_LOWER| so that both cases share a bucket. */

            /* Push |cur_name| onto the front of its bucket; |blink| is indexed by the position of the name in |name_dir|. */
            blink[cur_name-name_dir]=bucket[c];
            bucket[c]=cur_name;
            }
        }
    }
}

@ During the sorting phase we shall use the |cat| and |trans| arrays from \.{WEAVE}'s parsing algorithm and rename them |depth| and |head|. They now represent a stack of identifier lists for all the index entries that have not yet been output. The variable |sort_ptr| tells how many such lists are present; the lists are output in reverse order (first |sort_ptr|, then |sort_ptr-1|, etc.). The |j|th list starts at |head[j]|, and if the first |k| characters of all entries on this list are known to be equal we have |depth[j]=k|.

@
@<Rest of |trans_plus| union@>=

name_pointer Head; /* Reached through the macro |head| during sorting. */

@
@f sort_pointer scrap_pointer

@d depth cat /* reclaims memory that is no longer needed for parsing */
@d head trans_plus.Head /* ditto; |Head| is the member added to the |trans_plus| union for this purpose */
@d sort_pointer scrap_pointer /* ditto */
@d sort_ptr scrp_ptr /* ditto */
@d max_sorts max_scraps /* ditto */

@<Global...@>=

EXTERN eight_bits cur_depth; /* Depth of current buckets */
EXTERN ASCII HUGE *cur_byte; /* Index into |byte_mem| */
EXTERN sixteen_bits cur_val; /* Current cross-reference number */
EXTERN sort_pointer mx_sort_ptr; /* largest value of |sort_ptr| */

@
@<Set init...@>=

mx_sort_ptr=scrp_info; /* No sort levels have been used yet; |sort_ptr| is also set to |scrp_info| when sorting begins. */

@ The desired alphabetic order is specified by the |collate| array; namely, |collate[0]==0 <collate[1]<@t$\cdots$@><collate[max_collate]|. The collate array can be set by the style file entry \.{collate}.

@<Global...@>=

EXTERN ASCII collate[128]; // collation order.
EXTERN int max_collate; // Last index in |collate|.
@ We use the order $\hbox{null}<\.\ <\hbox{other characters}<\.\_< \.A=\.a<\cdots<\.Z=\.z<\.0<\cdots<\.9.$

@<Set init...@>=

collate[0] = 0; /* Entry 0 is the null; the style-file string is copied in from entry 1 onward. */

@ Procedure |unbucket| goes through the buckets and adds nonempty lists to the stack, using the collating sequence specified in the |collate| array. The parameter to |unbucket| tells the current depth in the buckets. Any two sequences that agree in their first 255 character positions are regarded as identical.

@d INFTY 255 // $\infty$ (approximately).

@<Part 3@>=@[
SRTN unbucket FCN((d)) /* Empties buckets having depth |d| */
    eight_bits d C1("")@;
{
int c; /* Index into |bucket|. {\it Must be |int|.} The loop below ends when |c| goes negative. */

/* The collating sequence is scanned from its last entry down to entry 0. Lists are later taken from the top of the stack, so the list pushed last, the one for the earliest character, is handled first. */
for (c=max_collate; c>= 0; c--)
    if (bucket[collate[c]])
        {
        if (sort_ptr>=scrp_end)
            OVERFLW("sort levels",ABBREV(max_scraps));

        sort_ptr++;

        if (sort_ptr>mx_sort_ptr)
            mx_sort_ptr = sort_ptr; /* Remember the high-water mark for the statistics. */

        /* Entry 0 collects the names that ended at this depth; giving that list depth |INFTY| makes the caller output it without further splitting. */
        sort_ptr->depth = (eight_bits)(c==0 ? INFTY : d);
        sort_ptr->head = bucket[collate[c]];
        bucket[collate[c]] = NULL;
        }
}

@
@<Sort and output the index@>=

w_style.indx.collate = x__to_ASCII((outer_char *)w_style.indx.collate);
max_collate = STRLEN(w_style.indx.collate);

/* |collate[0]| is taken by the null, so the style-file string may occupy only the remaining entries of |collate|. A longer string is cut off here; copying it unchecked would write past the end of the array. */
if(max_collate > (int)(sizeof(collate)/sizeof(collate[0])) - 1)
    {
    max_collate = (int)(sizeof(collate)/sizeof(collate[0])) - 1;
    CLR_PRINTF(info,("\n! Index collating sequence truncated to %d characters.",max_collate));
    }

STRNCPY(collate+1,w_style.indx.collate,max_collate);

sort_ptr=scrp_info; unbucket(1); /* Push the lists made by the first pass; their entries are grouped by first character. */

while (sort_ptr>scrp_info)
    {
    cur_depth=sort_ptr->depth;

    /* A list is output as it stands when it has a single entry (its |blink| is null) or when its depth is |INFTY|; otherwise it is split on the next character. */
    if (blink[sort_ptr->head-name_dir]==0 || cur_depth==INFTY)
        @<Output index entries for the list at |sort_ptr|@>@;
    else @<Split the list at |sort_ptr| into further lists@>;
    }

@
@<Split the list...@>=
@{
ASCII c;
@b
next_name=sort_ptr->head;

/* Distribute the entries of the top list over the buckets according to the character at offset |cur_depth|. */
do
    {
    cur_name=next_name;
    next_name=blink[cur_name-name_dir];
    cur_byte=cur_name->byte_start+cur_depth;

    /* The end of a name is recognized by comparing with the |byte_start| of the next entry of |name_dir|. */
    if (cur_byte==(cur_name+1)->byte_start)
        c=0; /* hit end of the name */
    else
        {
        c = *cur_byte;
        c = A_TO_LOWER(c);
        }

    blink[PTR_DIFF(size_t,cur_name,name_dir)]=bucket[c];
    bucket[c]=cur_name;
    }
while (next_name);

--sort_ptr; /* Pop the list that was just split, */
unbucket((eight_bits)(cur_depth+(eight_bits)1)); /* and push its pieces, one level deeper. */
}

@
@<Output index...@>=
{
cur_name = sort_ptr->head;

@<Separate the groups if necessary@>@;

do
    {
    /* Only names for which |defined_type(language)| is below |0x80| get an entry. */
    if(cur_name->defined_type(language) < 0x80)
        {
        /* Write index entry for one identifier. */
        OUT_STR(w_style.indx.item_0);
@.\\:@>
        @<Output the name at |cur_name|@>;
        @<Output the cross-references at |cur_name|@>;
        }

    cur_name = blink[cur_name-name_dir];
    }
while (cur_name);

--sort_ptr; /* This list is finished. */
}

@ Here we insert an optional macro between the different groups.

@d NON_TEX_MACRO '\1'

@<Separate the groups...@>=
{
ASCII letter = *cur_name->byte_start;

/* In some special cases in \Cpp, the identifier may be a \TeX\ macro beginning with~'\.\\' at this point. We must then take special precautions. In particular, we assign a non-null, non-printable value to |letter|. */
if(letter == @'\\' && cur_name->ilk==normal && language!=TEX)
    letter = NON_TEX_MACRO;
else
    letter = A_TO_LOWER(letter);

if(letter != last_letter)
    {
    if(last_letter)
        OUT_STR(w_style.indx.group_skip); /* Separate groups, but not for the very first one. */

    /* A nonzero |lethead_flag| asks for a heading letter; its sign selects the case (positive for uppercase). No heading is written for the |NON_TEX_MACRO| group. */
    if(w_style.indx.lethead_flag && letter != NON_TEX_MACRO)
        {
        OUT_STR(w_style.indx.lethead_prefix);

        switch(letter)
            {
            @<Special string cases@>:
                out(@'\\'); /* These letters are preceded by a backslash. */
            }

        out((w_style.indx.lethead_flag > 0 ? A_TO_UPPER(letter) : A_TO_LOWER(letter)));
        OUT_STR(w_style.indx.lethead_suffix);
        }
    }

last_letter = letter;
}

@
@<Output the name...@>=
@{
boolean output_type;
boolean all_uc = cur_name->info.upper_case; /* Presumably consulted by the macro |ALL_UC| used below. NOTE(review): confirm against its definition. */
@b
/* Write the formatting prefix that goes with the kind of name and choose the |output_type| handed to |out_name|. */
switch (cur_name->ilk)
    {
    case normal:
        output_type = IDENTIFIER;

        if(is_intrinsic(cur_name))
            OUT_STR(pfmt->intrinsic); // E.g., |sqrt|.
        else if(is_keyword(cur_name))
            OUT_STR(ALL_UC ? pfmt->KEYWORD : pfmt->keyword); // E.g., |@r BLOCKSIZE|.
        else if(language==TEX)
            OUT_STR(pfmt->typewritr); // E.g., \.{\\hfill}.
        else if (length(cur_name)==1)
            OUT_STR(pfmt->short_id); // E.g., |a|.
        else @<Output the appropriate identifier prefix@>@;

        break;
@.\\\AT!@>
@.\\|@>
@.\\\\@>

    case roman:
        output_type = INDEX_ENTRY; @~ break;

    case wildcard:
        OUT_STR(pfmt->wildcrd); @~ output_type = INDEX_ENTRY; @~ break;
@.\\9@>

    case typewriter:
        OUT_STR(pfmt->typewritr);
        output_type = INDEX_ENTRY; @~ break;
@.\\.@>

    default:
        OUT_STR(ALL_UC ? pfmt->RESERVED : pfmt->reserved);
        output_type = IDENTIFIER; @~ break; // E.g., |int|.
@.\\\&@>
    }

out_name(output_type,cur_name);
}

@ Section numbers that are to be underlined are enclosed in `\.{\\[}$\,\ldots\,$\.]'.

@d ENCAP YES
@d NO_ENCAP NO

@<Output the cross-references...@>=

@<Invert the cross-reference list at |cur_name|, making |cur_xref| the head@>;

OUT_STR(w_style.indx.delim_0); /* Immediately after identifier. */

WHILE()
    {
    cur_val=cur_xref->num;

    /* Numbers of |def_flag| or more are printed without the flag, between the underline prefix and suffix. */
    if (cur_val<def_flag)
        out_mod(cur_val,ENCAP);
    else
        {
        OUT_STR(w_style.indx.underline_prefix);
        out_mod(cur_val-def_flag,ENCAP);
        OUT_STR(w_style.indx.underline_suffix);
        }
@.\\[@>

    /* If the language of this module isn't the global language, mark the entry with the language symbol between the style-file prefix and suffix. The three pieces are written one after another instead of being assembled in a fixed-size buffer, so their combined length is not limited. */
    if((LANGUAGE)cur_xref->Language != global_language)
        {
        OUT_STR((char *)w_style.indx.language_prefix);
        OUT_STR((char *)language_symbol((LANGUAGE)cur_xref->Language));
        OUT_STR((char *)w_style.indx.language_suffix);
        }

    cur_xref=cur_xref->xlink;

    if(cur_xref == xmem)
        break; /* |xmem| ends the list, so no delimiter follows the last number. */

    OUT_STR(w_style.indx.delim_n); /* Between identifiers. */
    }

out(@'.'); @~ fin_line();

@ List inversion is best thought of as popping elements off one stack and pushing them onto another. In this case |cur_xref| will be the head of the stack that we push things onto.

@<Invert the cross-reference list at |cur_name|, making |cur_xref| the head@>=

this_xref = (xref_pointer)cur_name->xref; cur_xref=xmem;

/* The |do| form handles the first entry before testing for the end, so the list must not be empty. The first pass of sorting puts only names with a nonempty list into the buckets. */
do
    {
    next_xref=this_xref->xlink;
    this_xref->xlink=cur_xref;
    cur_xref=this_xref;
    this_xref=next_xref;
    }
while (this_xref!=xmem);

@ The following recursive procedure walks through the tree of module names and prints them.
@^recursion@>

@<Part 3@>=@[
SRTN mod_print FCN((p)) /* Print all module names in subtree |p|. */
    name_pointer p C1("")@;
{
/* The order is left subtree, the name at |p|, right subtree; a null |p| ends the recursion. */
if (p)
    {
    mod_print(p->llink);

    OUT_STR("\\:");
@.\\:@>
    /* Reset the token, text and scrap pointers and the stack, then append a reference to module |p| and have it written out. */
    tok_ptr=tok_mem+1; text_ptr=tok_start+1; scrp_ptr=scrp_info; ini_stack;
    APP_FLAG(mod,p,name_dir);
    make_output();
    footnote(0); /* |cur_xref| was set by |make_output| */
    fin_line();

    mod_print(p->rlink);
    }
}

@
@<Output the index@>=
{
writing(YES,temp_ndx);

if(tex_file == stdout)
    puts("");

NEW_TeX(temp_ndx);

/* The list of changed modules comes first, followed by two calls of |fin_line|. */
if (change_exists)
    {
    @<Tell about changed modules@>;
    fin_line();
    fin_line();
    }

OUT_STR(w_style.indx.preamble); @~ fin_line();
@.\\Winx@>

@<Do the first pass of sorting@>;
@<Sort and output the index@>;

OUT_STR(w_style.indx.postamble); @~ fin_line();
@.\\Wfin@>
}

@
@<Output all the module names@>=
{
writing(BOOLEAN(!prn_index),temp_mds);
NEW_TeX(temp_mds);

OUT_STR(w_style.modules.preamble); @~ fin_line();
@.\\Wmods@>

mod_print(root); /* Print the whole tree of module names, starting at |root|. */

OUT_STR(w_style.modules.postamble); @~ fin_line();
}

@ Statistics are printed when the command-line option~\.{-s} is used.

@<Part 3@>=@[
SRTN see_wstatistics(VOID)
{
CLR_PRINTF(info,("\n\nMEMORY USAGE STATISTICS:\n"));

/* Each |STAT0| line reports one memory area; the arguments appear to be a label, the element size, the number of entries used (a pointer difference), the current limit, the name of the setting, and the punctuation that ends the line. NOTE(review): confirm against the definition of |STAT0|. */
STAT0("names",sizeof(*name_ptr),
    SUB_PTRS(name_ptr,name_dir),max_names,UPPER(max_names),",");

STAT0("cross-references",sizeof(*xref_ptr),
    SUB_PTRS(xref_ptr,xmem),max_refs,UPPER(max_refs),",");

STAT0("bytes",sizeof(*byte_ptr),
    SUB_PTRS(byte_ptr,byte_mem),max_bytes,UPPER(max_bytes),";");

CLR_PRINTF(info,(" parsing required\n"));

STAT0("scraps",sizeof(*mx_scr_ptr),
    SUB_PTRS(mx_scr_ptr,scrp_base),max_scraps,UPPER(max_scraps),",");

STAT0("texts",sizeof(*mx_text_ptr),
    SUB_PTRS(mx_text_ptr,tok_start),max_texts,UPPER(max_texts),",");

STAT0("tokens",sizeof(*mx_tok_ptr),
    SUB_PTRS(mx_tok_ptr,tok_mem),max_toks,UPPER(max_toks_w),",");

STAT0("stack levels",sizeof(*mx_stck_ptr),
    SUB_PTRS(mx_stck_ptr,stack),stck_size,UPPER(stck_size_w),";");

CLR_PRINTF(info,(" sorting required"));
printf(" %lu level(s).\n",SUB_PTRS(mx_sort_ptr,scrp_info)); /* The deepest point that |sort_ptr| reached above |scrp_info|. */

mem_avail(1); /* How much memory left at end of run? */
}

@ The following routines are invoked by \.{common.web}, but are used only by \.{TANGLE}.

@<Part 3@>=@[
SRTN predefine_macros(VOID)
{}

SRTN open_out(VOID)
{}

boolean was_opened FCN((name,global_scope,pname,pptr))
    CONST outer_char HUGE *name C0("")@;
    boolean global_scope C0("")@;
    outer_char HUGE * HUGE *pname C0("")@;
    FILE **pptr C1("")@;
{
/* This version only stores a freshly allocated copy of |name| in |*pname| and answers |NO|; |global_scope| and |pptr| are not used. */
*pname = GET_MEM("*pname",STRLEN(name)+1,outer_char);
STRCPY(*pname,name);

return NO;
}

SRTN ini_tokens FCN((language0))
    LANGUAGE language0 C1("")@;
{}

@* STYLE FILE. The style file is common to \FWEAVE\ and \FTANGLE. See \.{style.web}.

@<Include...@>=
#include "map.h" // Relations between style file keywords and internal arrays.

@* INDEX. If you have read and understood the code for Phase~III above, you know what is in this index and how it got here. All modules in which an identifier is used are listed with that identifier, except that reserved words are indexed only when they appear in format definitions, and the appearances of identifiers in module names are not indexed. Underlined entries correspond to where the identifier was declared. Error messages, control sequences put into the output, and a few other things like ``recursion'' are indexed here too.