Usenet 1994 January

home *** CD-ROM | disk | FTP | other *** search

/ Usenet 1994 January / usenetsourcesnewsgroupsinfomagicjanuary1994.iso / sources / misc / volume23 / texindex / part01 next >

Wrap

Text File | 1991-09-23 | 7.8 KB | 317 lines

Newsgroups: comp.sources.misc From: ib09@rz.uni-karlsruhe.de (Thomas Koenig) Subject: v23i006: texindex - generate index for LaTeX', Part01/01 Message-ID: <1991Sep22.201218.21826@sparky.imd.sterling.com> X-Md4-Signature: fbb6fd82a6b8a8afe622068d4812596b Date: Sun, 22 Sep 1991 20:12:18 GMT Approved: kent@sparky.imd.sterling.com Submitted-by: ib09@rz.uni-karlsruhe.de (Thomas Koenig) Posting-number: Volume 23, Issue 6 Archive-name: texindex/part01 Environment: UNIX This is texindex, a little utility for formatting LaTeX - generated *.idx (indexentry) - files into *.ind - files ready for inclusion as an index. It is written as a UNIX shell script, using the utilities expand, sed, sort, uniq and awk (old style). It has the following features: - parts enclosed in brackets [...] will be enclosed in \verb+...+ in the *.ind - file - entries consisting of several words will be rotated. If, for example, you have \index{foo bar} in your *.tex file, you'll find both "\item foo bar <page>" and "\item bar \subitem foo <page>" in your *.ind - file. This can be suppressed by inserting tildes between the words, as in \index{foo~bar}. - Commas can be used to get deeper in the \item hierarchy. Credits: The idea and some of the implementation comes from Aho, Weinberger and Kernighan: 'The Awk Programming Language'. I place this in the public domain; do with it whatever you please. Thomas Koenig ui0t@dkauni2.bitnet, ui0t@ibm3090.rz.uni-karlsruhe.dbp.de, ib09@rz.uni-karlsruhe.de (for now) #---------------------------------- cut here ---------------------------------- # This is a shell archive. Remove anything before this line, # then unpack it by saving it in a file and typing "sh file". # # Wrapped by <ib09@rz30.rz.uni-karlsruhe.de> on Thu Sep 19 21:16:45 1991 # # This archive contains: # README texindex ix.collapse ix.extract # ix.format ix.genkey ix.polish ix.rotate # # Error checking via wc(1) will be performed. 
# ---------------------------------------------------------------------------
# Body of the texindex shell archive.
#
# Unpacks these files into the current directory and checks each one with
# wc(1); a mismatch is reported but does not stop the unpacking:
#       README          texindex        ix.collapse     ix.extract
#       ix.format       ix.genkey       ix.polish       ix.rotate
#
# Notes on this copy of the archive:
#   * It had been flattened (every newline turned into a blank), so the
#     line structure, the indentation and the literal tabs were rebuilt.
#     ix.extract and ix.genkey reproduce the originally posted wc figures
#     exactly, and ix.polish does so with the item indents chosen as
#     2/4/8 blanks.  For every other file the indentation had to be
#     re-chosen or the content was changed, so their expected figures
#     below were recomputed for the text as it is written here.
#   * Tabs that must end up inside a payload are written as @TAB@ and
#     turned into real tabs while unpacking, so the archive itself no
#     longer depends on tab characters surviving transport.
#   * texindex, ix.collapse and ix.format carry small fixes; each is
#     described next to the file concerned.
# ---------------------------------------------------------------------------

# Kept from the original archive; presumably there so that wc(1) is not
# influenced by the caller's locale.
unset LANG

# One literal tab, used to expand the @TAB@ markers.
tab=$(printf '\t')

# unpack NAME MODE LINES WORDS CHARS  <payload
#
# Writes the payload read from stdin to NAME (expanding @TAB@), compares
# "wc -lwc" of the result with LINES WORDS CHARS, then sets permission MODE.
# All messages go to stdout, as in the original archive.
unpack() {
  echo "x - $1"
  if sed "s/@TAB@/${tab}/g" >"$1"; then
    # Unquoted on purpose: the three numbers printed by wc have to be
    # split into $6 $7 $8.
    # shellcheck disable=SC2046
    set -- "$@" $(wc -lwc <"$1")
    if test "$6 $7 $8" != "$3 $4 $5"; then
      echo "ERROR: wc results of $1 are $6 $7 $8 should be $3 $4 $5"
    fi
    chmod "$2" "$1"
  else
    echo "ERROR: cannot write $1"
  fi
}

unpack README 644 27 167 1044 <<'@EOF'
This is texindex, a little utility for formatting LaTeX - generated *.idx
(indexentry) - files into *.ind - files ready for inclusion as an index.

It is written as a UNIX shell script, using the utilities expand, sed,
sort, uniq and awk (old style).

It has the following features:

- parts enclosed in brackets [...] will be enclosed in \verb+...+ in the
  *.ind - file

- entries consisting of several words will be rotated. If, for example,
  you have \index{foo bar} in your *.tex file, you'll find both
  "\item foo bar <page>" and "\item bar \subitem foo <page>" in your
  *.ind - file. This can be suppressed by inserting tildes between the
  words, as in \index{foo~bar}.

- Commas can be used to get deeper in the \item hierarchy.

Credits: The idea and some of the implementation comes from Aho,
Weinberger and Kernighan: 'The Awk Programming Language'.

I place this in the public domain; do with it whatever you please.
--
Thomas Koenig
ui0t@dkauni2.bitnet, ui0t@ibm3090.rz.uni-karlsruhe.dbp.de,
ib09@rz.uni-karlsruhe.de (for now)
@EOF

# texindex: the driver pipeline; it reads $1.idx and writes $1.ind, and
# loads the ix.* scripts by relative name.
# Changed from the posting: the first sort was "sort -t +0 -1 +1n -2", which
# never handed a separator to sort and uses the obsolete +pos key syntax;
# it now sorts on the tab-separated term and then numerically on the page.
# The two uses of $1 are quoted.
unpack texindex 755 10 43 206 <<'@EOF'
expand "$1.idx" |
sed -f ix.extract |
sort -t "$(printf '\t')" -k 1,1 -k 2,2n |
uniq |
awk -f ix.collapse |
awk -f ix.rotate |
sed -f ix.genkey |
sort -f -d |
awk -f ix.format |
sed -f ix.polish > "$1.ind"
@EOF

# ix.collapse: merges the page numbers of identical adjacent terms into one
# line.
# Changed from the posting: END tested NR > 1, so an input of exactly one
# record was written without its final newline; it now tests NR > 0.
unpack ix.collapse 644 28 84 427 <<'@EOF'
# ix.collapse : awk - script
# kollabiere Zahlenlisten fuer identische Begriffe
# Eingabeformat :
# string1 \t num
# string2 \t num
# string2 \t num
# ...
# Ausgabeformat:
# string1 \t num
# string2 \t num, num
BEGIN {
  altwort = "ASDF!@#$*&%$IU11@"
  FS = OFS = "\t"
}
$1 != altwort {
  if (NR > 1)
    printf "\n"
  altwort = $1
  printf "%s\t%s", $1, $2
  next
}
{
  printf " %s",$2
}
END {
  if (NR > 0)
    printf "\n"
}
@EOF

# ix.extract: turns \indexentry{term}{page} into "term<TAB>page".
# Restored: the two \( \) groups and the tab between \1 and \2.
unpack ix.extract 644 7 30 219 <<'@EOF'
# ix.extract : sed - script, um die Daten aus einem LaTeX - *.idx - File
# rauszuholen.
# Eingabeformat:
# \indexentry{Begriff}{Seitenzahl}
# Ausgabeformat:
# Begriff \t Seitenzahl
s/\\indexentry{\(.*\)}{\(.*\)}/\1@TAB@\2/
@EOF

# ix.format: drops the sort key and emits \item / \subitem / \subsubitem
# lines between \begin{theindex} and \end{theindex}.
# Changed from the posting: the closing loop read
#     for (j=1; j<=i; j++) altfeld[i] = feld[i]
# (index i instead of j), so only one level of the previous entry was
# remembered; "a, b, c" followed by "a, b, d" printed "\subitem b" twice.
# Every level is now stored and unused levels are cleared.  With the full
# previous entry known, a repeated entry continues the page list with
# ", " instead of gluing the numbers together, and an entry that is only a
# prefix of the previous, deeper one gets a line of its own.
unpack ix.format 644 42 171 976 <<'@EOF'
# ix.format : awk - script
# entfernt Sortierschluessel, fuegt \item etc. ein
BEGIN {
  FS = OFS = "\t"
  printf "\\begin{theindex}"
}

{
  n = split($2,feld,", ")
  if (n > 3) {
    for (i=4; i <=n; i++)
      feld[3] = feld[3] ", " feld[i]
    n = 3
  }
  for (i = 1; feld[i]==altfeld[i] && i<=n ; i++)
    ;
  # all levels match but the previous entry was deeper: reprint level n
  if (i > n && n > 0 && altfeld[n+1] != "")
    i = n
  for (k=i; k <=n; k++) {
    printf "\n\\"
    for (j=1; j<k; j++)
      printf "sub"
    printf "item %s ",feld[k]
  }
  # same entry as the previous one: continue its page list
  if (i > n && n > 0)
    printf ", "
  nzahl = split($3,zahlfeld," ")
  for (j=1; j<nzahl; j++)
    printf"%s, ",zahlfeld[j]
  printf "%s",zahlfeld[nzahl]
  # remember every level; clear the levels this entry does not have
  for (j=1; j<=n; j++)
    altfeld[j] = feld[j]
  for (j=n+1; j<=3; j++)
    altfeld[j] = ""
}

END {
  printf "\n\\end{theindex}\n"
}
@EOF

# ix.genkey: prefixes every line with a sort key made of the term without
# special characters.
# Restored: the tabs in the three commands marked @TAB@.  The last command
# is reconstructed as "comma directly before the tab"; the originally
# posted character count leaves room for exactly one character there.
unpack ix.genkey 644 20 117 669 <<'@EOF'
# ix.genkey : Generierung eines Sortierschluessels ohne Sonderzeichen
# Eingabe: string1 string2 ... \t num num ...
# Ausgabe: string1 string2 ... (ohne S.Z.) \t string1 string2 ... \t num num ..
#
# Ersetze global die Tilde durch Spaces
s/~/ /g
# kopiere die Zeile in den Zwischenpuffer
h
# alles hinter \t loeschen
s/@TAB@.*$//
# zum Sortieren loesche alle nicht - alphanumerischen Zeichen
s/[^a-zA-Z0-9 ]//g
# falls dabei eine leere Zeile rauskommt, setze ein Space rein
s/^$/ /
# kopiere die urspruengliche Zeile aus dem Puffer hinter den Speicher
G
# ersetze newline zwischen den Zeilen durch \t
s/\n/@TAB@/
# Falls ein Komma am Ende des Strings steht, loesche es
s/,@TAB@/@TAB@/
@EOF

# ix.polish: final touches - \indexspace before each \item, indentation of
# the item lines, [..] turned into \verb+..+.
# The widths of the three indents were lost; 2, 4 and 8 blanks are used
# here, which also reproduces the originally posted character count.
unpack ix.polish 644 16 67 433 <<'@EOF'
# ix.polish : sed - script zum letzten Nachpolieren der Eintraege
# \item etc. einruecken, eckige Klammern in \verb - Anweisungen abaendern
#
# Packe vor jedes \item ein \indexspace
/\\item/i\
\\indexspace
# ruecke \items ein
/^\\item/s/^/  /
# ruecke \subitems etwas mehr ein
/^\\subitem/s/^/    /
# ruecke \subsubitems noch etwas mehr ein
/^\\subsubitem/s/^/        /
# aendere [ in \verb+
s/\[/\\verb+/g
# aendere ] in +
s/\]/+/g
@EOF

# ix.rotate: prints every term as it is and, for each blank inside it, once
# more rotated around that blank ("foo bar" -> "bar, foo").
unpack ix.rotate 644 19 93 512 <<'@EOF'
# ix.rotate: awk - script zur Rotation von Index - Begriffen
# Eingabe:
# string1 string2 string3 ... \t num num ...
# Ausgabe:
# string1 string2 string3 ... \t num num ...
# string2 string3 ... , string1 \t num num ...
# string3 ..., string1 string2 \t num num ...
#
BEGIN {
  FS = OFS = "\t"
}

{
  print $1, $2  # Drucke die unrotierte Form
  for (i=1; (j=index(substr($1, i+1)," "))> 0;) {
    i += j  # Finde jeden Blank und rotiere um ihn
    printf "%s, %s\t%s\n",substr($1,i+1),substr($1,1,i-1),$2
  }
}
@EOF

# Stop here so that the signature text after the archive is never
# interpreted by the shell.
exit 0
-- Kent Landfield INTERNET: kent@sparky.IMD.Sterling.COM Sterling Software, IMD UUCP: uunet!sparky!kent Phone: (402) 291-8300 FAX: (402) 291-4362 Please send comp.sources.misc-related mail to kent@uunet.uu.net.