home *** CD-ROM | disk | FTP | other *** search
- /*
- * 78common.h - common functions for ISO 646 to 8-bit conversion
- *
- * Copyright 1989 Howard Lee Gayle
- *
- * $Header: 78common.h,v 1.2 89/08/24 13:03:41 howard Exp $
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 1,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Prerequisites:
- * limits.h
- * howard/port.h
- * howard/a2.h
- * howard/malf.h
- * howard/registers.i
- * cz.h
- * 78.h
- * 78????.h (defines byte2t[])
- */
-
- /* dif78 - compute trigram frequency difference for one word */
-
- PUBLIC int dif78 (wp, lim, tt)
- R1 bStrT wp; /* The word.*/
- R3 bStrT lim; /* Don't pass this.*/
- R4 triDifT *tt; /* Difference table.*/
-
- /* Function:
- * Sum the frequency differences of all the trigrams in a word.
- * Algorithm:
- * Special-case single-letter words. Otherwise, loop through
- * the word.
- * Returns:
- * Sum of the values in the difference table for each trigram in the word.
- * Notes:
- *
- */
- {
- R2 int r; /* Return.*/
-
- if (NULBSTR == wp) malf1 ("dif78: word NUL");
- if (EOS == B(*wp)) malf1 ("dif78: word empty");
- if (((triDifT *) NULL) == tt) malf1 ("dif78: no difference table");
- if ((EOS == B(wp[1])) || (lim == (wp + 1)))
- return (tt[TOTRI (TRIBEG, byte2t[B(*wp)], TRIEND)] - TRIBIAS);
- r = tt[TOTRI (TRIBEG, byte2t[B(*wp)], byte2t[B(wp[1])])] - TRIBIAS;
- for (; (EOS != B(wp[2])) && (lim != (wp + 2)); ++wp)
- r += tt[TOTRI (byte2t[B(*wp)], byte2t[B(wp[1])], byte2t[B(wp[2])])]-TRIBIAS;
- r += tt[TOTRI (byte2t[B(*wp)], byte2t[B(wp[1])], TRIEND)] - TRIBIAS;
- return (r);
- }
-
- /* frqmm - find minimum and maximum values in a trigram frequency table */
-
- PRIVATE void frqmm (fp, minp, pminp, maxp)
- R1 long *fp; /* Points to frequency table.*/
- R7 long *minp; /* Store min here.*/
- R8 long *pminp; /* Store positive min here.*/
- R9 long *maxp; /* Store max here.*/
-
- /* Function:
- * Find the minimum and maximum frequencies in the given table.
- * Algorithm:
- * Linear search.
- * Returns:
- *
- * Notes:
- *
- */
- {
- R2 long f; /* Current frequency.*/
- R6 long *fp2; /* End of fp[].*/
- R3 long l; /* Min.*/
- R4 long h; /* Max.*/
- R5 long p; /* Positive min.*/
-
- if (NULONGP == fp) malf1 ("frqmm: no frequency table");
- l = LONG_MAX;
- h = LONG_MIN;
- p = LONG_MAX;
- for (fp2 = fp + (TRIMAX + 1); fp != fp2;)
- {
- f = *fp++;
- if (f < l) l = f;
- if (f > h) h = f;
- if ((f > 0) && (f < p)) p = f;
- }
- if (NULONGP != minp) *minp = l;
- if (NULONGP != pminp) *pminp = p;
- if (NULONGP != maxp) *maxp = h;
- }
-
- /* mrdfrq - read in a trigram frequency file */
-
- PRIVATE void mrdfrq (fn, fp)
- R2 bStrT fn; /* File name.*/
- R3 long *fp; /* Points to frequency table.*/
-
- /* Function:
- * Read a file representation of a trigram frequency table into
- * its internal representation.
- * Algorithm:
- * Read each line in the file. Convert the frequency to a long.
- * Index into fp[] and store the value.
- * Returns:
- *
- * Notes:
- * 1) There is no checking for duplicate trigrams.
- * 2) fp[] is assumed to be zero on entry.
- */
- {
- R4 int a; /* Trigram code for first letter.*/
- R5 int b; /* Trigram code for second letter.*/
- R6 int c; /* Trigram code for third letter.*/
- R7 long f; /* Frequency.*/
- R8 streamT is; /* Input stream.*/
- unsigned ln = 0; /* Input line number.*/
- R1 bStrT p; /* Steps through lb[].*/
- bStrT p0; /* End of frequency.*/
- byteT lb[MLINE]; /* Line buffer.*/
-
- if (NULONGP == fp) malf1 ("mrdfrq: no frequency table");
- is = mfopen (fn, "r");
- while (NULBSTR != getlic (lb, MLINE, is, fn, &ln, 1, COMMENT))
- {
- f = mra2l (lb, NULBSTR, TRUE, S("Frequency"), 0L, (long) LONG_MAX, &p0);
- for (p = p0; ' ' == B(*p); ++p)
- ;
- if ((EOS == B(*p)) || (EOS == B(p[1])) || (EOS == B(p[2])))
- malf1 ("%s %u: bad trigram", fn, ln);
- a = (('(' == B(*p)) ? TRIBEG : byte2t[B(*p)]);
- b = byte2t[B(p[1])];
- c = ((')' == B(p[2])) ? TRIEND : byte2t[B(p[2])]);
- fp[TOTRI (a, b, c)] = f;
- }
- mfclose (is, fn);
- }
-
- /* mrdtri - read trigram table and handle errors */
-
- PUBLIC void mrdtri (sim, tt)
- bStrT sim; /* Simple part of file name.*/
- bStrT tt; /* Store trigram table here.*/
-
- /* Function:
- * Read in a trigram table.
- * Algorithm:
- * Call mopenp() to search path and open the file.
- * Call fread() to do the read. Close the file
- * Returns:
- *
- * Notes:
- *
- */
- {
- R1 streamT is; /* Input stream.*/
- byteT fnb[MFILE]; /* Store full paths here.*/
-
- is = mopenp (path, PATHSEP, sim, TTSUF, S("r"), fnb, MFILE);
- if ((1 != fread ((cStrT) tt, TRIMAX + 1, 1, is)) || ferror (is))
- malf1 ("%s: Read error", fnb);
- mfclose (is, fnb);
- }
-
- /* word78 - compute trigram index for one word */
-
- PUBLIC unsigned word78 (wp, lim, b2t, tt)
- R1 bStrT wp; /* The word.*/
- R4 bStrT lim; /* Don't pass this.*/
- R2 bStrT b2t; /* Byte to trigraph code map.*/
- R5 bStrT tt; /* Trigram table.*/
-
- /* Function:
- * Sum the frequencies of all the trigrams in a word.
- * Algorithm:
- * Special-case single-letter words. Otherwise, loop through
- * the word.
- * Returns:
- * Sum of the values in the trigram table for each trigram in the word.
- * Notes:
- *
- */
- {
- R3 unsigned r; /* Return.*/
-
- if (NULBSTR == wp) malf1 ("word78: word NUL");
- if (EOS == B(*wp)) malf1 ("word78: word empty");
- if (NULBSTR == b2t) malf1 ("word78: no code map");
- if (NULBSTR == tt) malf1 ("word78: no trigram table");
- if ((EOS == B(wp[1])) || (lim == (wp + 1)))
- return (tt[TOTRI (TRIBEG, b2t[B(*wp)], TRIEND)]);
- r = tt[TOTRI (TRIBEG, b2t[B(*wp)], b2t[B(wp[1])])];
- for (; (EOS != B(wp[2])) && (lim != (wp + 2)); ++wp)
- r += tt[TOTRI (b2t[B(*wp)], b2t[B(wp[1])], b2t[B(wp[2])])];
- r += tt[TOTRI (b2t[B(*wp)], b2t[B(wp[1])], TRIEND)];
- return (r);
- }
-