home *** CD-ROM | disk | FTP | other *** search
Text File | 1989-07-08 | 37.8 KB | 1,755 lines |
- Newsgroups: comp.sources.misc
- From: allbery@uunet.UU.NET (Brandon S. Allbery - comp.sources.misc)
- Subject: v07i063: information statistic and chi-square for 2-D contingency tables
- Reply-To: gwyn@BRL.MIL
-
- Posting-number: Volume 7, Issue 63
- Submitted-by: gwyn@BRL.MIL
- Archive-name: tot_info
-
- #!/bin/sh
- # Self-unpacking archive format. To unbundle, sh this file.
- echo 'Makefile' 1>&2
- cat >'Makefile' <<'END OF Makefile'
- # Makefile -- -- makefile for "tot_info" utility
-
- # last edit: 89/02/06 D A Gwyn
-
- # SCCS ID: @(#)tot_info.mk 1.1 (edited for publication)
-
- PRODUCT = tot_info
-
- MAKEFIL = Makefile
- HEADERS = chisq.h gamma.h
- CFILES = $(PRODUCT).c info.c gamma.c chisq.c # chisq.c is a "freebie"
- OBJECTS = $(PRODUCT).o info.o gamma.o # chisq.o
- LIBTEST = g_test
- TSRC = $(LIBTEST).c
- TOBJ = $(LIBTEST).o
- TFILES = $(PRODUCT).in $(PRODUCT).exp
- LIBES = -lm
- LLIBES = $(LIBES)
- MANSRCS = $(PRODUCT).1 chisq.3 gamma.3
-
- # "standard" UNIX utilities that may need invocations tweaked:
- INS = cp
- LINT = lint
- FLOW = cflow
- XREF = cxref -c -s -w132
-
- # printer configuration:
- LPR = lp
- PR = pr -f
-
- # DWB configuration:
- TOPT = -Ti300
- TMAC = -man
- EQN = eqn $(TOPT)
- TROFF = troff $(TOPT) $(TMAC)
- TPOST = | dimp
-
-
- $(PRODUCT): $(OBJECTS)
- $(CC) $(LDFLAGS) -o $@ $(OBJECTS) $(LIBES)
-
- chisq.o info.o: chisq.h
-
- gamma.o: gamma.h
-
- print: $(MAKEFIL) $(HEADERS) $(CFILES) $(TFILES) $(MANSRCS)
- $(PR) $(MAKEFIL) $(HEADERS) $(CFILES) $(TFILES) $(MANSRCS) | $(LPR)
-
- typeset: $(MANSRCS)
- $(EQN) $(PRODUCT).1 | $(TROFF) $(TPOST)
- $(EQN) chisq.3 | $(TROFF) $(TPOST)
- $(EQN) gamma.3 | $(TROFF) $(TPOST)
-
- lint: $(HEADERS) $(CFILES)
- $(LINT) $(CFILES) $(LLIBES) > $@
-
- flow: $(HEADERS) $(CFILES)
- $(FLOW) $(CFILES) > $@
-
- xref: $(HEADERS) $(CFILES)
- $(XREF) $(CFILES) > $@
-
- test: gamma_test tot_test
-
- tot_test: $(PRODUCT) $(TFILES)
- ./$(PRODUCT) < $(PRODUCT).in > $(PRODUCT).out
- @if cmp -s $(PRODUCT).exp $(PRODUCT).out ; \
- then echo 'Tested okay' ; \
- else echo '*** Test failed; differences:' ; \
- diff $(PRODUCT).exp $(PRODUCT).out ; \
- exit 1 ; \
- fi
-
- gamma_test: $(LIBTEST)
- ./$(LIBTEST)
-
- $(LIBTEST): $(TOBJ) gamma.o
- $(CC) $(LDFLAGS) -o $@ $(TOBJ) gamma.o $(LIBES)
-
- $(LIBTEST).o: gamma.h
-
- clean:
- -rm -f $(OBJECTS) $(TOBJ) $(LIBTEST) $(PRODUCT).out \
- lint flow xref core mon.out
-
- clobber: clean
- -rm -f $(PRODUCT)
- END OF Makefile
- echo 'chisq.3' 1>&2
- cat >'chisq.3' <<'END OF chisq.3'
- '\" e
- .TH CHISQ 3V VMB
- '\" last edit: 88/09/19 D A Gwyn
- '\" SCCS ID: @(#)chisq.3 1.2 (edited for publication)
- '\"
- '\" This source must be typeset via "eqn | troff -man"
- '\"
- .EQ
- delim @@
- .EN
- .SH NAME
- chisq \- independence tests for 2-way contingency tables
- .SH SYNOPSIS
- .ds cW (CW\" change to B (without the paren) if you don't have a CW font
- \f\*(cW
- #include "chisq.h"
- .br
- /* \fPThe following functions are declared in this header file.\f\*(cW */
- .sp
- double ChiSqTbl(int r, int c, const long *f, int *df);
- .br
- double InfoTbl(int r, int c, const long *f, int *df);\fP
- .SH DESCRIPTION
- \f\*(cWChiSqTbl\fP
- returns Pearson's @chi sup 2@ statistic
- for the \f\*(cWr\fP-by-\f\*(cWc\fP contingency table
- of observed frequencies of occurrence for each combination of
- row and column categories stored in row-major order
- in the vector starting at \f\*(cWf\fP.
- The computed number of degrees of freedom is stored into the location
- pointed to by \f\*(cWdf\fP.
- .br
- .EQ
- chi sup 2 ~==~ roman sum from i=0 to r-1 roman sum from j=0 to c-1
- { left ( { f sub ij ~-~ { f sub { i. } f sub { .j }} over N } right ) sup 2 }
- over { { f sub { i. } f sub { .j }} over N }
- .EN
- where the row and column sums are denoted
- @f sub { i. }~==~roman sum from j=0 to c-1 f sub ij@
- and
- @f sub { .j }~==~roman sum from i=0 to r-1 f sub ij@
- and the total number of occurrences is
- @N~==~roman sum from i=0 to r-1 roman sum from j=0 to c-1 f sub ij@.
- Terms involving zero row or column sum are omitted and the returned
- number of degrees of freedom are
- correspondingly reduced from the nominal value
- @(r-1)(c-1)@.
- .P
- \f\*(cWInfoTbl\fP
- returns 2 times Kullback's
- @"\v'-0.2'^\v'0.2'\h'-\w;^;u'I\^" ( H sub 1 : H sub 2 )@ statistic
- for a similar contingency table.
- The number of degrees of freedom to be used when relating this statistic
- to the @chi sup 2@ distribution is stored into the location
- pointed to by \f\*(cWdf\fP.
- (See \s-1DISCUSSION\s0 below.)
- .br
- .EQ
- 2 "\v'-0.2'^\v'0.2'\h'-\w;^;u'I\^" ( H sub 1 : H sub 2 ) ~==~
- 2 roman sum from i=0 to r-1 roman sum from j=0 to c-1
- { f sub ij log { { N f sub ij }
- over { f sub { i. } f sub { .j } } } }
- .EN
- where the row and column sums @f sub { i. }@ and @f sub { .j }@
- and the total number of observations @N@
- are the same as for Pearson's @chi sup 2@.
- .SH DISCUSSION
- @2 "\v'-0.2'^\v'0.2'\h'-\w;^;u'I\^" ( H sub 1 : H sub 2 )@
- represents twice the estimated information
- in favor of hypothesis @H sub 1@ over hypothesis @H sub 2@
- contained in the observed frequencies,
- where the \fInull hypothesis\fP @H sub 2@ is that the row and column
- categorizations are statistically independent and
- the \fIalternative hypothesis\fP @H sub 1@ is that they aren't.
- This statistic is asymptotically distributed as @chi sup 2@
- with @(r-1)(c-1)@ degrees of freedom;
- therefore by use of the \f\*(cWQChiSq\fP function (see \fIgamma\fP(3V))
- it can be converted to the probability that
- the statistic would be as large as was observed
- if the categorizations really are independent.
- This is of course the traditional use of Pearson's @chi sup 2@ statistic,
- which @2 "\v'-0.2'^\v'0.2'\h'-\w;^;u'I\^"@ approaches for large samples
- when @H sub 2@ is true.
- The main difference is that Pearson's @chi sup 2@ is not useful
- for small sample sizes,
- whereas @2 "\v'-0.2'^\v'0.2'\h'-\w;^;u'I\^"@ fully takes into account
- all available information for even the smallest samples.
- .P
- @2 "\v'-0.2'^\v'0.2'\h'-\w;^;u'I\^" ( H sub 1 : H sub 2 )@
- (along with its corresponding degrees of freedom
- for use with \f\*(cWQChiSq\fP) is \fIadditive\fP for independent samples,
- so that the information can be accumulated over the course of many
- independent experiments in order to improve one's ability
- to detect a violation of the null hypothesis.
- .SH CAVEATS
- Pearson's @chi sup 2@ test is unreliable for low frequencies;
- consequently it is usually recommended that
- categories be chosen to tally at least
- 5 occurrences in each bin.
- However, excessive combination of bins causes loss of information and
- consequently loss of discriminating power.
- Because @2 "\v'-0.2'^\v'0.2'\h'-\w;^;u'I\^"@ is correct for any sample size,
- it does not require combination of bins
- and is therefore immune from this problem.
- .bp
- .SH EXAMPLE
- \f\*(cW
- .ta 8n 16n 24n 32n 40n 48n 56n 64n
- .nf
- \&
- /*
- Example program to read table dimensions, then frequencies,
- and to print both statistics along with significance levels.
- */
- #include <stdio.h>
- #include <stdlib.h> /* for EXIT_SUCCESS */
- #include "chisq.h"
- #include "gamma.h" /* for QChiSq(\|) */
-
- #define MAXTBL 1000
- static long f[MAXTBL]; /* frequency tallies */
- static int r; /* # of rows */
- static int c; /* # of columns */
- #define x(i,j) f[(i)*c+(j)] /* convenient way to access freqs */
-
- static void Calc(char *name, double (*func)(int, int, const long *, int *))
- {
- int df; /* degrees of freedom for chi-square */
- double stat = (*func)(r, c, f, &df); /* computed statistic */
-
- /* print results */
- if ( stat >= 0.0 )
- (void)printf("%s = %5.2f\etdf = %2d\etq = %7.4f\en",
- name, stat, df, QChiSq(stat, df)
- );
- else
- (void)printf(stat < -3.5 ? "out of memory\en"
- : stat < -2.5 ? "table too small\en"
- : stat < -1.5 ? "negative frequency\en"
- : "too many zeros\en"
- );
- }
-
- int main(int argc, char *argv[])
- {
- register int i; /* row index */
- register int j; /* column index */
-
- while ( scanf("%d %d\en", &r, &c) == 2 ) /* start new table */
- {
- /* input tallies */
- for ( i = 0; i < r; ++i )
- for ( j = 0; j < c; ++j )
- (void)scanf(" %ld", &x(i,j));
-
- /* compute statistics and print results */
- Calc("chisq", ChiSqTbl);
- Calc("2info", InfoTbl);
- }
- return EXIT_SUCCESS;
- }\fP
- .bp
- .SH FILES
- .ta 3i
- chisq.h header file
- .br
- chisq.o, info.o object modules
- .SH REFERENCE
- Solomon Kullback, \fIInformation Theory and Statistics\fP (Dover, 1968).
- .SH "SEE ALSO"
- gamma(3V).
- .SH DIAGNOSTICS
- Both these functions a return negative value for the statistic
- when it cannot be meaningfully computed, as follows:
- .RS
- .PD 0
- .TP
- \-1.0
- too many 0 entries in the table
- (for \f\*(cWChiSqTbl\fP, this means insufficient degrees of freedom;
- for \f\*(cWInfoTbl\fP, this means \fIevery\fP entry in the table was 0)
- .TP
- \-2.0
- some frequency was specified as less than 0
- .TP
- \-3.0
- specified number of rows or columns was less than 2
- .TP
- \-4.0
- unable to dynamically allocate enough working storage
- .PD
- .RE
- .SH AUTHOR
- Douglas A.\& Gwyn, BRL/VLD-VMB
- END OF chisq.3
- echo 'chisq.c' 1>&2
- cat >'chisq.c' <<'END OF chisq.c'
- /*
- ChiSqTbl -- Pearson's chi-square test for a 2-way contingency table
-
- last edit: 88/09/19 D A Gwyn
-
- SCCS ID: @(#)chisq.c 1.1 (edited for publication)
-
- Special return values:
- -1.0 insufficient degrees of freedom (too many 0 entries)
- -2.0 invalid table entry (frequency less than 0)
- -3.0 invalid table dimensions (r or c less than 2)
- -4.0 unable to allocate enough working storage
- */
-
- #ifdef __STDC__
- #include <stdlib.h> /* malloc, free */
-
- #include "std.h"
- #else
- #include "std.h"
-
- extern pointer malloc();
- extern void free();
- #endif
-
- double
- ChiSqTbl( r, c, f, pdf )
- int r; /* # rows in table */
- int c; /* # columns in table */
- const long *f; /* -> r*c frequency tallies */
- int *pdf; /* -> return # degrees of freedom */
- {
- #define x(i,j) f[(i)*c+(j)] /* convenient way to access freqs */
- register int i; /* row index */
- register int j; /* column index */
- double N; /* (double)n */
- double chisq; /* accumulates chi-square */
- double *xi; /* row sums */
- double *xj; /* col sums */
- int rdf = r - 1; /* row degrees of freedom */
- int cdf = c - 1; /* column degrees of freedom */
-
- if ( rdf <= 0 || cdf <= 0 )
- {
- chisq = -3.0;
- goto ret3;
- }
-
- if ( (xi = (double *)malloc( r * sizeof(double) )) == NULL )
- {
- chisq = -4.0;
- goto ret3;
- }
-
- if ( (xj = (double *)malloc( c * sizeof(double) )) == NULL )
- {
- chisq = -4.0;
- goto ret2;
- }
-
- /* compute row sums and total */
-
- N = 0.0;
-
- for ( i = 0; i < r; ++i )
- {
- double sum = 0.0; /* accumulator */
-
- for ( j = 0; j < c; ++j )
- {
- register long k = x(i,j);
-
- if ( k < 0L )
- {
- chisq = -2.0;
- goto ret1;
- }
-
- sum += (double)k;
- }
-
- if ( (xi[i] = sum) <= 0.0 )
- --rdf;
- else
- N += sum;
- }
-
- /* compute column sums */
-
- for ( j = 0; j < c; ++j )
- {
- double sum = 0.0; /* accumulator */
-
- for ( i = 0; i < r; ++i )
- sum += (double)x(i,j);
-
- if ( (xj[j] = sum) <= 0.0 )
- --cdf;
- }
-
- if ( rdf <= 0 || cdf <= 0 )
- {
- chisq = -1.0;
- goto ret1;
- }
-
- *pdf = rdf * cdf; /* total degrees of freedom */
-
- /* compute chi-square */
-
- chisq = 0.0;
-
- for ( i = 0; i < r; ++i )
- {
- double pi = xi[i]; /* row sum */
-
- if ( pi > 0.0 )
- for ( j = 0; j < c; ++j )
- {
- double pj = xj[j]; /* column sum */
-
- if ( pj > 0.0 )
- {
- double expected = pi * pj / N;
- double delta =
- (double)x(i,j) - expected;
-
- chisq += delta * delta / expected;
- }
- }
- }
-
- ret1:
- free( (pointer)xj );
- ret2:
- free( (pointer)xi );
- ret3:
- return chisq;
- }
- END OF chisq.c
- echo 'chisq.h' 1>&2
- cat >'chisq.h' <<'END OF chisq.h'
- /*
- chisq.h -- definitions for contingency-table routines
-
- last edit: 88/09/19 D A Gwyn
-
- SCCS ID: @(#)chisq.h 1.1 (edited for publication)
- */
-
- /* library routines: */
-
- #ifdef __STDC__
- extern double ChiSqTbl( int r, int c, const long *f, int *pdf );
- extern double InfoTbl( int r, int c, const long *f, int *pdf );
- #else
- extern double ChiSqTbl();
- extern double InfoTbl();
- #endif
- END OF chisq.h
- echo 'g_test.c' 1>&2
- cat >'g_test.c' <<'END OF g_test.c'
- /*
- g_test -- tests for gamma and related functions
-
- last edit: 88/09/09 D A Gwyn
- SCCS ID: @(#)g_test.c 1.1 (edited for publication)
- */
-
- #include <stdio.h>
- #ifdef __STDC__
- #include <stdlib.h> /* for NULL etc. */
- #endif
-
- #include "std.h"
-
- #include "gamma.h"
-
- #define Printf (void)printf
-
- #define TOL 1.0e-5 /* tolerance for checks */
-
- static int errs = 0; /* tally errors */
-
- static double
- RelDif( a, b ) /* returns relative difference: */
- double a, b; /* 0.0 if exactly the same,
- otherwise ratio of difference
- to the larger of the two */
- {
- double c = Abs( a );
- double d = Abs( b );
-
- d = Max( c, d );
-
- return d == 0.0 ? 0.0 : Abs( a - b ) / d;
- }
-
- static void
- RCheck( d, r ) /* check real number */
- double d; /* real to be checked */
- double r; /* expected value */
- {
- if ( RelDif( d, r ) > TOL )
- {
- errs = 1;
- Printf( "value s.b. %g, was %g\n", r, d );
- }
- }
-
- void
- GTest()
- {
- static struct
- {
- double in; /* input values */
- double exp; /* expected output values */
- } tbl[] = /* table of test values */
- {
- { 1.0, 1.0 },
- { 2.0, 1.0 },
- { 3.0, 2.0 },
- { 4.0, 6.0 },
- { 5.0, 24.0 },
- { 6.0, 120.0 },
- { 0.5, 1.7724538509 },
- { 1.5, 0.8862269255 },
- { 0.25, 3.6256099082 },
- { 0.333333333333, 2.6789385347 },
- { 0.666666666667, 1.3541179394 },
- { 0.75, 1.2254167024 },
- { 10.0, 362880.0 },
- { 20.0, 1.2164510041e+17 },
- };
- register int i; /* indexes tbl[] test values */
-
- for ( i = 0; i < sizeof tbl / sizeof tbl[0]; ++i )
- RCheck( Gamma( tbl[i].in ), tbl[i].exp );
- }
-
- void
- FTest()
- {
- static struct
- {
- int in; /* input values */
- double exp; /* expected output values */
- } tbl[] = /* table of test values */
- {
- { 0, 1.0 },
- { 1, 1.0 },
- { 2, 2.0 },
- { 3, 6.0 },
- { 4, 24.0 },
- { 5, 120.0 },
- { 6, 720.0 },
- { 10, 3628800.0 },
- { 20, 2.4329020082e+18 },
- };
- register int i; /* indexes tbl[] test values */
-
- for ( i = 0; i < sizeof tbl / sizeof tbl[0]; ++i )
- RCheck( Factorial( tbl[i].in ), tbl[i].exp );
- }
-
- void
- BCTest()
- {
- static struct
- {
- int n; /* top parts of inputs */
- int k; /* bottom parts of inputs */
- double exp; /* expected output values */
- } tbl[] = /* table of test values */
- {
- { 1, 0, 1.0 },
- { 1, 1, 1.0 },
- { 2, 0, 1.0 },
- { 2, 1, 2.0 },
- { 2, 2, 1.0 },
- { 3, 0, 1.0 },
- { 3, 1, 3.0 },
- { 3, 2, 3.0 },
- { 5, 3, 10.0 },
- { 10, 4, 210.0 },
- { 10, 5, 252.0 },
- { 40, 6, 3838380.0 },
- { 50, 20, 47129212243960.0},
- };
- register int i; /* indexes tbl[] test values */
-
- for ( i = 0; i < sizeof tbl / sizeof tbl[0]; ++i )
- RCheck( BCoeff( tbl[i].n, tbl[i].k ), tbl[i].exp );
- }
-
- void
- ETest()
- {
- static struct
- {
- double in; /* input values */
- double exp; /* expected output values */
- } tbl[] = /* table of test values */
- {
- { 0.0, 0.0 },
- { 0.1, 0.1124629160 },
- { 0.2, 0.2227025892 },
- { 0.5, 0.5204998778 },
- { 0.8, 0.7421009647 },
- { 1.0, 0.8427007929 },
- { 1.5, 0.9661051465 },
- { 2.0, 0.9953222650 },
- };
- register int i; /* indexes tbl[] test values */
-
- for ( i = 0; i < sizeof tbl / sizeof tbl[0]; ++i )
- RCheck( Erf( tbl[i].in ), tbl[i].exp );
- }
-
- void
- QCTest()
- {
- static struct
- {
- double chisq; /* chi-square limit */
- int df; /* degrees of freedom */
- double exp; /* expected output values */
- } tbl[] = /* table of test values */
- {
- { 0.001, 1, 0.97477 },
- { 0.01, 1, 0.92034 },
- { 0.01, 2, 0.99501 },
- { 0.05, 1, 0.82306 },
- { 0.1, 1, 0.75183 },
- { 0.1, 2, 0.95123 },
- { 1.0, 1, 0.31731 },
- { 1.0, 2, 0.60653 },
- { 1.0, 3, 0.80125 },
- { 1.0, 4, 0.90980 },
- { 1.0, 5, 0.96257 },
- { 1.5, 2, 0.47237 },
- { 2.0, 1, 0.15730 },
- { 2.0, 3, 0.57241 },
- { 2.0, 5, 0.84915 },
- { 4.0, 6, 0.67668 },
- { 5.0, 5, 0.41588 },
- { 10.0, 7, 0.188573 },
- { 10.0, 10, 0.44049 },
- { 10.0, 20, 0.96817 },
- { 20.0, 30, 0.91654 },
- { 40.0, 30, 0.104864 },
- { 8.26040, 20, 0.99 },
- { 10.8508, 20, 0.95 },
- { 12.4426, 20, 0.90 },
- { 15.4518, 20, 0.75 },
- { 19.3374, 20, 0.50 },
- { 23.8277, 20, 0.25 },
- { 28.4120, 20, 0.10 },
- { 31.4104, 20, 0.05 },
- { 37.5662, 20, 0.01 },
- };
- register int i; /* indexes tbl[] test values */
-
- for ( i = 0; i < sizeof tbl / sizeof tbl[0]; ++i )
- RCheck( QChiSq( tbl[i].chisq, tbl[i].df ), tbl[i].exp );
- }
-
- /*ARGSUSED*/
- main( argc, argv )
- int argc;
- char *argv[];
- {
- GTest();
- FTest();
- BCTest();
- ETest();
- QCTest();
-
- return errs;
- }
- END OF g_test.c
- echo 'gamma.3' 1>&2
- cat >'gamma.3' <<'END OF gamma.3'
- '\" e
- .TH GAMMA 3V VMB
- '\" last edit: 88/09/09 D A Gwyn
- '\" SCCS ID: @(#)gamma.3 1.2 (edited for publication)
- '\"
- '\" This source must be typeset via "eqn | troff -man"
- '\"
- .EQ
- delim @@
- .EN
- .SH NAME
- gamma \- gamma and related functions
- .SH SYNOPSIS
- .ds cW (CW\" change to B (without the paren) if you don't have a CW font
- \f\*(cW
- #include "gamma.h"
- .br
- /* \fPAll the following functions are declared in this header file.\f\*(cW */
- .sp
- double Gamma(double z);
- .br
- double LGamma(double z);
- .br
- double Factorial(int n);
- .br
- double LFactorial(int n);
- .br
- double BCoeff(int n, int k);
- .br
- double Beta(double z, double w);
- .br
- double PGamma(double a, double x);
- .br
- double QGamma(double a, double x);
- .br
- double Erf(double x);
- .br
- double Erfc(double x);
- .br
- double CPoisson(double x, int k);
- .br
- double PChiSq(double chisq, int nu);
- .br
- double QChiSq(double chisq, int nu);
- .ft
- .SH DESCRIPTION
- \f\*(cWGamma\fP
- returns the real gamma function value
- @GAMMA (z)~==~ int "" from 0 to inf t sup z-1 e sup -t dt@
- for @z>0@.
- .P
- \f\*(cWLGamma\fP
- returns the value
- @ln ~ GAMMA (z)@
- for @z>0@.
- .P
- \f\*(cWFactorial\fP
- returns the factorial function value
- @n! ~=~ GAMMA (n+1)@
- for @n>=0@.
- .P
- \f\*(cWLFactorial\fP
- returns the value
- @ln ~n!@\&
- for @n>=0@.
- .P
- \f\*(cWBCoeff\fP
- returns the binomial coefficient value
- @size +8 ( pile { n above k } size +8 ) ~==~n! over k!(n-k)!@\&
- for @0<=k<=n@.
- .P
- \f\*(cWBeta\fP
- returns the real beta function value
- @B(z,w)~==~ int "" from 0 to 1 t sup z-1 (1-t) sup w-1 dt@
- for @z>0@ and @w>0@.
- .P
- \f\*(cWPGamma\fP
- returns the incomplete gamma function value
- @P(a,x)~==~ 1 over { GAMMA (a) } int "" from 0 to x e sup -t t sup a-1 dt@
- for @a>0@ and @0<=x<= inf@.
- .P
- \f\*(cWQGamma\fP
- returns the value
- @1-P(a,x)@.
- .P
- \f\*(cWErf\fP
- returns the error function value
- @roman erf (x)~==~2 over sqrt pi int "" from 0 to x e sup {-t sup 2 } dt@.
- .P
- \f\*(cWErfc\fP
- returns the complementary error function value
- @1- roman erf (x)@.
- .P
- \f\*(cWCPoisson\fP
- returns the cumulative Poisson probability value
- @P sub x (<k)@,
- which is the probability that the number of Poisson random events occurring
- will be between 0 and @k-1@ inclusive,
- where the expected mean is @x@.
- .P
- \f\*(cWPChiSq\fP
- returns the value
- @P( chi sup 2 | nu )@,
- which is the probability that the observed chi-square for a correct model
- will be less than @chi sup 2@,
- where @nu@ is the number of degrees of freedom.
- .P
- \f\*(cWQChiSq\fP
- returns the value
- @1-P( chi sup 2 | nu )@,
- which is the probability that the observed chi-square
- will exceed @chi sup 2@ by chance even for a correct model,
- where @nu@ is the number of degrees of freedom.
- .P
- All these functions return
- .I approximations
- to the exact results,
- generally accurate to about seven significant digits.
- .SH CAVEATS
- Overflow can occur.
- .P
- Invalid parameters cause a diagnostic and termination of the process.
- .SH FILES
- .ta 3i
- gamma.h header file
- .br
- gamma.o object module
- .SH AUTHORS
- William H.\& Press,
- Brian P.\& Flannery,
- Saul A.\& Teukolsky,
- and
- William T.\& Vetterling
- (\fINumerical Recipes in C\fP, Chapter 6)
- .br
- Douglas A.\& Gwyn, BRL/VLD-VMB
- END OF gamma.3
- echo 'gamma.c' 1>&2
- cat >'gamma.c' <<'END OF gamma.c'
- /*
- Gamma -- gamma and related functions
-
- last edit: 88/09/09 D A Gwyn
-
- SCCS ID: @(#)gamma.c 1.1 (edited for publication)
-
- Acknowledgement:
- Code based on that found in "Numerical Methods in C".
- */
-
- #include <assert.h>
- #include <math.h>
-
- #include "std.h"
-
- double
- LGamma( x )
- double x;
- {
- static const double cof[6] =
- {
- 76.18009173, -86.50532033, 24.01409822,
- -1.231739516, 0.120858003e-2, -0.536382e-5
- };
- double tmp, ser;
- register int j;
-
- assert(x > 0.0);
-
- if ( --x < 0.0 ) /* use reflection formula for accuracy */
- {
- double pix = PI * x;
-
- return log( pix / sin( pix ) ) - LGamma( 1.0 - x );
- }
-
- tmp = x + 5.5;
- tmp -= (x + 0.5) * log( tmp );
-
- ser = 1.0;
-
- for ( j = 0; j < 6; ++j )
- ser += cof[j] / ++x;
-
- return -tmp + log( 2.50662827465 * ser );
- }
-
- double
- Gamma( x )
- double x;
- {
- return exp( LGamma( x ) );
- }
-
- double
- Factorial( n )
- register int n;
- {
- static double a[33] =
- {
- 1.0, 1.0, 2.0, 6.0, 24.0
- };
- static int ntop = 4;
-
- assert(n >= 0);
-
- if ( n > 32 )
- return Gamma( (double)n + 1.0 );
-
- while ( ntop < n )
- {
- register int j = ntop++;
-
- a[ntop] = a[j] * (double)ntop;
- }
-
- return a[n];
- }
-
- double
- LFactorial( n )
- register int n;
- {
- static double a[101]; /* init 0.0 */
-
- assert(n >= 0);
-
- if ( n <= 1 )
- return 0.0;
-
- if ( n <= 100 )
- if ( a[n] > 0.0 )
- return a[n]; /* table value already set up */
- else
- return a[n] = LGamma( (double)n + 1.0 );
- else
- return LGamma( (double)n + 1.0 ); /* beyond table */
- }
-
- double
- BCoeff( n, k )
- register int n, k;
- {
- assert(k >= 0);
- assert(n >= k);
-
- return Round( exp( LFactorial( n )
- - (LFactorial( k ) + LFactorial( n - k ))
- )
- );
- }
-
- double
- Beta( z, w )
- double z, w;
- {
- return exp( LGamma( z ) + LGamma( w ) - LGamma( z + w ) );
- }
-
- #define ITMAX 100
- #define EPS 3.0e-7
-
- static double
- gser( a, x )
- double a, x;
- {
- double ap, del, sum;
- register int n;
-
- assert(x >= 0.0);
-
- if ( x <= 0.0 )
- return 0.0;
-
- del = sum = 1.0 / (ap = a);
-
- for ( n = 1; n <= ITMAX; ++n )
- {
- sum += del *= x / ++ap;
-
- if ( Abs( del ) < Abs( sum ) * EPS )
- return sum * exp( -x + a * log( x ) - LGamma( a ) );
- }
-
- assert(n <= ITMAX);
- /*NOTREACHED*/
- }
-
- static double
- gcf( a, x )
- double a, x;
- {
- register int n;
- double gold = 0.0, fac = 1.0, b1 = 1.0,
- b0 = 0.0, a0 = 1.0, a1 = x;
-
- for ( n = 1; n <= ITMAX; ++n )
- {
- double anf;
- double an = (double)n;
- double ana = an - a;
-
- a0 = (a1 + a0 * ana) * fac;
- b0 = (b1 + b0 * ana) * fac;
- anf = an * fac;
- b1 = x * b0 + anf * b1;
- a1 = x * a0 + anf * a1;
-
- if ( a1 != 0.0 )
- { /* renormalize */
- double g = b1 * (fac = 1.0 / a1);
-
- gold = g - gold;
-
- if ( Abs( gold ) < EPS * Abs( g ) )
- return exp( -x + a * log( x ) - LGamma( a ) )
- * g;
-
- gold = g;
- }
- }
-
- assert(n <= ITMAX);
- /*NOTREACHED*/
- }
-
- double
- PGamma( a, x )
- double a, x;
- {
- assert(x >= 0.0);
- assert(a > 0.0);
-
- return x < a + 1.0 ? gser( a, x ) : 1.0 - gcf( a, x );
- }
-
- double
- QGamma( a, x )
- double a, x;
- {
- assert(x >= 0.0);
- assert(a > 0.0);
-
- return x < a + 1.0 ? 1.0 - gser( a, x ) : gcf( a, x );
- }
-
- double
- Erf( x )
- double x;
- {
- return x < 0.0 ? -PGamma( 0.5, x * x ) : PGamma( 0.5, x * x );
- }
-
- double
- Erfc( x )
- double x;
- {
- return x < 0.0 ? 1.0 + PGamma( 0.5, x * x ) : QGamma( 0.5, x * x );
- }
-
- double
- CPoisson( x, k )
- double x;
- int k;
- {
- return QGamma( (double)k, x );
- }
-
- double
- PChiSq( chisq, df )
- double chisq;
- int df;
- {
- return PGamma( (double)df / 2.0, chisq / 2.0 );
- }
-
- double
- QChiSq( chisq, df )
- double chisq;
- int df;
- {
- return QGamma( (double)df / 2.0, chisq / 2.0 );
- }
- END OF gamma.c
- echo 'gamma.h' 1>&2
- cat >'gamma.h' <<'END OF gamma.h'
- /*
- <gamma.h> -- definitions for gamma-function routines
-
- last edit: 88/09/09 D A Gwyn
-
- SCCS ID: @(#)gamma.h 1.1 (edited for publication)
- */
-
- /* library routines: */
-
- #ifdef __STDC__
- extern double Gamma( double z ), LGamma( double z ),
- Factorial( int n ), LFactorial( int n ),
- BCoeff( int n, int k ),
- Beta( double z, double w ),
- PGamma( double a, double x ), QGamma( double a, double x ),
- Erf( double x ), Erfc( double x ),
- CPoisson( double x, int k ),
- PChiSq( double chisq, int nu ), QChiSq( double chisq, int nu );
- #else
- extern double Gamma(), LGamma(), Factorial(), LFactorial(),
- BCoeff(), Beta(), PGamma(), QGamma(), Erf(), Erfc(),
- CPoisson(), PChiSq(), QChiSq();
- #endif
- END OF gamma.h
- echo 'info.c' 1>&2
- cat >'info.c' <<'END OF info.c'
- /*
- InfoTbl -- Kullback's information measure for a 2-way contingency table
-
- last edit: 88/09/19 D A Gwyn
-
- SCCS ID: @(#)info.c 1.1 (edited for publication)
-
- Special return values:
- -1.0 entire table consisted of 0 entries
- -2.0 invalid table entry (frequency less than 0)
- -3.0 invalid table dimensions (r or c less than 2)
- -4.0 unable to allocate enough working storage
- */
-
- #include <math.h> /* for log() */
- #if __STDC__
- #include <stdlib.h> /* malloc, free */
-
- #include "std.h"
- #else
- #include "std.h"
-
- extern pointer malloc();
- extern void free();
- #endif
-
- double
- InfoTbl( r, c, f, pdf )
- int r; /* # rows in table */
- int c; /* # columns in table */
- const long *f; /* -> r*c frequency tallies */
- int *pdf; /* -> return # d.f. for chi-square */
- {
- #define x(i,j) f[(i)*c+(j)] /* convenient way to access freqs */
- register int i; /* row index */
- register int j; /* column index */
- double N; /* (double)n */
- double info; /* accumulates information measure */
- double *xi; /* row sums */
- double *xj; /* col sums */
- int rdf = r - 1; /* row degrees of freedom */
- int cdf = c - 1; /* column degrees of freedom */
-
- if ( rdf <= 0 || cdf <= 0 )
- {
- info = -3.0;
- goto ret3;
- }
-
- *pdf = rdf * cdf; /* total degrees of freedom */
-
- if ( (xi = (double *)malloc( r * sizeof(double) )) == NULL )
- {
- info = -4.0;
- goto ret3;
- }
-
- if ( (xj = (double *)malloc( c * sizeof(double) )) == NULL )
- {
- info = -4.0;
- goto ret2;
- }
-
- /* compute row sums and total */
-
- N = 0.0;
-
- for ( i = 0; i < r; ++i )
- {
- double sum = 0.0; /* accumulator */
-
- for ( j = 0; j < c; ++j )
- {
- register long k = x(i,j);
-
- if ( k < 0L )
- {
- info = -2.0;
- goto ret1;
- }
-
- sum += (double)k;
- }
-
- N += xi[i] = sum;
- }
-
- if ( N <= 0.0 )
- {
- info = -1.0;
- goto ret1;
- }
-
- /* compute column sums */
-
- for ( j = 0; j < c; ++j )
- {
- double sum = 0.0; /* accumulator */
-
- for ( i = 0; i < r; ++i )
- sum += (double)x(i,j);
-
- xj[j] = sum;
- }
-
- /* compute information measure (four parts) */
-
- info = N * log( N ); /* part 1 */
-
- for ( i = 0; i < r; ++i )
- {
- double pi = xi[i]; /* row sum */
-
- if ( pi > 0.0 )
- info -= pi * log( pi ); /* part 2 */
-
- for ( j = 0; j < c; ++j )
- {
- double pij = (double)x(i,j);
-
- if ( pij > 0.0 )
- info += pij * log( pij ); /* part 3 */
- }
- }
-
- for ( j = 0; j < c; ++j )
- {
- double pj = xj[j]; /* column sum */
-
- if ( pj > 0.0 )
- info -= pj * log( pj ); /* part 4 */
- }
-
- info *= 2.0; /* for comparability with chi-square */
-
- ret1:
- free( (pointer)xj );
- ret2:
- free( (pointer)xi );
- ret3:
- return info;
- }
- END OF info.c
- echo 'std.h' 1>&2
- cat >'std.h' <<'END OF std.h'
- /*
- std.h -- Douglas A. Gwyn's standard C programming definitions
-
- Prerequisite: <math.h> (if you invoke Round())
-
- last edit: 89/06/18 D A Gwyn
-
- SCCS ID: @(#)std.h 1.30
-
- The master source file is to be modified only by Douglas A. Gwyn
- <Gwyn@BRL.MIL>. When installing a VLD/VMB software distribution,
- this file may need to be tailored slightly to fit the target system.
- Usually this just involves enabling some of the "kludges for deficient
- C implementations" at the end of this file.
- */
-
- #ifndef VLD_STD_H_
- #define VLD_STD_H_ /* once-only latch */
-
- /* Extended data types */
-
- typedef int bool; /* Boolean data */
- #define false 0
- #define true 1
-
- /* ANSI C definitions */
-
- /* Defense against some silly systems defining __STDC__ to random things. */
- #ifdef STD_C
- #undef STD_C
- #endif
- #ifdef __STDC__
- #if __STDC__ > 0
- #define STD_C __STDC__ /* use this instead of __STDC__ */
- #endif
- #endif
-
- #ifdef STD_C
- typedef void *pointer; /* generic pointer */
- #else
- typedef char *pointer; /* generic pointer */
- #define const /* nothing */ /* ANSI C type qualifier */
- /* There really is no substitute for the following, but these might work: */
- #define signed /* nothing */ /* ANSI C type specifier */
- #define volatile /* nothing */ /* ANSI C type qualifier */
- #endif
-
- #ifndef EXIT_SUCCESS
- #define EXIT_SUCCESS 0
- #endif
-
- #ifndef EXIT_FAILURE
- #define EXIT_FAILURE 1
- #endif
-
- #ifndef NULL
- #define NULL 0 /* null pointer constant, all types */
- #endif
-
- /* Universal constants */
-
- #define DEGRAD 57.2957795130823208767981548141051703324054724665642
- /* degrees per radian */
- #define E 2.71828182845904523536028747135266249775724709369996
- /* base of natural logs */
- #define GAMMA 0.57721566490153286061
- /* Euler's constant */
- #define LOG10E 0.43429448190325182765112891891660508229439700580367
- /* log of e to the base 10 */
- #define PHI 1.618033988749894848204586834365638117720309180
- /* golden ratio */
- #define PI 3.14159265358979323846264338327950288419716939937511
- /* ratio of circumf. to diam. */
-
- /* Useful macros */
-
- /*
- The comment "UNSAFE" means that the macro argument(s) may be evaluated
- more than once, so the programmer must realize that the macro doesn't
- quite act like a genuine function. This matters only when evaluating
- an argument produces "side effects".
- */
-
- /* arbitrary numerical arguments and value: */
- #define Abs( x ) ((x) < 0 ? -(x) : (x)) /* UNSAFE */
- #define Max( a, b ) ((a) > (b) ? (a) : (b)) /* UNSAFE */
- #define Min( a, b ) ((a) < (b) ? (a) : (b)) /* UNSAFE */
-
- /* floating-point arguments and value: */
- #define Round( d ) floor( (d) + 0.5 ) /* requires <math.h> */
-
- /* arbitrary numerical arguments, integer value: */
- #define Sgn( x ) ((x) == 0 ? 0 : (x) > 0 ? 1 : -1) /* UNSAFE */
-
- /* string arguments, boolean value: */
- #ifdef gould /* UTX-32 2.0 compiler has problems with "..."[] */
- #define StrEq( a, b ) (strcmp( a, b ) == 0) /* UNSAFE */
- #else
- #define StrEq( a, b ) (*(a) == *(b) && strcmp( a, b ) == 0) /* UNSAFE */
- #endif
-
- /* array argument, integer value: */
- #define Elements( a ) (sizeof a / sizeof a[0])
-
- /* integer (or character) arguments and value: */
- #define fromhostc( c ) (c) /* map host char set to ASCII */
- #define tohostc( c ) (c) /* map ASCII to host char set */
- #define tonumber( c ) ((c) - '0') /* convt digit char to number */
- #define todigit( n ) ((n) + '0') /* convt digit number to char */
-
- /* Kludges for deficient C implementations */
-
- /*#define strchr index /* 7th Edition UNIX, 4.2BSD */
- /*#define strrchr rindex /* 7th Edition UNIX, 4.2BSD */
- /*#define void int /* K&R Appendix A followers */
-
- #endif /* VLD_STD_H_ */
- END OF std.h
- echo 'tot_info.1' 1>&2
- cat >'tot_info.1' <<'END OF tot_info.1'
- '\" e
- .TH TOT_INFO 1V VMB
- '\" last edit: 89/02/07 D A Gwyn
- '\" SCCS ID: @(#)tot_info.1 1.2 (edited for publication)
- '\"
- '\" This source must be typeset via "eqn | troff -man"
- '\"
- .EQ
- delim @@
- .EN
- .SH NAME
- tot_info \- total information for multiple 2-way contingency tables
- .SH SYNOPSIS
- .ds cW (CW\" change to I (without the paren) if you don't have a CW font
- .ds cB (CB\" change to B (without the paren) if you don't have a CB font
- \f\*(cBtot_info\fP
- .SH DESCRIPTION
- \f\*(cBtot_info\fP
- reads multiple contingency-table data sets from the standard input
- and prints Kullback's information statistic for each set,
- as well as for the aggregate over all sets,
- on the standard output.
- (See \fIchisq\^\fP(3V) for a discussion of this statistic.)
- .SH "INPUT FORMAT"
- Input consists of one or more data sets,
- each constituting a 2-way contingency table
- (not necessarily all of the same size).
- A data set may be preceded by any number of blank or comment lines;
- a comment line has a
- \f\*(cB#\fP
- character as the first non-whitespace character on the line.
- Following the optional comment lines is a header line
- containing the row and column dimensions of the contingency table
- (in that order),
- separated by white space.
- Finally, the contents of the contingency table (frequency counts)
- must be given in ``row major'' order.
- The table may be freely formatted with white-space separators;
- a row need not be on a single line.
- .SH "OUTPUT FORMAT"
- Input comments are copied to the output as they are encountered.
- Otherwise the output consists solely of an information line for
- each data set (or a diagnostic if the data is invalid) and a final
- information line for the aggregate over all data sets (preceded by
- a blank line).
- An information line exhibits twice the value of Kullback's
- @"\v'-0.2'^\v'0.2'\h'-\w;^;u'I\^" ( H sub 1 : H sub 2 )@ statistic,
- the corresponding number of degrees of freedom,
- and the probability that the statistic
- would be as large as was actually observed
- if the row and column categorizations really were independent.
- .P
- The aggregate statistic is valid if the data sets
- represent independent tests of the same hypothesis.
- .SH DIAGNOSTICS
- The diagnostic messages are intended to be self-explanatory.
- .SH "EXIT STATUS"
- \f\*(cBtot_info\fP
- returns a zero exit status if and only if no problems were encountered.
- .SH EXAMPLE
- .RS
- \f\*(cW$ \fP\f\*(cBtot_info
- .br
- # MilCrypI.60 biliteral cryptogram (trial pairings against G)
- .br
- # G vs B
- .br
- 2 10
- .br
- 2 2 2 0 3 0 0 1 0 1
- .br
- 3 1 1 1 1 2 2 1 2 1
- .br
- # G vs D
- .br
- 2 10
- .br
- 2 2 2 0 3 0 0 1 0 1
- .br
- 4 1 4 4 1 1 1 3 4 2
- .br
- # G vs J
- .br
- 2 10
- .br
- 2 2 2 0 3 0 0 1 0 1
- .br
- 1 1 1 1 1 1 2 1 1 1
- .br
- # G vs L
- .br
- 2 10
- .br
- 2 2 2 0 3 0 0 1 0 1
- .br
- 1 4 0 4 3 4 5 3 3 4
- .br
- # G vs N
- .br
- 2 10
- .br
- 2 2 2 0 3 0 0 1 0 1
- .br
- 4 1 4 3 1 1 1 2 3 3
- .br
- # G vs Q
- .br
- 2 10
- .br
- 2 2 2 0 3 0 0 1 0 1
- .br
- 0 2 0 2 1 1 1 0 1 0
- .br
- # G vs S (correct pairing)
- .br
- 2 10
- .br
- 2 2 2 0 3 0 0 1 0 1
- .br
- 1 2 2 0 2 1 0 0 0 1
- .br
- # G vs V
- .br
- 2 10
- .br
- 2 2 2 0 3 0 0 1 0 1
- .br
- 1 4 1 3 4 4 4 3 4 3
- .br
- # G vs X
- .br
- 2 10
- .br
- 2 2 2 0 3 0 0 1 0 1
- .br
- 0 1 0 1 2 1 1 0 2 0
- .br
- # Since most pairings are incorrect,
- .br
- # the aggregate probability is small.
- .br
- ^D\fP\f\*(cW
- .br
- # MilCrypI.60 biliteral cryptogram (trial pairings against G)
- .br
- # G vs B
- .br
- 2info = 11.01 df = 9 q = 0.2748
- .br
- # G vs D
- .br
- 2info = 12.40 df = 9 q = 0.1915
- .br
- # G vs J
- .br
- 2info = 9.00 df = 9 q = 0.4375
- .br
- # G vs L
- .br
- 2info = 19.03 df = 9 q = 0.0250
- .br
- # G vs N
- .br
- 2info = 10.89 df = 9 q = 0.2830
- .br
- # G vs Q
- .br
- 2info = 15.82 df = 9 q = 0.0707
- .br
- # G vs S (correct pairing)
- .br
- 2info = 3.11 df = 9 q = 0.9596
- .br
- # G vs V
- .br
- 2info = 14.47 df = 9 q = 0.1066
- .br
- # G vs X
- .br
- 2info = 15.31 df = 9 q = 0.0826
- .br
- # Since most pairings are incorrect,
- .br
- # the aggregate probability is small.
- .br
- .sp
- total 2info = 111.05 df = 81 q = 0.0150
- \fP
- .RE
- .SH REFERENCES
- Solomon Kullback, \fIInformation Theory and Statistics\fP (Dover, 1968).
- .br
- William F.\& Friedman and Lambros D.\& Callimahos,
- \fIMilitary Cryptanalytics, Part I \(em Volume 1\fP
- (reprinted by Aegean Park Press, 1985).
- .SH "SEE ALSO"
- chisq(3V).
- .SH AUTHOR
- Douglas A.\& Gwyn, BRL/VLD-VMB
- END OF tot_info.1
- echo 'tot_info.c' 1>&2
- cat >'tot_info.c' <<'END OF tot_info.c'
- /*
- tot_info -- combine information statistics for multiple tables
-
- last edit: 89/02/06 D A Gwyn
-
- SCCS ID: @(#)tot_info.c 1.1 (edited for publication)
- */
-
- #include <ctype.h>
- #include <stdio.h>
-
- #include "std.h"
-
- #include "chisq.h"
- #include "gamma.h" /* for QChiSq() */
-
- #ifndef MAXLINE
- #define MAXLINE 256
- #endif
-
- #ifndef MAXTBL
- #define MAXTBL 1000
- #endif
-
- static char line[MAXLINE]; /* row/column header input line */
- static long f[MAXTBL]; /* frequency tallies */
- static int r; /* # of rows */
- static int c; /* # of columns */
-
- #define x(i,j) f[(i)*c+(j)] /* convenient way to access freqs */
-
- #define COMMENT '#' /* comment character */
-
- /*ARGSUSED*/
- int
- main( argc, argv )
- int argc;
- char *argv[];
- {
- register char *p; /* input line scan location */
- register int i; /* row index */
- register int j; /* column index */
- double info; /* computed information measure */
- int infodf; /* degrees of freedom for information */
- double totinfo = 0.0; /* accumulated information */
- int totdf = 0; /* accumulated degrees of freedom */
-
- while ( fgets( line, MAXLINE, stdin ) != NULL ) /* start new table */
- {
- for ( p = line; *p != '\0' && isspace( (int)*p ); ++p )
- ;
-
- if ( *p == '\0' )
- continue; /* skip blank line */
-
- if ( *p == COMMENT )
- { /* copy comment through */
- (void)fputs( line, stdout );
- continue;
- }
-
- if ( sscanf( p, "%d %d\n", &r, &c ) != 2 )
- {
- (void)fputs( "* invalid row/column line *\n", stderr );
- return EXIT_FAILURE;
- }
-
- if ( r * c > MAXTBL )
- {
- (void)fputs( "* table too large *\n", stderr );
- return EXIT_FAILURE;
- }
-
- /* input tallies */
-
- for ( i = 0; i < r; ++i )
- for ( j = 0; j < c; ++j )
- if ( scanf( " %ld", &x(i,j) ) != 1 )
- {
- (void)fputs( "* EOF in table *\n",
- stderr
- );
- return EXIT_FAILURE;
- }
-
- /* compute statistic */
-
- info = InfoTbl( r, c, f, &infodf );
-
- /* print results */
-
- if ( info >= 0.0 )
- {
- (void)printf( "2info = %5.2f\tdf = %2d\tq = %7.4f\n",
- info, infodf,
- QChiSq( info, infodf )
- );
- totinfo += info;
- totdf += infodf;
- }
- else
- (void)fputs( info < -3.5 ? "out of memory\n"
- : info < -2.5 ? "table too small\n"
- : info < -1.5 ? "negative freq\n"
- : "table all zeros\n",
- stdout
- );
- }
-
- if ( totdf <= 0 )
- {
- (void)fputs( "\n*** no information accumulated ***\n", stdout );
- return EXIT_FAILURE;
- }
-
- (void)printf( "\ntotal 2info = %5.2f\tdf = %2d\tq = %7.4f\n",
- totinfo, totdf,
- QChiSq( totinfo, totdf )
- );
- return EXIT_SUCCESS;
- }
- END OF tot_info.c
- echo 'tot_info.exp' 1>&2
- cat >'tot_info.exp' <<'END OF tot_info.exp'
- # MilCrypI.60 biliteral cryptogram (trial pairings against G)
- # G vs B
- 2info = 11.01 df = 9 q = 0.2748
- # G vs D
- 2info = 12.40 df = 9 q = 0.1915
- # G vs J
- 2info = 9.00 df = 9 q = 0.4375
- # G vs L
- 2info = 19.03 df = 9 q = 0.0250
- # G vs N
- 2info = 10.89 df = 9 q = 0.2830
- # G vs Q
- 2info = 15.82 df = 9 q = 0.0707
- # G vs S (correct pairing)
- 2info = 3.11 df = 9 q = 0.9596
- # G vs V
- 2info = 14.47 df = 9 q = 0.1066
- # G vs X
- 2info = 15.31 df = 9 q = 0.0826
- # Since most pairings are incorrect,
- # the aggregate probability is small.
-
- total 2info = 111.05 df = 81 q = 0.0150
- END OF tot_info.exp
- echo 'tot_info.in' 1>&2
- cat >'tot_info.in' <<'END OF tot_info.in'
- # MilCrypI.60 biliteral cryptogram (trial pairings against G)
- # G vs B
- 2 10
- 2 2 2 0 3 0 0 1 0 1
- 3 1 1 1 1 2 2 1 2 1
- # G vs D
- 2 10
- 2 2 2 0 3 0 0 1 0 1
- 4 1 4 4 1 1 1 3 4 2
- # G vs J
- 2 10
- 2 2 2 0 3 0 0 1 0 1
- 1 1 1 1 1 1 2 1 1 1
- # G vs L
- 2 10
- 2 2 2 0 3 0 0 1 0 1
- 1 4 0 4 3 4 5 3 3 4
- # G vs N
- 2 10
- 2 2 2 0 3 0 0 1 0 1
- 4 1 4 3 1 1 1 2 3 3
- # G vs Q
- 2 10
- 2 2 2 0 3 0 0 1 0 1
- 0 2 0 2 1 1 1 0 1 0
- # G vs S (correct pairing)
- 2 10
- 2 2 2 0 3 0 0 1 0 1
- 1 2 2 0 2 1 0 0 0 1
- # G vs V
- 2 10
- 2 2 2 0 3 0 0 1 0 1
- 1 4 1 3 4 4 4 3 4 3
- # G vs X
- 2 10
- 2 2 2 0 3 0 0 1 0 1
- 0 1 0 1 2 1 1 0 2 0
- # Since most pairings are incorrect,
- # the aggregate probability is small.
- END OF tot_info.in
-
-