home *** CD-ROM | disk | FTP | other *** search
- #!/usr/local/bin/kermit +
- #
- # s t a t i s t i c s
- #
- # Given a file in which each line contains a pair of numbers, X and Y,
- # computes and prints the max, min, mean, variance, and standard deviation of
- # the X's and Y's, and the correlation coefficient of X and Y. The numbers in
- # file may (but need not) have decimal points and fractional parts.
- #
- # Usage: statistics <filename>
- # Requires: C-Kermit 7.0 or later.
- # Illustrates: Floating-point computation and formatting, file i/o.
- #
- # Note that floating-point arithmetic, unlike integer arithmetic, is done with
- # function calls. The floating point function names all begin with "ffp".
- # Also note the use of \fcontent() in the FOPEN statement; this to allow for
- # DOS filenames with backslash path-separators.
- #
- # Author: F. da Cruz, The Kermit Project, Columbia University, July 1999.
- # Version 2: Adds IF FLOAT checking.
- #
- if < \v(argc) 2 exit 1 Usage: \%0 filename ; A filename is required
-
- ; Output function for X and Y statistics.
- ; Args: Label, X value, Y value.
- ;
- define xxout {
- echo {\frpad(\%1:,16)\flpad(\ffprou(\%2,2),10)\%9\flpad(\ffprou(\%3,2),10)}
- }
-
- fopen /read \%c \fcontents(\%1) ; Open the data file
- if fail exit 1 ; Check
- .\%n = 0 ; Number of X,Y pairs
- .x:sum = 0 ; Initialize sums
- .y:sum = 0 ; (not strictly necessary)
- .x:sum2 = 0
- .y:sum2 = 0
- .xy:sum = 0
-
- while not \f_eof(\%c) { ; Until end of input file...
- fread \%c line ; Read a line (record)
- if fail break ; Check
- if not def line continue ; Ignore empty lines
- .\%x := \fword(\m(line),1,{ }) ; Extract X and Y from the record
- .\%y := \fword(\m(line),2,{ })
- if ( not ( float \%x && float \%y ) ) exit 1 Bad record: "\m(line)"
- .x:sum := \ffpadd(\m(x:sum),\%x) ; Get sums and check validity
- .y:sum := \ffpadd(\m(y:sum),\%y)
- if = \%n 0 { .x:max := \%x, .x:min := \%x, .y:max := \%y, .y:min := \%y }
- incr \%n ; Count this record
- .x:max := \ffpmax(\m(x:max),\%x) ; Accumulate max, min, other stats
- .x:min := \ffpmin(\m(x:min),\%x)
- .y:max := \ffpmax(\m(y:max),\%y)
- .y:min := \ffpmin(\m(y:min),\%y)
- .x:sum2 := \ffpadd(\m(x:sum2),\ffpmult(\%x,\%x)) ; Sum of squares (X)
- .y:sum2 := \ffpadd(\m(y:sum2),\ffpmult(\%y,\%y)) ; Sum of squares (Y)
- .xy:sum := \ffpadd(\m(xy:sum),\ffpmult(\%x,\%y)) ; Sum of products (XY)
- }
- fclose \%c ; Finished reading data - close file
- echo
- echo Records: \%n
- if < \%n 2 exit 1 {Sorry, at least two data pairs are required.}
-
- ; Compute means, intermediate quantities, variances.
- ;
- .x:mean := \ffpdiv(\m(x:sum),\%n)
- .y:mean := \ffpdiv(\m(y:sum),\%n)
- .x:ss := \ffpsub(\m(x:sum2),\ffpdiv(\ffpmult(\m(x:sum),\m(x:sum)),\%n))
- .y:ss := \ffpsub(\m(y:sum2),\ffpdiv(\ffpmult(\m(y:sum),\m(y:sum)),\%n))
- .xy:ss := \ffpsub(\m(xy:sum),\ffpdiv(\ffpmult(\m(x:sum),\m(y:sum)),\%n))
- .x:var := \ffpdiv(\m(x:ss),\%n)
- .y:var := \ffpdiv(\m(y:ss),\%n)
-
- ; Print the results
-
- echo { X Y}
- xxout Sum \m(x:sum) \m(y:sum)
- xxout Minimum \m(x:min) \m(y:min)
- xxout Maximum \m(x:max) \m(y:max)
- xxout Mean \m(x:mean) \m(y:mean)
- xxout Variance \m(x:var) \m(y:var)
- xxout {Std deviation} \ffpsqrt(\m(x:var)) \ffpsqrt(\m(y:var))
- echo
- echo Correlation coefficient: -
- \ffpround(\ffpdiv(\m(xy:ss),\ffpsqrt(\ffpmul(\m(x:ss),\m(y:ss)))),2)
-
- exit 0
-