home *** CD-ROM | disk | FTP | other *** search
- # Utility to find least squares best fit
- # Copyright (C) 1990 Terry D. Boldt, All Rights Reserved
- #
- # Least Square curve fit to best curve. May also specify point through which
- # desired curve must pass.
- # In general linearize and fit: Y = A + BX
- # If forcing through a specified point, Fit: Y = BX
- # Fit the following:
- # Linear:
- # 1) y = a + bx Y = y, A = a, B = b, X = x
- # 2) y = a + (b/x) Y = y, A = a, B = b, X = 1/x x ╪ 0
- # 3) y = 1/(a + bx) Y = 1/y, A = a, B = b, X = x y ╪ 0
- # 4) y = x/(a + bx) Y = 1/y, A = b, B = a, X = 1/x y ╪ 0, x ╪ 0
- # Exponential:
- # 5) y = ae^(bx) Y = ln(y), A = ln(a), B = b, X = x y > 0
- # 6) y = ae^(b/x) Y = ln(y), A = ln(a), B = b, X = 1/x y > 0, x ╪ 0
- # Logarithmic:
- # 7) y = a + b*ln(x) Y = y, A = a, B = b, X = ln(x) x > 0
- # 8) y = 1/(a + b*ln(x)) Y = 1/y, A = a, B = b, X = ln(x) y ╪ 0, x > 0
- # Power:
- # 9) y = ax^b Y = ln(y), A = ln(a), B = b, X = ln(x) y > 0, x > 0
- #
- # Input File Format: (Data and Commands)
- # x_data y_data # comment
- # solve # will find best fit to data input
- # solve n # 1 <= n <= 9, will fit data to curve n
- # new # will re-initialize
- # force # force curve to fit through origin, (0,0)
- # force x y # force curve to fit through point (x,y)
- # pred_x y # predict x given current curve and y
- # pred_y x # predict y given current curve and x
- #
- # Reference on forcing to a specified point:
- # Edward B. Hands, "Forcing Regression Through a Given Point Using Any
- # Familiar Computational Routine", U.S. Army, Corps of Engineers, Coastal
- # Engineering Research Center, Technical Paper No. 83-1, March 1983
- # Several errors in the equations are present and corrected in this
- # implementation. The technique translates the origin of the co-ordinate
- # system to the point to be fit, and fits the equation: y' = b*x'
- # then translates back to the original system, recovering 'a' in the
- # second translation
- #
- BEGIN {
- # command and data regular expressions
- input_data = /({_i}|{_f}|{_r})/;
- data_comment = /({_w}+#.*)?$/;
- data = /{input_data}{_w}+{input_data}{data_comment}/;
- data_line = /^{_w}*{data}/;
- Solve = /^{_w}*[Ss]olve({_w}|$)/;
- New = /^{_w}*[Nn]ew({_w}|$)/;
- Force = /^{_w}*[Ff]orce({_w}+{data})?/;
- Pred_x = /^{_w}*[Pp]red_x{_w}+{input_data}{data_comment}/;
- Pred_y = /^{_w}*[Pp]red_y{_w}+{input_data}{data_comment}/;
-
- # array to hold strings of equations solved, y indepedent variable
- curve_strx[1] = "x = (y - a)/b";
- curve_strx[2] = "x = b/(y - a)";
- curve_strx[3] = "x = ((1/y) - a)/b";
- curve_strx[4] = "x = a/((1/y) - b)";
- curve_strx[5] = "x = ln(y/a)/b";
- curve_strx[6] = "x = b/ln(y/a)";
- curve_strx[7] = "x = e^((y - a)/b)";
- curve_strx[8] = "x = e^(((1/y) - a)/b)";
- curve_strx[9] = "x = e^(ln(y/a)/b)";
-
- # array to hold strings of equations solved, x indepedent variable
- curve_stry[1] = "y = a + bx";
- curve_stry[2] = "y = a + (b/x)";
- curve_stry[3] = "y = 1/(a + bx)";
- curve_stry[4] = "y = x/(a + bx)";
- curve_stry[5] = "y = ae^(bx)";
- curve_stry[6] = "y = ae^(b/x)";
- curve_stry[7] = "y = a + b*ln(x)";
- curve_stry[8] = "y = 1/(a + b*ln(x))";
- curve_stry[9] = "y = ax^b";
-
- # array to hold strings of equations to be solved for x with "execute" function
- curve_x_exe[1] = "(y - ca)/cb;";
- curve_x_exe[2] = "cb / (y - ca);";
- curve_x_exe[3] = "((1/y) - ca)/cb;";
- curve_x_exe[4] = "ca/((1/y) - cb);";
- curve_x_exe[5] = "log(y/ca)/cb;";
- curve_x_exe[6] = "cb/log(y/ca);";
- curve_x_exe[7] = "exp((y - ca)/cb);";
- curve_x_exe[8] = "exp(((1/y) - ca)/cb);";
- curve_x_exe[9] = "exp(log(y/ca)/cb);";
-
- # array to hold strings of equations to be solved for y with "execute" function
- curve_y_exe[1] = "ca + cb * z;";
- curve_y_exe[2] = "ca + (cb / z);";
- curve_y_exe[3] = "1/(ca + cb * z);";
- curve_y_exe[4] = "x/(ca + cb * z);";
- curve_y_exe[5] = "ca * exp(cb * z);";
- curve_y_exe[6] = "ca * exp(cb / z);";
- curve_y_exe[7] = "ca + cb * log(z);";
- curve_y_exe[8] = "1/(ca + cb * log(z));";
- curve_y_exe[9] = "ca * z ^ cb;";
-
- # array to hold functions to be used to solve for 'a' coeffiecent when
- # forcing through a point. use "execute" function to solve
- fcoefa[1] = "force_y - coef_b * force_x;";
- fcoefa[2] = "force_y - coef_b * ifx;";
- fcoefa[3] = "ify - coef_b * force_x;";
- fcoefa[4] = "ify - coef_b * ifx;";
- fcoefa[5] = "force_y / exp(coef_b * force_x);";
- fcoefa[6] = "force_y / exp(coef_b * ifx);";
- fcoefa[7] = "force_y - coef_b * lnfx;";
- fcoefa[8] = "ify - coef_b * lnfx;";
- fcoefa[9] = "force_y / force_x ^ coef_b;";
-
- stderr = "stderr";
- copyright;
- }
-
- INITIAL {
- init_sums;
- }
-
- # solve current data to specified or best fit
- Solve {
- local cftn = TRUE;
-
- if ( NF > 1 && $2 ~~ /{_i}/ ) {
- curve_no = $2;
- cftn = specified_curve(curve_no,TRUE);
- } else curve_no = best_fit;
- if ( cftn ) print_curve(curve_no,a[curve_no],b[curve_no],r[curve_no]);
- next;
- }
-
- # new curve fit problem, re-initialize
- New {
- init_sums;
- next;
- }
-
- # force fit through a given point, must re-initialize
- Force {
- init_sums;
- if ( NF > 1 && $2 ~~ input_data ) force_x = $2 + 0.0;
- if ( NF > 1 && $3 ~~ input_data ) force_y = $3 + 0.0;
- if ( force_x <= 0 ) {
- ifx = lnfx = 0.0;
- if ( force_x == 0 ) crv[2] = crv[4] = crv[6] = FALSE;
- else {
- crv[7] = crv[8] = crv[9] = FALSE;
- ifx = 1/force_x;
- }
- } else {
- lnfx = log(force_x);
- ifx = 1/force_x;
- }
- if ( force_y <= 0 ) {
- ify = lnfy = 0.0;
- if ( force_y == 0 ) crv[3] = crv[4] = crv[8] = FALSE;
- else {
- crv[5] = crv[6] = crv[9] = FALSE;
- ify = 1/force_y;
- }
- } else {
- lnfy = log(force_y);
- ify = 1/force_y;
- }
- force_pt = TRUE;
- next;
- }
-
- # predict x from y
- Pred_x {
- local y = $2 + 0.0;
- local fmts = "y = " ∩ $2 ∩ ", X = %.14g\n";
-
- if ( curve_no ) {
- print "Predict: " ∩ curve_strx[curve_no];
- printf(fmts,pred_x(curve_no,y));
- }
- next;
- }
-
- # predict y from x
- Pred_y {
- local x = $2 + 0.0;
- local fmts = "x = " ∩ $2 ∩ ", Y = %.14g\n";
-
- if ( curve_no ) {
- print "Predict: " ∩ curve_stry[curve_no];
- printf(fmts,pred_y(curve_no,x));
- }
- next;
- }
-
- # input data
- data_line {
- accumulate_point($1 + 0.0,$2 + 0.0);
- # to print input data, un-comment following line
- # print "x = " ∩ $1 ∩ "y = " ∩ $2;
- ocurve = FALSE;
- }
-
- FINAL {
- if ( !curve_no ) curve_no = best_fit;
- if ( !ocurve ) print_curve(curve_no,a[curve_no],b[curve_no],r[curve_no]);
- }
-
- function print_curve(cn,a,b,r2) {
- local sofmt = OFMT;
-
- OFMT = "%.14g";
- print "Fit Curve Number: " ∩ cn;
- print curve_stry[cn];
- print "a = " ∩ a;
- print "b = " ∩ b;
- print "Goodness: " ∩ r2;
- ocurve = TRUE;
- OFMT = sofmt;
- }
-
- function init_sums() {
- sum_x = 0.0;
- sum_y = 0.0;
- sum_x2 = 0.0;
- sum_y2 = 0.0;
- sum_xy = 0.0;
- sum_ix = 0.0;
- sum_iy = 0.0;
- sum_ix2 = 0.0;
- sum_iy2 = 0.0;
- sum_ixy = 0.0;
- sum_lnx = 0.0;
- sum_lny = 0.0;
- sum_lnx2 = 0.0;
- sum_lny2 = 0.0;
- sum_lnxy = 0.0;
- sum_xlny = 0.0;
- sum_ixlny = 0.0;
- sum_ylnx = 0.0;
- sum_iylnx = 0.0;
- sum_yix = 0.0;
- sum_xiy = 0.0;
- num_points = curve_no = 0;
- ocurve = FALSE;
- for ( i = 1 ; i < 10 ; i++ ) crv[i] = TRUE;
-
- force_pt = FALSE;
- force_x = 0.0;
- force_y = 0.0;
- lnfx = 0.0;
- lnfy = 0.0;
- ifx = 0.0;
- ify = 0.0;
- }
-
- function accumulate_point(x,y) {
- local invx, lnx;
- local invy, lny;
-
- if ( x <= 0 ) {
- invx = -ifx;
- lnx = 0.0;
- if ( x == 0 ) crv[2] = crv[4] = crv[6] = FALSE;
- else {
- crv[7] = crv[8] = crv[9] = FALSE;
- invx = (1/x) - ifx;
- }
- } else {
- invx = (1/x) - ifx;
- lnx = log(x) - lnfx;
- }
- if ( y <= 0 ) {
- invy = -ify;
- lny = 0.0;
- if ( y == 0 ) crv[3] = crv[4] = crv[8] = FALSE;
- else {
- crv[5] = crv[6] = crv[9] = FALSE;
- invy = (1/y) - ify;
- }
- } else {
- invy = (1/y) - ify;
- lny = log(y) - lnfy;
- }
- x -= force_x;
- y -= force_y;
- accum_pt(x,invx,lnx,y,invy,lny);
- if ( force_pt ) accum_pt(-x,-invx,-lnx,-y,-invy,-lny);
- }
-
- function accum_pt(x,ix,lnx,y,iy,lny) {
- local x2 = x * x, y2 = y * y;
- local ix2 = ix * ix, iy2 = iy * iy;
-
- sum_x += x;
- sum_y += y;
- sum_x2 += x2;
- sum_y2 += y2;
- sum_xy += x * y;
- sum_ix += ix;
- sum_ix2 += ix2;
- sum_ixlny += lny * ix;
- sum_yix += y * ix;
- sum_iy += iy;
- sum_iy2 += iy2;
- sum_iylnx += lnx * iy;
- sum_xiy += x * iy;
- sum_ixy += ix * iy;
- sum_lnx += lnx;
- sum_lny += lny;
- sum_lnx2 += lnx * lnx;
- sum_lny2 += lny * lny;
- sum_lnxy += lnx * lny;
- sum_xlny += x * lny;
- sum_ylnx += y * lnx;
- num_points++;
- }
-
- function best_fit() {
- local curve_no = FALSE, i, lr = 0;
-
- for ( i = 1 ; i < 10 ; i++ ) {
- if ( specified_curve(i,FALSE) ) {
- if ( r[i] > lr ) {
- curve_no = i;
- lr = r[i];
- }
- }
- }
- return curve_no;
- }
-
- function specified_curve(curve_no,msg) {
- local fit = FALSE, swap_ab;
-
- if ( crv[curve_no] ) {
- switch ( curve_no ) {
- case 1:
- calc_a_b(sum_x,sum_y,sum_xy,sum_x2,sum_y2,num_points,1);
- break;
- case 2:
- calc_a_b(sum_ix,sum_y,sum_yix,sum_ix2,sum_y2,num_points,2);
- break;
- case 3:
- calc_a_b(sum_x,sum_iy,sum_xiy,sum_x2,sum_iy2,num_points,3);
- break;
- case 4:
- calc_a_b(sum_ix,sum_iy,sum_ixy,sum_ix2,sum_iy2,num_points,4);
- swap_ab = coef_a;
- coef_a = coef_b;
- coef_b = swap_ab;
- break;
- case 5:
- calc_a_b(sum_x,sum_lny,sum_xlny,sum_x2,sum_lny2,num_points,5);
- if ( !force_pt ) coef_a = exp(coef_a);
- break;
- case 6:
- calc_a_b(sum_ix,sum_lny,sum_ixlny,sum_ix2,sum_lny2,num_points,6);
- if ( !force_pt ) coef_a = exp(coef_a);
- break;
- case 7:
- calc_a_b(sum_lnx,sum_y,sum_ylnx,sum_lnx2,sum_y2,num_points,7);
- break;
- case 8:
- calc_a_b(sum_lnx,sum_iy,sum_iylnx,sum_lnx2,sum_iy2,num_points,8);
- break;
- case 9:
- calc_a_b(sum_lnx,sum_lny,sum_lnxy,sum_lnx2,sum_lny2,num_points,9);
- if ( !force_pt ) coef_a = exp(coef_a);
- break;
- default:
- print "Improper Curve Specified";
- print "File: "FILENAME;
- print "Line #: "FNR;
- exit 20;
- break;
- }
- a[curve_no] = coef_a;
- b[curve_no] = coef_b;
- r[curve_no] = rr;
- fit = TRUE;
- } else if ( msg ) print "Data Cannot Fit Curve Selected !";
- return fit;
- }
-
- function calc_a_b(sum_x,sum_y,sum_xy,sum_x2,sum_y2,n,eqn) {
- local sxy = sum_xy - sum_x * sum_y / n;
- local ssx2 = sum_x2 - sum_x * sum_x / n;
- local ssy2 = sum_y2 - sum_y * sum_y / n;
-
- coef_b = sxy / ssx2;
- if ( force_pt ) coef_a = execute(fcoefa[eqn],TRUE);
- else coef_a = (sum_y - coef_b * sum_x) / n;
- rr = sqrt((sxy * sxy)/(ssx2 * ssy2));
- }
-
- # function to compute x from y and curve number
- function pred_x(cn,y) {
- local sofmt = OFMT;
- local curve = curve_x_exe[cn];
- local ca = a[cn], cb = b[cn];
- local x;
-
- OFMT = "%.14g";
- gsub(/y/,y,curve);
- x = execute(curve,TRUE);
- OFMT = sofmt;
- return x;
- }
-
- # function to compute y from x and curve number
- function pred_y(cn,x) {
- local sofmt = OFMT;
- local curve = curve_y_exe[cn];
- local ca = a[cn], cb = b[cn];
- local y;
-
- OFMT = "%.14g";
- gsub(/z/,x,curve);
- y = execute(curve,TRUE);
- OFMT = sofmt;
- return y;
- }
-
- # function display copyright
- function copyright() {
- fprintf(stderr,"Least Squares Best Fit.\nCopyright (C) 1990 Terry D. Boldt, All Rights Reserved.\n");
- }
-