home *** CD-ROM | disk | FTP | other *** search
-
-
- (*:Version: Mathematica 2.0 *)
-
- (*:Name: Statistics`LinearRegression *)
-
- (*:Title: Linear Regression Analysis *)
-
-
- (*:Legal:
- Copyright (c) 1990, Wolfram Research, Inc.
- *)
-
- (*:Summary:
- Least squares or weighted least squares linear regression
- for data whose errors are assumed to be normally and
- independently distributed.
-
- *)
-
- (*:Keywords: linear regression, design matrix *)
-
- (*:Requirements: No special system requirements. *)
-
- (*:Warning: None. *)
-
- (*:Sources: Basic statistics texts. *)
-
-
- BeginPackage["Statistics`LinearRegression`",
- "Statistics`Common`DistributionsCommon`",
- "Statistics`NormalDistribution`",
- "Statistics`ConfidenceIntervals`",
- "Statistics`DescriptiveStatistics`"]
-
- Regress::usage =
- "Regress[data, funs, vars] finds a least-squares fit to a list of data as
- a linear combination of the functions funs of variables vars. The data
- can have the form {{x1, y1, ..., f1}, {x2, y2, ..., f2}, ...}, where the
- number of coordinates x, y, ... is equal to the number of variables in
- the list vars. The data can also be of the form {f1, f2, ...}, with a
- single coordinate assumed to take values 1, 2, .... The argument funs can
- be any list of functions that depend only on the objects vars."
-
- Options[Regress] = {OutputList -> Null, OutputControl->Automatic,
- IncludeConstant -> True, BasisNames -> Automatic,
- Weights -> Equal} ~Join~ Options[SingularValues] ~Join~
- Options[StudentTCI]
-
- DesignedRegress::usage =
- "DesignedRegress[designmatrix,rdata] finds a least-squares fit given a
- design matrix and a vector of the response data. "
-
- Options[DesignedRegress] = { OutputList -> Null, Weights -> Equal,
- BasisNames -> Automatic, OutputControl->Automatic } ~Join~
- Options[SingularValues] ~Join~ Options[StudentTCI]
-
-
- DesignMatrix::usage =
- "DesignMatrix[data,funs,vars] gives the design matrix for modeling the
- data as a linear combination of the functions. The data can have the
- form {{x1, y1, ..., f1}, {x2, y2, ..., f2}, ...}, where the
- number of coordinates x, y, ... is equal to the number of variables in
- the list vars. The data can also be of the form {f1, f2, ...}, with a
- single coordinate assumed to take values 1, 2, .... The argument funs can
- be any list of functions that depend only on the objects vars."
-
-
- (* options *)
-
- BasisNames::usage =
- "BasisNames is an option to regression functions and is used to
- include headings for each of the basis functions in the output."
-
- IncludeConstant::usage =
- "IncludeConstant is an option to the Regress function and is
- used to indicate whether a constant term is automatically
- included in the models."
-
- OutputList::usage =
- "OutputList is an option to regression functions and is used to
- specify a list of statistics to be reported about the fit."
-
- OutputControl::usage =
- "OutputControl is an option to regression functions and is used to
- specify whether standard information is automatically included
- in the output."
-
- Weights::usage =
- "Weights is an option to regression functions and is used to define a
- vector of weights for the variances. When weights are specified, the
- weighted least-squares fit is calculated."
-
- NoPrint::usage =
- "NoPrint specifies that the OutputControl option should force only the
- information specified by the OutputList option to be returned."
-
- (* Possible list elements for OutputList *)
-
-
- BestFit::usage =
- "BestFit is used in the output of regression functions to
- identify the best fit."
-
- BestFitCoefficients::usage =
- "BestFitCoefficients is used in the output of regression
- functions to identify the list of values of the fitted parameters
- in the best fit."
-
- ANOVATable::usage =
- "ANOVATable is used in the output of regression functions to
- identify the analysis of variance table. This table includes F-ratio
- tests for comparing the models specified in the function, as well as
- the degrees of freedom and sums of squares used to obtain these
- F ratios."
-
- ConfidenceIntervalTable::usage =
- "ConfidenceIntervalTable is used in the output of regression
- functions to identify a table of confidence intervals for the best
- fit parameters."
-
- CovarianceMatrix::usage =
- "CovarianceMatrix is used in the output of regression functions
- to identify the estimated covariance matrix of the fit coefficients."
-
- CorrelationMatrix::usage =
- "CorrelationMatrix is used in the output of regression functions
- to identify the estimated correlation matrix of the fit coefficients."
-
- EstimatedVariance::usage =
- "EstimatedVariance is used in the output of regression
- functions to identify the variance estimate, or the residual
- mean square."
-
- FitResiduals::usage =
- "FitResiduals is used in the output of regression functions
- to identify the list of differences between the response data and
- the best fit evaluated at the same abscissa points."
-
- ParameterTable::usage =
- "ParameterTable is used in the output of regression functions
- to identify a table of information about the best fit parameters."
-
- PredictedResponse::usage =
- "PredictedResponse is used in the output of regression functions
- to identify the best fit evaluated at the data points."
-
- RSquared::usage =
- "RSquared is used in the output of regression functions to
- identify the square of the multiple correlation coefficient."
-
- AdjustedRSquared::usage =
- "AdjustedRSquared is used in the output of regression functions
- to identify the multiple correlation coefficient adjusted for the number
- of degrees of freedom in the fit."
-
-
-
- Begin["`private`"]
-
- Regress[arguments___] :=
- Block[{answer = vRegress[arguments]},
- answer /; answer =!= Fail
- ]
-
- vRegress[data_, basis_?VectorQ, vars_, options___] :=
- Module[
- {varlist, rdata, cbasis, cpresent,report,localDesignMatrix,
- name,optionlist,old},
- optionlist = {options};
- If[Head[vars] === List, varlist = vars, varlist = {vars}];
- {idata, rdata} = GetData[data, Length[varlist]];
- If[idata === Fail, Return[Fail]];
- cbasis = basis;
- cpresent = Or @@ Apply[And, Outer[FreeQ, basis, varlist], {1}];
- If[And[!cpresent, IncludeConstant /.optionlist/. Options[Regress]],
- PrependTo[cbasis, 1];
- cpresent = True
- ];
- localDesignMatrix = DesignMatrix[idata, cbasis, varlist];
- If[Length[rdata] <= Length[cbasis],
- Message[Regress::mindata]
- ];
- name = BasisNames/.optionlist/.Options[Regress];
- If[name == Automatic,
- optionlist=ChangeValue[optionlist,BasisNames,cbasis]];
- report = DesignedRegress[localDesignMatrix, rdata, optionlist];
- If[!FreeQ[report, BestFit],
- old = GetValue[report,BestFit];
- report = ChangeValue[report,BestFit,old.cbasis]];
- report
- ]
-
- vRegress[badargs___]:=
- (If[badargs =!= Fail,Message[Regress::regf]];Fail)
-
- DesignMatrix[arguments___] :=
- Block[{answer = vDesignMatrix[arguments]},
- answer /; answer =!= Fail
- ]
-
- vDesignMatrix[data_, basis_?VectorQ, vars_] :=
- Module[{PureFunction,varlist,ndata,func,slots,rules,n},
- If[Head[vars] === List, varlist = vars, varlist = {vars}];
- If[Length[data[[1]]]==0,
- ndata = Transpose[{Range[Length[data]],data}],
- ndata = data];
- If[!ListQ[data],Message[DesignMatrix::desd]; Return[Fail]];
- PureFunction = Map[Release,PrepareFunction[basis,varlist]];
- If[!FreeQ[basis,Slot],
- Message[Regress::slot];
- slots = Table[Slot[n], {n, 1, Length[varlist]}];
- rules = Thread[slots -> varlist];
- PureFunction = PureFunction /.rules ] ;
- Apply[PureFunction,ndata, 1]
- ]
-
- vDesignMatrix[badargs___]:=
- (If[badargs =!= Fail,Message[DesignMatrix::desf]];Fail)
-
-
- PrepareFunction[basis_, vars_] :=
- If[VectorQ[vars],Return[Function[vars, basis]] ]
-
- DesignedRegress[arguments___] :=
- Block[{answer = vDesignedRegress[arguments]},
- answer /; answer =!= Fail
- ]
-
- vDesignedRegress[designmatrix_List, rdata_?VectorQ, options___] :=
- (* designmatrix::design matrix for regression *)
- (* rdata::vector of response values *)
- (* optionlist::list of options *)
- Module[{names, optionlist, weights, sqrtweights, beta,
- ValidWeights = False,
- localRData = rdata, localDesignMatrix = designmatrix,
- tol, SVD, u, w, v, (* input and output of SingularValues *)
- OrthoResponse, (* orthogonal projection of localRData *)
- fitlist, (* best fit coefficients *)
- localPredictedResponse, localFitResiduals,
- ModelDOF, TotalDOF, ErrorDOF,
- ModelSS, TotalSS, ErrorSS,
- TotalResiduals, (* residuals from fitting a constant *)
- localRSquared,
- ResponseVariance, (* estimated variance from best fit *)
- localPValue, localFRatio, CM, rootCM,
- report, (* output accumulation *)
- SEList, (* list of standard errors *)
- CILevel, CIList, ctable,
- localCorrelationMatrix,
- TRatioList, tpvalues, ttable },
-
- (* READ IN OPTIONS *)
-
- optionlist = {options};
- If[Length[optionlist] == 1 && !VectorQ[optionlist],
- {optionlist} = optionlist];
- tol = Tolerance /. optionlist /. Options[DesignedRegress];
- weights = Weights /. optionlist /. Options[DesignedRegress];
- CILevel = ConfidenceLevel /. optionlist /. Options[DesignedRegress];
- ValidWeights = VectorQ[weights] && Length[weights]==Length[rdata] &&
- Apply[And, Map[NumberQ, N[weights]]];
- If[ValidWeights,
- sqrtweights = Sqrt[weights];
- localDesignMatrix = sqrtweights localDesignMatrix;
- localRData = sqrtweights localRData,
- If[weights =!= Equal, Message[Regress::regw, Length[rdata]]]
- ];
-
- (* NUMERICALIZE DESIGN MATRIX & RESPONSE DATA *)
- {localDesignMatrix, localRData} =
- MakeNumerical[localDesignMatrix, localRData];
- If[localDesignMatrix === Fail, Return[Fail]];
-
- (* GET SVD AND COMPUTE COEFFICIENTS *)
-
- SVD = SingularValues[localDesignMatrix, Tolerance -> tol];
- If[Head[SVD] === List && Length[SVD] === 3,
- {u, w, v} = SVD,
- (* else *)
- Message[Regress::noSVD];
- Return[Fail]
- ];
- If[Length[w] < Length[localDesignMatrix[[1]]],
- Message[Regress::notdep]];
-
- OrthoResponse = u . localRData;
-
- fitlist = Transpose[v] . (OrthoResponse / w);
-
- localPredictedResponse = localDesignMatrix . fitlist;
- localFitResiduals = localRData - localPredictedResponse;
- ErrorDOF = Length[rdata] - Length[Transpose[localDesignMatrix]];
- ErrorSS = localFitResiduals . localFitResiduals;
-
- (* COMPARING MODELS *)
-
- If[IncludeConstant /.optionlist/. Options[Regress],
- beta = If[ValidWeights,
- (* numericalize sqrtweights *)
- If[Precision[sqrtweights] == Infinity,
- sqrtweights = N[sqrtweights]];
- sqrtweights (localRData.sqrtweights)/Apply[Plus,weights],
- Mean[localRData]];
- TotalResiduals = localRData - beta;
- TotalDOF = Length[rdata] - 1,
- (* else *)
- TotalResiduals = localRData;
- TotalDOF = Length[rdata],
- (* error *)
- Message[Regress::uncon];
- Return[Fail]
- ];
- TotalSS = TotalResiduals . TotalResiduals;
- ModelDOF = TotalDOF - ErrorDOF;
- ModelSS = TotalSS - ErrorSS;
-
- ResponseVariance = ErrorSS/ErrorDOF;
- localRSquared = ModelSS/TotalSS;
-
-
- (* REPORT GENERATION *)
-
- report = GetOutputList[optionlist];
- If[MemberQ[report, ANOVATable],
- localFRatio = (ModelSS/ModelDOF)/ResponseVariance;
- localPValue = Chop[1-CDF[FRatioDistribution[ModelDOF,
- ErrorDOF],localFRatio],.00001];
- report = report /. ANOVATable ->
- (ANOVATable ->
- TableForm[{{ModelDOF, ModelSS, ModelSS/ModelDOF,
- localFRatio, localPValue},
- {ErrorDOF, ErrorSS, ErrorSS/ErrorDOF},
- {TotalDOF, TotalSS}},
- TableHeadings->{{"Model", "Error", "Total"},
- {"DoF", "SoS", "MeanSS", "FRatio", "PValue"}}
- ] );
- ];
-
- (* compute things involving the covariance matrix *)
- If[Or @@ Map[MemberQ[report, #]&, {CovarianceMatrix,
- CorrelationMatrix, ParameterTable,
- ConfidenceIntervalTable}],
- names = BasisNames/.optionlist/.Options[Regress];
- CM = Transpose[v] . (v / w^2);
- rootCM = Sqrt[DiagonalElements[CM]];
- SEList = Sqrt[ResponseVariance] rootCM;
- CIList := Apply[N[StudentTCI[#1, #2, ErrorDOF,
- ConfidenceLevel -> CILevel]]&,
- Transpose[{fitlist, SEList}], 1];
- localCorrelationMatrix := Transpose[Transpose[CM/rootCM]/rootCM];
- TRatioList = fitlist / SEList;
- tpvalues = Map[
- CDF[StudentTDistribution[ErrorDOF],#]&,
- TRatioList];
- tpvalues =Chop[2 Map[If[#>.5, 1-#, #]&,tpvalues],.00001];
- ttable := Transpose[{fitlist, SEList, TRatioList, tpvalues}];
- ctable := Transpose[{fitlist, CIList}];
- report = report /. CovarianceMatrix ->
- (CovarianceMatrix -> MatrixForm[CM]);
- report = report /. CorrelationMatrix ->
- (CorrelationMatrix ->
- MatrixForm[localCorrelationMatrix]);
- report = report /. ParameterTable ->
- (ParameterTable -> TableForm[ttable,
- TableHeadings -> {names, {"Estimate", "SE", "TStat", "PValue"}}]);
- report = report /. ConfidenceIntervalTable ->
- (ConfidenceIntervalTable -> TableForm[ctable, TableDepth -> 2,
- TableHeadings -> {names, {"Estimate", "CI"}}])
- ];
- report = report /. RSquared -> (RSquared -> localRSquared);
- report = report /. AdjustedRSquared ->
- (AdjustedRSquared -> 1-(1-localRSquared)(TotalDOF/ErrorDOF));
- report = report /. EstimatedVariance ->
- (EstimatedVariance -> ResponseVariance);
- report = report /. BestFit -> (BestFit->fitlist);
- report = report /. BestFitCoefficients ->
- (BestFitCoefficients -> fitlist);
- (* correct the predicted response and residuals if weighted. *)
- If[ValidWeights,
- localPredictedResponse /= sqrtweights;
- localFitResiduals /= sqrtweights ];
- report = report /. PredictedResponse :>
- (PredictedResponse -> localPredictedResponse);
- report = report /. FitResiduals :>
- (FitResiduals -> localFitResiduals);
- report
- ] (* end of DesignedRegress *)
-
- vDesignedRegress[badargs___]:=
- (If[badargs =!= Fail,Message[Regress::regd]];Fail)
-
- DiagonalElements[matrix_] :=
- Module[{k, size = Length[matrix]},
- Table[matrix[[k, k]], {k, 1, size}]
- ]
-
-
- GetValue[list_,name_]:=Module[{rule},
- rule = Select[list,!FreeQ[#,name]&];
- name /. rule]
-
- ChangeValue[list_,name_,newval_]:=
- If[FreeQ[list,name],Prepend[list,(name->newval)],
- Map[If[!FreeQ[#,name],
- Replace[name->_,(name->_)->(name->newval)],#]&,list]]
-
-
- GetData[data_?VectorQ, varcount_] :=
- If[varcount === 1,
- {Transpose[{Range[Length[data]],data}], data},
- Message[Regress::fitc, 1, varcount]; {Fail, Fail}
- ]
-
- GetData[data_?MatrixQ, varcount_] :=
- If[Length[data[[1]]] > varcount,
- {data, Map[Last, data]},
- Message[Regress::fitc, Length[data[[1]]]-1, varcount];
- {Fail, Fail}
- ]
-
- GetData[___] := (Message[Regress::regd]; {Fail, Fail})
-
- MakeNumerical[DesignMatrix_, rdata_] :=
- (* The purpose of MakeNumerical is to determine if DesignMatrix *)
- (* is in a form appropriate for SingularValues. *)
- Module[{dmPrecision = Precision[{DesignMatrix, rdata}],
- nDesignMatrix, nrdata},
- {nrdata, nDesignMatrix} =
- If[dmPrecision < Infinity,
- N[{rdata, DesignMatrix}, dmPrecision],
- N[{rdata, DesignMatrix}]
- ];
- If[MatrixQ[nDesignMatrix],
- {nDesignMatrix, nrdata},
- (* else *)
- Message[Regress::notnum];
- {Fail, Fail}
- ]
- ]
-
- GetOutputList[options_] :=
- Module[{localReport,control,outlist},
- control = OutputControl /. options /.Options[Regress];
- outlist = OutputList /. options /.Options[Regress];
- If[outlist === Null, localReport = {}, localReport = outlist];
- Which[control === Automatic,
- localReport = Join[localReport,{ParameterTable,RSquared,
- AdjustedRSquared,EstimatedVariance,ANOVATable}],
- control =!= NoPrint,
- Message[Regress::optx, control, OutputControl -> control]
- ];
- localReport ]
-
-
- DesignMatrix::desf = "The second argument to DesignMatrix is not a list
- of functions."
-
- DesignMatrix::desd = "The first argument to DesignMatrix is not a list
- or rectangular array of data."
-
- Regress::regf = "The second argument to Regress is not a list of
- functions."
-
- Regress::optx = "Unknown option `1` in `2`."
-
- Regress::uncon = "Regress was unable to determine that a constant term is
- present in the model. (This should not happen under normal conditions.)"
-
- Regress::notnum = "The basis functions yielded non-numerical results
- when evaluated at the data points."
-
- Regress::regc = "Number of independent coordinates (`1`) does not
- equal the number of parameters (`2`)."
-
- Regress::slot = "Warning: The basis functions contain Slot specifiers
- that will be interpreted as the corresponding variables from the
- variable list."
-
- Regress::regd = "The first argument to Regress is not a list or
- rectangular array of data."
-
- Regress::regw = "Warning: the Weights option must be set to a vector of numbers
- having length ``, the length of the first argument to Regress. Weights ->
- Equal assumed."
-
- Regress::notdep = "The fit is numerically independent of a linear
- combination of the basis functions."
-
- Regress::noSVD = "Regress was unable to obtain the singular value
- decomposition for the design matrix of this problem."
-
- Regress::mindata = "There are as many or more basis functions than data
- points. Subsequent results may be misleading."
-
- End[]
-
-
- SetAttributes[ Regress ,ReadProtected];
- SetAttributes[ DesignedRegress ,ReadProtected];
- SetAttributes[ DesignMatrix ,ReadProtected];
-
- Protect[ Regress, DesignedRegress, DesignMatrix];
-
-
- EndPackage[]
-
-