regress.c File Reference

Compute regression coefficients with a regression constant given two vectors, having nterm variables and npts dimension (usually time). More...

#include <regress.h>
#include <misc.h>
Include dependency graph for regress.c:

Go to the source code of this file.

Functions

int regress (double *coef, double *x, double *y, double *cte, double *yreg, double *yerr, double *chisq, double *rsq, double *vif, double *autocor, int nterm, int npts)
 Compute regression coefficients with a regression constant given two vectors, having nterm variables (usually parameters) and npts dimension (usually time).

Detailed Description

Compute regression coefficients with a regression constant given two vectors, having nterm variables and npts dimension (usually time).

Definition in file regress.c.


Function Documentation

int regress ( double *  coef,
double *  x,
double *  y,
double *  cte,
double *  yreg,
double *  yerr,
double *  chisq,
double *  rsq,
double *  vif,
double *  autocor,
int  nterm,
int  npts 
)

Compute regression coefficients with a regression constant given two vectors, having nterm variables (usually parameters) and npts dimension (usually time).

Parameters:
[out] coef Regression coefficients
[in] x X vectors (nterm X npts) npts is usually time, nterm the number of parameters
[in] y Y vectors (npts) npts is usually time
[out] cte Regression constant
[out] yreg Y vector reconstructed with regression
[out] yerr Y error vector when reconstructing Y vector with regression
[out] chisq Chi-square diagnostic
[out] rsq Coefficient of determination diagnostic R^2 = 1 - ^2 / TSS
[out] vif Variance Inflation Factor for each parameter
[out] autocor Autocorrelation of residuals (Durbin-Watson test)
[in] nterm Variables dimension
[in] npts Vector dimension
Returns:
Status

Regression diagnostics

Definition at line 62 of file regress.c.

References alloc_error().

Referenced by main(), and wt_learning().

00063                                                                                        {
00081   gsl_vector_view row; /* To retrieve vector rows */
00082 
00083   gsl_matrix *xx; /* X matrix */
00084   gsl_matrix *cov; /* Covariance matrix */
00085 
00086   gsl_multifit_linear_workspace *work; /* Workspace */
00087 
00088   gsl_vector *yy; /* Y vector */
00089   gsl_vector *ccoef; /* Coefficients vector */
00090   gsl_vector *res; /* Residuals vector */
00091 
00092   int istat; /* Diagnostic status */
00093   int term; /* Loop counter for variables dimension */
00094   int vterm;
00095   int cterm;
00096   int pts; /* Loop counter for vector dimension */
00097 
00098   size_t stride = 1; /* Stride for GSL functions */
00099 
00100   double vchisq; /* Temporary value for chi^2 for VIF */
00101   double *ytmp; /* Temporary vector for y for VIF */
00102   double ystd; /* Standard deviation of fitted function */
00103 
00104   /* Allocate memory and create matrices and vectors */
00105   xx = gsl_matrix_alloc(npts, nterm+1);
00106   yy = gsl_vector_alloc(npts);
00107 
00108   ccoef = gsl_vector_alloc(nterm+1);
00109   cov = gsl_matrix_alloc(nterm+1, nterm+1);
00110 
00111   /* Create X matrix */
00112   for (term=0; term<nterm; term++)
00113     for (pts=0; pts<npts; pts++)
00114       (void) gsl_matrix_set(xx, pts, term+1, x[pts+term*npts]);
00115 
00116   /* Create first column of matrix for regression constant */
00117   for (pts=0; pts<npts; pts++)
00118     (void) gsl_matrix_set(xx, pts, 0, 1.0);  
00119   
00120   /* Create Y vector for all vector dimension */
00121   for (pts=0; pts<npts; pts++)
00122     (void) gsl_vector_set(yy, pts, y[pts]);
00123 
00124   /* Allocate workspace */
00125   work = gsl_multifit_linear_alloc(npts, nterm+1);
00126 
00127   /* Perform linear regression */
00128   istat = gsl_multifit_linear(xx, yy, ccoef, cov, chisq, work);
00129   if (istat != GSL_SUCCESS) {
00130     (void) fprintf(stderr, "%s: Line %d: Error %d in multifitting algorithm!\n", __FILE__, __LINE__, istat);
00131     (void) gsl_multifit_linear_free(work);
00132     (void) gsl_matrix_free(xx);
00133     (void) gsl_matrix_free(cov);
00134     (void) gsl_vector_free(yy);
00135     (void) gsl_vector_free(ccoef);
00136     return istat;
00137   }
00138   /* Free workspace */
00139   (void) gsl_multifit_linear_free(work);
00140 
00141   /* Debug output */
00142 #if DEBUG >= 7
00143 #define C(i) (gsl_vector_get(ccoef,(i)))
00144 #define COV(i,j) (gsl_matrix_get(cov,(i),(j)))
00145 #define X(i,j) (gsl_matrix_get(xx,(i),(j)))
00146      
00147   {
00148     printf ("# best fit: Y = %g + %g X + %g X^2\n", 
00149             C(0), C(1), C(2));
00150 
00151     printf ("# x matrix:\n");
00152     
00153     for (term=0; term<nterm+1; term++)
00154       for (pts=0; pts<npts; pts++)
00155         printf("term=%d pts=%d x=%lf\n", term, pts, X(pts,term));
00156     
00157     printf ("# covariance matrix:\n");
00158     printf ("[ %+.5e, %+.5e, %+.5e  \n",
00159             COV(0,0), COV(0,1), COV(0,2));
00160     printf ("  %+.5e, %+.5e, %+.5e  \n", 
00161             COV(1,0), COV(1,1), COV(1,2));
00162     printf ("  %+.5e, %+.5e, %+.5e ]\n", 
00163             COV(2,0), COV(2,1), COV(2,2));
00164     printf ("# chisq = %g\n", *chisq);
00165   }
00166 #endif
00167 
00168   /* Retrieve regression coefficients */
00169   for (term=0; term<nterm; term++)
00170     coef[term] = gsl_vector_get(ccoef, term+1);
00171   /* Retrieve regression constant */
00172   *cte = gsl_vector_get(ccoef, 0);
00173 
00174   /* Compute R^2 = 1 - \chi^2 / TSS */
00175   *rsq = 1.0 - ( (*chisq) / gsl_stats_tss(y, stride, (size_t) pts) );
00176 
00177   /* Reconstruct vector using regression coefficients, and calculate its standard deviation */
00178 #if DEBUG >= 7
00179   (void) fprintf(stdout, "%s: Vector reconstruction\n", __FILE__);
00180 #endif
00181   for (pts=0; pts<npts; pts++) {
00182     row = gsl_matrix_row(xx, pts);
00183     istat = gsl_multifit_linear_est(&row.vector, ccoef, cov, &(yreg[pts]), &ystd);
00184     if (istat != GSL_SUCCESS) {
00185       (void) fprintf(stderr, "%s: Line %d: Error %d in multifitting linear values estimation!\n", __FILE__, __LINE__, istat);
00186       (void) gsl_matrix_free(xx);
00187       (void) gsl_matrix_free(cov);
00188       (void) gsl_vector_free(yy);
00189       (void) gsl_vector_free(ccoef);
00190       return istat;
00191     }
00192   }
00193   /* Compute also residuals */
00194 #if DEBUG >= 7
00195   (void) fprintf(stdout, "%s: Residuals calculation\n", __FILE__);
00196 #endif
00197   res = gsl_vector_alloc(npts);
00198   istat = gsl_multifit_linear_residuals(xx, yy, ccoef, res);
00199   if (istat != GSL_SUCCESS) {
00200     (void) fprintf(stderr, "%s: Line %d: Error %d in multifitting linear values residuals estimation!\n", __FILE__, __LINE__, istat);
00201     (void) gsl_matrix_free(xx);
00202     (void) gsl_matrix_free(cov);
00203     (void) gsl_vector_free(yy);
00204     (void) gsl_vector_free(res);
00205     (void) gsl_vector_free(ccoef);
00206     return istat;
00207   }
00208   /* Store residuals */
00209   for (pts=0; pts<npts; pts++)
00210     yerr[pts] = gsl_vector_get(res, pts);
00211 
00212   /* Compute autocorrelation of residuals (Durbin-Watson) */
00213   *autocor = gsl_stats_lag1_autocorrelation(yerr, stride, npts);
00214 
00215   /* Dealloc matrices and vectors memory */
00216   (void) gsl_matrix_free(xx);
00217   (void) gsl_matrix_free(cov);
00218   (void) gsl_vector_free(yy);
00219   (void) gsl_vector_free(res);
00220   (void) gsl_vector_free(ccoef);
00221 
00222 
00225   /* VIF */
00226 #if DEBUG >= 7
00227   (void) fprintf(stdout, "%s: VIF calculation\n", __FILE__);
00228 #endif
00229   xx = gsl_matrix_alloc(npts, nterm);
00230   yy = gsl_vector_alloc(npts);
00231 
00232   ccoef = gsl_vector_alloc(nterm);
00233   cov = gsl_matrix_alloc(nterm, nterm);
00234 
00235   ytmp = (double *) malloc(npts * sizeof(double));
00236   if (ytmp == NULL) alloc_error(__FILE__, __LINE__);
00237   
00238   /* Loop over parameters */
00239   for (vterm=0; vterm<nterm; vterm++) {
00240 
00241     (void) gsl_matrix_set_zero(xx);
00242     (void) gsl_matrix_set_zero(cov);
00243 
00244     (void) gsl_vector_set_zero(yy);
00245     (void) gsl_vector_set_zero(ccoef);
00246 
00247     /* Create X matrix */
00248     cterm = 0;
00249     for (term=0; term<nterm; term++) {
00250       if (term != vterm) {
00251         for (pts=0; pts<npts; pts++)
00252           (void) gsl_matrix_set(xx, pts, cterm+1, x[pts+term*npts]);
00253         cterm++;
00254       }
00255     }
00256     
00257     /* Create first column of matrix for regression constant */
00258     for (pts=0; pts<npts; pts++)
00259       (void) gsl_matrix_set(xx, pts, 0, 1.0);  
00260     
00261     /* Create Y vector for all vector dimension, using the vterm X values */
00262     for (pts=0; pts<npts; pts++) {
00263       ytmp[pts] = x[pts+vterm*npts];
00264       (void) gsl_vector_set(yy, pts, ytmp[pts]);
00265     }
00266     
00267     /* Allocate workspace */
00268     work = gsl_multifit_linear_alloc(npts, nterm);
00269 
00270     /* Perform linear regression just to get chi^2 */
00271     istat = gsl_multifit_linear(xx, yy, ccoef, cov, &vchisq, work);
00272     if (istat != GSL_SUCCESS) {
00273       (void) fprintf(stderr, "%s: Line %d: Error %d in multifitting algorithm!\n", __FILE__, __LINE__, istat);
00274       (void) gsl_multifit_linear_free(work);
00275       (void) gsl_matrix_free(xx);
00276       (void) gsl_matrix_free(cov);
00277       (void) gsl_vector_free(yy);
00278       (void) gsl_vector_free(ccoef);
00279       (void) free(ytmp);
00280       return istat;
00281     }
00282     
00283     /* Free workspace */
00284     (void) gsl_multifit_linear_free(work);
00285 
00286     /* Compute R^2 = 1 - \chi^2 / TSS */
00287     /* and finally VIF = 1.0 / (1.0 - R^2) */
00288     vif[vterm] = 1.0 / (1.0 - (1.0 - ( vchisq / gsl_stats_tss(ytmp, stride, (size_t) pts) ) ));
00289   }
00290     
00291   /* Dealloc matrices and vectors memory */
00292   (void) gsl_matrix_free(xx);
00293   (void) gsl_matrix_free(cov);
00294   (void) gsl_vector_free(yy);
00295   (void) gsl_vector_free(ccoef);
00296   
00297   (void) free(ytmp);
00298 
00299   /* Success status */
00300   return 0;
00301 }


Generated on 12 May 2016 for DSCLIM by  doxygen 1.6.1