regress.h File Reference

Include file for regression library. More...

#include <stdio.h>
#include <sys/types.h>
#include <signal.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <string.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_multifit.h>
#include <gsl/gsl_statistics_double.h>
Include dependency graph for regress.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Defines

#define _GNU_SOURCE
 GNU extensions.

Functions

int regress (double *coef, double *x, double *y, double *cte, double *yreg, double *yerr, double *chisq, double *rsq, double *vif, double *autocor, int nterm, int npts)
 Compute regression coefficients with a regression constant given two vectors, having nterm variables (usually parameters) and npts dimension (usually time).
void apply_regression (double *buf, double *reg, double *cst, double *dist, double *sup_dist, int npts, int ntime, int nclust, int nreg)
 Compute a field using regression coefficients and the field values.

Detailed Description

Include file for regression library.

Definition in file regress.h.


Define Documentation

#define _GNU_SOURCE

GNU extensions.

Definition at line 55 of file regress.h.


Function Documentation

void apply_regression ( double *  buf,
double *  reg,
double *  cst,
double *  dist,
double *  sup_dist,
int  npts,
int  ntime,
int  nclust,
int  nreg 
)

Compute a field using regression coefficients and the field values.

Parameters:
[out] buf 2D field
[in] reg Regression coefficients
[in] cst Regression constant
[in] dist Values of input vector
[in] sup_dist If there is one supplemental regression coefficient, use a supplemental vector
[in] npts Points dimension
[in] ntime Time dimension
[in] nclust Cluster dimension
[in] nreg Regression dimension

Definition at line 59 of file apply_regression.c.

Referenced by wt_downscaling().

00060                                        {
00073   int nt; /* Time loop counter */
00074   int pts; /* Points loop counter */
00075   int clust; /* Cluster loop counter */
00076 
00077   /* Loop over all regression points */
00078   for (pts=0; pts<npts; pts++) {
00079     /* Loop over all times */
00080     for (nt=0; nt<ntime; nt++) {
00081       /* Initialize value */
00082       buf[pts+nt*npts] = 0.0;
00083       /* Loop over all clusters and add each value after regression is applied */
00084       for (clust=0; clust<nclust; clust++) {
00085         buf[pts+nt*npts] += (dist[nt+clust*ntime] * reg[pts+clust*npts]);
00086 
00087         //        if (pts == 0 && nt == (ntime-5))
00088         //          printf("%d %lf %lf %lf\n",clust,buf[pts+nt*npts],(dist[nt+clust*ntime]),(reg[pts+clust*npts]));
00089       }
00090       /* Extra regression coefficient with a second supplemental vector */
00091       if (nclust == (nreg-1)) {
00092         buf[pts+nt*npts] += (sup_dist[nt] * reg[pts+(nreg-1)*npts]);
00093       }      
00094       
00095       /* Add regression constant */
00096       buf[pts+nt*npts] += cst[pts];
00097 
00098       //     if (pts == 0 && nt == (ntime-5))
00099       //       printf("%lf %lf\n",buf[pts+nt*npts],cst[pts]);
00100     }
00101   }
00102   //  nt = ntime-1;
00103   //  pts = 0;
00104   //  for (clust=0; clust<nclust; clust++)
00105   //    printf("REGRESS %d %lf %lf %lf %lf\n",clust,buf[pts+nt*npts],dist[nt+clust*ntime],reg[pts+clust*npts],cst[pts]);
00106 }

int regress ( double *  coef,
double *  x,
double *  y,
double *  cte,
double *  yreg,
double *  yerr,
double *  chisq,
double *  rsq,
double *  vif,
double *  autocor,
int  nterm,
int  npts 
)

Compute regression coefficients with a regression constant given two vectors, having nterm variables (usually parameters) and npts dimension (usually time).

Parameters:
[out] coef Regression coefficients
[in] x X vectors (nterm X npts) npts is usually time, nterm the number of parameters
[in] y Y vectors (npts) npts is usually time
[out] cte Regression constant
[out] yreg Y vector reconstructed with regression
[out] yerr Y error vector when reconstructing Y vector with regression
[out] chisq Chi-square diagnostic
[out] rsq Coefficient of determination diagnostic R^2 = 1 - ^2 / TSS
[out] vif Variance Inflation Factor for each parameter
[out] autocor Autocorrelation of residuals (Durbin-Watson test)
[in] nterm Variables dimension
[in] npts Vector dimension
Returns:
Status

Regression diagnostics

Definition at line 62 of file regress.c.

References alloc_error().

Referenced by main(), and wt_learning().

00063                                                                                        {
00081   gsl_vector_view row; /* To retrieve vector rows */
00082 
00083   gsl_matrix *xx; /* X matrix */
00084   gsl_matrix *cov; /* Covariance matrix */
00085 
00086   gsl_multifit_linear_workspace *work; /* Workspace */
00087 
00088   gsl_vector *yy; /* Y vector */
00089   gsl_vector *ccoef; /* Coefficients vector */
00090   gsl_vector *res; /* Residuals vector */
00091 
00092   int istat; /* Diagnostic status */
00093   int term; /* Loop counter for variables dimension */
00094   int vterm;
00095   int cterm;
00096   int pts; /* Loop counter for vector dimension */
00097 
00098   size_t stride = 1; /* Stride for GSL functions */
00099 
00100   double vchisq; /* Temporary value for chi^2 for VIF */
00101   double *ytmp; /* Temporary vector for y for VIF */
00102   double ystd; /* Standard deviation of fitted function */
00103 
00104   /* Allocate memory and create matrices and vectors */
00105   xx = gsl_matrix_alloc(npts, nterm+1);
00106   yy = gsl_vector_alloc(npts);
00107 
00108   ccoef = gsl_vector_alloc(nterm+1);
00109   cov = gsl_matrix_alloc(nterm+1, nterm+1);
00110 
00111   /* Create X matrix */
00112   for (term=0; term<nterm; term++)
00113     for (pts=0; pts<npts; pts++)
00114       (void) gsl_matrix_set(xx, pts, term+1, x[pts+term*npts]);
00115 
00116   /* Create first column of matrix for regression constant */
00117   for (pts=0; pts<npts; pts++)
00118     (void) gsl_matrix_set(xx, pts, 0, 1.0);  
00119   
00120   /* Create Y vector for all vector dimension */
00121   for (pts=0; pts<npts; pts++)
00122     (void) gsl_vector_set(yy, pts, y[pts]);
00123 
00124   /* Allocate workspace */
00125   work = gsl_multifit_linear_alloc(npts, nterm+1);
00126 
00127   /* Perform linear regression */
00128   istat = gsl_multifit_linear(xx, yy, ccoef, cov, chisq, work);
00129   if (istat != GSL_SUCCESS) {
00130     (void) fprintf(stderr, "%s: Line %d: Error %d in multifitting algorithm!\n", __FILE__, __LINE__, istat);
00131     (void) gsl_multifit_linear_free(work);
00132     (void) gsl_matrix_free(xx);
00133     (void) gsl_matrix_free(cov);
00134     (void) gsl_vector_free(yy);
00135     (void) gsl_vector_free(ccoef);
00136     return istat;
00137   }
00138   /* Free workspace */
00139   (void) gsl_multifit_linear_free(work);
00140 
00141   /* Debug output */
00142 #if DEBUG >= 7
00143 #define C(i) (gsl_vector_get(ccoef,(i)))
00144 #define COV(i,j) (gsl_matrix_get(cov,(i),(j)))
00145 #define X(i,j) (gsl_matrix_get(xx,(i),(j)))
00146      
00147   {
00148     printf ("# best fit: Y = %g + %g X + %g X^2\n", 
00149             C(0), C(1), C(2));
00150 
00151     printf ("# x matrix:\n");
00152     
00153     for (term=0; term<nterm+1; term++)
00154       for (pts=0; pts<npts; pts++)
00155         printf("term=%d pts=%d x=%lf\n", term, pts, X(pts,term));
00156     
00157     printf ("# covariance matrix:\n");
00158     printf ("[ %+.5e, %+.5e, %+.5e  \n",
00159             COV(0,0), COV(0,1), COV(0,2));
00160     printf ("  %+.5e, %+.5e, %+.5e  \n", 
00161             COV(1,0), COV(1,1), COV(1,2));
00162     printf ("  %+.5e, %+.5e, %+.5e ]\n", 
00163             COV(2,0), COV(2,1), COV(2,2));
00164     printf ("# chisq = %g\n", *chisq);
00165   }
00166 #endif
00167 
00168   /* Retrieve regression coefficients */
00169   for (term=0; term<nterm; term++)
00170     coef[term] = gsl_vector_get(ccoef, term+1);
00171   /* Retrieve regression constant */
00172   *cte = gsl_vector_get(ccoef, 0);
00173 
00174   /* Compute R^2 = 1 - \chi^2 / TSS */
00175   *rsq = 1.0 - ( (*chisq) / gsl_stats_tss(y, stride, (size_t) pts) );
00176 
00177   /* Reconstruct vector using regression coefficients, and calculate its standard deviation */
00178 #if DEBUG >= 7
00179   (void) fprintf(stdout, "%s: Vector reconstruction\n", __FILE__);
00180 #endif
00181   for (pts=0; pts<npts; pts++) {
00182     row = gsl_matrix_row(xx, pts);
00183     istat = gsl_multifit_linear_est(&row.vector, ccoef, cov, &(yreg[pts]), &ystd);
00184     if (istat != GSL_SUCCESS) {
00185       (void) fprintf(stderr, "%s: Line %d: Error %d in multifitting linear values estimation!\n", __FILE__, __LINE__, istat);
00186       (void) gsl_matrix_free(xx);
00187       (void) gsl_matrix_free(cov);
00188       (void) gsl_vector_free(yy);
00189       (void) gsl_vector_free(ccoef);
00190       return istat;
00191     }
00192   }
00193   /* Compute also residuals */
00194 #if DEBUG >= 7
00195   (void) fprintf(stdout, "%s: Residuals calculation\n", __FILE__);
00196 #endif
00197   res = gsl_vector_alloc(npts);
00198   istat = gsl_multifit_linear_residuals(xx, yy, ccoef, res);
00199   if (istat != GSL_SUCCESS) {
00200     (void) fprintf(stderr, "%s: Line %d: Error %d in multifitting linear values residuals estimation!\n", __FILE__, __LINE__, istat);
00201     (void) gsl_matrix_free(xx);
00202     (void) gsl_matrix_free(cov);
00203     (void) gsl_vector_free(yy);
00204     (void) gsl_vector_free(res);
00205     (void) gsl_vector_free(ccoef);
00206     return istat;
00207   }
00208   /* Store residuals */
00209   for (pts=0; pts<npts; pts++)
00210     yerr[pts] = gsl_vector_get(res, pts);
00211 
00212   /* Compute autocorrelation of residuals (Durbin-Watson) */
00213   *autocor = gsl_stats_lag1_autocorrelation(yerr, stride, npts);
00214 
00215   /* Dealloc matrices and vectors memory */
00216   (void) gsl_matrix_free(xx);
00217   (void) gsl_matrix_free(cov);
00218   (void) gsl_vector_free(yy);
00219   (void) gsl_vector_free(res);
00220   (void) gsl_vector_free(ccoef);
00221 
00222 
00225   /* VIF */
00226 #if DEBUG >= 7
00227   (void) fprintf(stdout, "%s: VIF calculation\n", __FILE__);
00228 #endif
00229   xx = gsl_matrix_alloc(npts, nterm);
00230   yy = gsl_vector_alloc(npts);
00231 
00232   ccoef = gsl_vector_alloc(nterm);
00233   cov = gsl_matrix_alloc(nterm, nterm);
00234 
00235   ytmp = (double *) malloc(npts * sizeof(double));
00236   if (ytmp == NULL) alloc_error(__FILE__, __LINE__);
00237   
00238   /* Loop over parameters */
00239   for (vterm=0; vterm<nterm; vterm++) {
00240 
00241     (void) gsl_matrix_set_zero(xx);
00242     (void) gsl_matrix_set_zero(cov);
00243 
00244     (void) gsl_vector_set_zero(yy);
00245     (void) gsl_vector_set_zero(ccoef);
00246 
00247     /* Create X matrix */
00248     cterm = 0;
00249     for (term=0; term<nterm; term++) {
00250       if (term != vterm) {
00251         for (pts=0; pts<npts; pts++)
00252           (void) gsl_matrix_set(xx, pts, cterm+1, x[pts+term*npts]);
00253         cterm++;
00254       }
00255     }
00256     
00257     /* Create first column of matrix for regression constant */
00258     for (pts=0; pts<npts; pts++)
00259       (void) gsl_matrix_set(xx, pts, 0, 1.0);  
00260     
00261     /* Create Y vector for all vector dimension, using the vterm X values */
00262     for (pts=0; pts<npts; pts++) {
00263       ytmp[pts] = x[pts+vterm*npts];
00264       (void) gsl_vector_set(yy, pts, ytmp[pts]);
00265     }
00266     
00267     /* Allocate workspace */
00268     work = gsl_multifit_linear_alloc(npts, nterm);
00269 
00270     /* Perform linear regression just to get chi^2 */
00271     istat = gsl_multifit_linear(xx, yy, ccoef, cov, &vchisq, work);
00272     if (istat != GSL_SUCCESS) {
00273       (void) fprintf(stderr, "%s: Line %d: Error %d in multifitting algorithm!\n", __FILE__, __LINE__, istat);
00274       (void) gsl_multifit_linear_free(work);
00275       (void) gsl_matrix_free(xx);
00276       (void) gsl_matrix_free(cov);
00277       (void) gsl_vector_free(yy);
00278       (void) gsl_vector_free(ccoef);
00279       (void) free(ytmp);
00280       return istat;
00281     }
00282     
00283     /* Free workspace */
00284     (void) gsl_multifit_linear_free(work);
00285 
00286     /* Compute R^2 = 1 - \chi^2 / TSS */
00287     /* and finally VIF = 1.0 / (1.0 - R^2) */
00288     vif[vterm] = 1.0 / (1.0 - (1.0 - ( vchisq / gsl_stats_tss(ytmp, stride, (size_t) pts) ) ));
00289   }
00290     
00291   /* Dealloc matrices and vectors memory */
00292   (void) gsl_matrix_free(xx);
00293   (void) gsl_matrix_free(cov);
00294   (void) gsl_vector_free(yy);
00295   (void) gsl_vector_free(ccoef);
00296   
00297   (void) free(ytmp);
00298 
00299   /* Success status */
00300   return 0;
00301 }


Generated on 12 May 2016 for DSCLIM by  doxygen 1.6.1