regress.c

Go to the documentation of this file.
00001 /* ***************************************************** */
00002 /* Compute regression coefficients with a regression     */
00003 /* constant given two vectors, having nterm variables    */
00004 /* and npts dimension (usually time)                     */
00005 /* regress.c                                             */
00006 /* ***************************************************** */
00007 /* Author: Christian Page, CERFACS, Toulouse, France.    */
00008 /* ***************************************************** */
00013 /* LICENSE BEGIN
00014 
00015 Copyright Cerfacs (Christian Page) (2015)
00016 
00017 christian.page@cerfacs.fr
00018 
00019 This software is a computer program whose purpose is to downscale climate
00020 scenarios using a statistical methodology based on weather regimes.
00021 
00022 This software is governed by the CeCILL license under French law and
00023 abiding by the rules of distribution of free software. You can use, 
00024 modify and/ or redistribute the software under the terms of the CeCILL
00025 license as circulated by CEA, CNRS and INRIA at the following URL
00026 "http://www.cecill.info". 
00027 
00028 As a counterpart to the access to the source code and rights to copy,
00029 modify and redistribute granted by the license, users are provided only
00030 with a limited warranty and the software's author, the holder of the
00031 economic rights, and the successive licensors have only limited
00032 liability. 
00033 
00034 In this respect, the user's attention is drawn to the risks associated
00035 with loading, using, modifying and/or developing or reproducing the
00036 software by the user in light of its specific status of free software,
00037 that may mean that it is complicated to manipulate, and that also
00038 therefore means that it is reserved for developers and experienced
00039 professionals having in-depth computer knowledge. Users are therefore
00040 encouraged to load and test the software's suitability as regards their
00041 requirements in conditions enabling the security of their systems and/or 
00042 data to be ensured and, more generally, to use and operate it in the 
00043 same conditions as regards security. 
00044 
00045 The fact that you are presently reading this means that you have had
00046 knowledge of the CeCILL license and that you accept its terms.
00047 
00048 LICENSE END */
00049 
00050 
00051 
00052 
00053 
00054 
00055 
00056 #include <regress.h>
00057 #include <misc.h>
00058 
00061 int
00062 regress(double *coef, double *x, double *y, double *cte, double *yreg, double *yerr,
00063         double *chisq, double *rsq, double *vif, double *autocor, int nterm, int npts) {
00081   gsl_vector_view row; /* To retrieve vector rows */
00082 
00083   gsl_matrix *xx; /* X matrix */
00084   gsl_matrix *cov; /* Covariance matrix */
00085 
00086   gsl_multifit_linear_workspace *work; /* Workspace */
00087 
00088   gsl_vector *yy; /* Y vector */
00089   gsl_vector *ccoef; /* Coefficients vector */
00090   gsl_vector *res; /* Residuals vector */
00091 
00092   int istat; /* Diagnostic status */
00093   int term; /* Loop counter for variables dimension */
00094   int vterm;
00095   int cterm;
00096   int pts; /* Loop counter for vector dimension */
00097 
00098   size_t stride = 1; /* Stride for GSL functions */
00099 
00100   double vchisq; /* Temporary value for chi^2 for VIF */
00101   double *ytmp; /* Temporary vector for y for VIF */
00102   double ystd; /* Standard deviation of fitted function */
00103 
00104   /* Allocate memory and create matrices and vectors */
00105   xx = gsl_matrix_alloc(npts, nterm+1);
00106   yy = gsl_vector_alloc(npts);
00107 
00108   ccoef = gsl_vector_alloc(nterm+1);
00109   cov = gsl_matrix_alloc(nterm+1, nterm+1);
00110 
00111   /* Create X matrix */
00112   for (term=0; term<nterm; term++)
00113     for (pts=0; pts<npts; pts++)
00114       (void) gsl_matrix_set(xx, pts, term+1, x[pts+term*npts]);
00115 
00116   /* Create first column of matrix for regression constant */
00117   for (pts=0; pts<npts; pts++)
00118     (void) gsl_matrix_set(xx, pts, 0, 1.0);  
00119   
00120   /* Create Y vector for all vector dimension */
00121   for (pts=0; pts<npts; pts++)
00122     (void) gsl_vector_set(yy, pts, y[pts]);
00123 
00124   /* Allocate workspace */
00125   work = gsl_multifit_linear_alloc(npts, nterm+1);
00126 
00127   /* Perform linear regression */
00128   istat = gsl_multifit_linear(xx, yy, ccoef, cov, chisq, work);
00129   if (istat != GSL_SUCCESS) {
00130     (void) fprintf(stderr, "%s: Line %d: Error %d in multifitting algorithm!\n", __FILE__, __LINE__, istat);
00131     (void) gsl_multifit_linear_free(work);
00132     (void) gsl_matrix_free(xx);
00133     (void) gsl_matrix_free(cov);
00134     (void) gsl_vector_free(yy);
00135     (void) gsl_vector_free(ccoef);
00136     return istat;
00137   }
00138   /* Free workspace */
00139   (void) gsl_multifit_linear_free(work);
00140 
00141   /* Debug output */
00142 #if DEBUG >= 7
00143 #define C(i) (gsl_vector_get(ccoef,(i)))
00144 #define COV(i,j) (gsl_matrix_get(cov,(i),(j)))
00145 #define X(i,j) (gsl_matrix_get(xx,(i),(j)))
00146      
00147   {
00148     printf ("# best fit: Y = %g + %g X + %g X^2\n", 
00149             C(0), C(1), C(2));
00150 
00151     printf ("# x matrix:\n");
00152     
00153     for (term=0; term<nterm+1; term++)
00154       for (pts=0; pts<npts; pts++)
00155         printf("term=%d pts=%d x=%lf\n", term, pts, X(pts,term));
00156     
00157     printf ("# covariance matrix:\n");
00158     printf ("[ %+.5e, %+.5e, %+.5e  \n",
00159             COV(0,0), COV(0,1), COV(0,2));
00160     printf ("  %+.5e, %+.5e, %+.5e  \n", 
00161             COV(1,0), COV(1,1), COV(1,2));
00162     printf ("  %+.5e, %+.5e, %+.5e ]\n", 
00163             COV(2,0), COV(2,1), COV(2,2));
00164     printf ("# chisq = %g\n", *chisq);
00165   }
00166 #endif
00167 
00168   /* Retrieve regression coefficients */
00169   for (term=0; term<nterm; term++)
00170     coef[term] = gsl_vector_get(ccoef, term+1);
00171   /* Retrieve regression constant */
00172   *cte = gsl_vector_get(ccoef, 0);
00173 
00174   /* Compute R^2 = 1 - \chi^2 / TSS */
00175   *rsq = 1.0 - ( (*chisq) / gsl_stats_tss(y, stride, (size_t) pts) );
00176 
00177   /* Reconstruct vector using regression coefficients, and calculate its standard deviation */
00178 #if DEBUG >= 7
00179   (void) fprintf(stdout, "%s: Vector reconstruction\n", __FILE__);
00180 #endif
00181   for (pts=0; pts<npts; pts++) {
00182     row = gsl_matrix_row(xx, pts);
00183     istat = gsl_multifit_linear_est(&row.vector, ccoef, cov, &(yreg[pts]), &ystd);
00184     if (istat != GSL_SUCCESS) {
00185       (void) fprintf(stderr, "%s: Line %d: Error %d in multifitting linear values estimation!\n", __FILE__, __LINE__, istat);
00186       (void) gsl_matrix_free(xx);
00187       (void) gsl_matrix_free(cov);
00188       (void) gsl_vector_free(yy);
00189       (void) gsl_vector_free(ccoef);
00190       return istat;
00191     }
00192   }
00193   /* Compute also residuals */
00194 #if DEBUG >= 7
00195   (void) fprintf(stdout, "%s: Residuals calculation\n", __FILE__);
00196 #endif
00197   res = gsl_vector_alloc(npts);
00198   istat = gsl_multifit_linear_residuals(xx, yy, ccoef, res);
00199   if (istat != GSL_SUCCESS) {
00200     (void) fprintf(stderr, "%s: Line %d: Error %d in multifitting linear values residuals estimation!\n", __FILE__, __LINE__, istat);
00201     (void) gsl_matrix_free(xx);
00202     (void) gsl_matrix_free(cov);
00203     (void) gsl_vector_free(yy);
00204     (void) gsl_vector_free(res);
00205     (void) gsl_vector_free(ccoef);
00206     return istat;
00207   }
00208   /* Store residuals */
00209   for (pts=0; pts<npts; pts++)
00210     yerr[pts] = gsl_vector_get(res, pts);
00211 
00212   /* Compute autocorrelation of residuals (Durbin-Watson) */
00213   *autocor = gsl_stats_lag1_autocorrelation(yerr, stride, npts);
00214 
00215   /* Dealloc matrices and vectors memory */
00216   (void) gsl_matrix_free(xx);
00217   (void) gsl_matrix_free(cov);
00218   (void) gsl_vector_free(yy);
00219   (void) gsl_vector_free(res);
00220   (void) gsl_vector_free(ccoef);
00221 
00222 
00225   /* VIF */
00226 #if DEBUG >= 7
00227   (void) fprintf(stdout, "%s: VIF calculation\n", __FILE__);
00228 #endif
00229   xx = gsl_matrix_alloc(npts, nterm);
00230   yy = gsl_vector_alloc(npts);
00231 
00232   ccoef = gsl_vector_alloc(nterm);
00233   cov = gsl_matrix_alloc(nterm, nterm);
00234 
00235   ytmp = (double *) malloc(npts * sizeof(double));
00236   if (ytmp == NULL) alloc_error(__FILE__, __LINE__);
00237   
00238   /* Loop over parameters */
00239   for (vterm=0; vterm<nterm; vterm++) {
00240 
00241     (void) gsl_matrix_set_zero(xx);
00242     (void) gsl_matrix_set_zero(cov);
00243 
00244     (void) gsl_vector_set_zero(yy);
00245     (void) gsl_vector_set_zero(ccoef);
00246 
00247     /* Create X matrix */
00248     cterm = 0;
00249     for (term=0; term<nterm; term++) {
00250       if (term != vterm) {
00251         for (pts=0; pts<npts; pts++)
00252           (void) gsl_matrix_set(xx, pts, cterm+1, x[pts+term*npts]);
00253         cterm++;
00254       }
00255     }
00256     
00257     /* Create first column of matrix for regression constant */
00258     for (pts=0; pts<npts; pts++)
00259       (void) gsl_matrix_set(xx, pts, 0, 1.0);  
00260     
00261     /* Create Y vector for all vector dimension, using the vterm X values */
00262     for (pts=0; pts<npts; pts++) {
00263       ytmp[pts] = x[pts+vterm*npts];
00264       (void) gsl_vector_set(yy, pts, ytmp[pts]);
00265     }
00266     
00267     /* Allocate workspace */
00268     work = gsl_multifit_linear_alloc(npts, nterm);
00269 
00270     /* Perform linear regression just to get chi^2 */
00271     istat = gsl_multifit_linear(xx, yy, ccoef, cov, &vchisq, work);
00272     if (istat != GSL_SUCCESS) {
00273       (void) fprintf(stderr, "%s: Line %d: Error %d in multifitting algorithm!\n", __FILE__, __LINE__, istat);
00274       (void) gsl_multifit_linear_free(work);
00275       (void) gsl_matrix_free(xx);
00276       (void) gsl_matrix_free(cov);
00277       (void) gsl_vector_free(yy);
00278       (void) gsl_vector_free(ccoef);
00279       (void) free(ytmp);
00280       return istat;
00281     }
00282     
00283     /* Free workspace */
00284     (void) gsl_multifit_linear_free(work);
00285 
00286     /* Compute R^2 = 1 - \chi^2 / TSS */
00287     /* and finally VIF = 1.0 / (1.0 - R^2) */
00288     vif[vterm] = 1.0 / (1.0 - (1.0 - ( vchisq / gsl_stats_tss(ytmp, stride, (size_t) pts) ) ));
00289   }
00290     
00291   /* Dealloc matrices and vectors memory */
00292   (void) gsl_matrix_free(xx);
00293   (void) gsl_matrix_free(cov);
00294   (void) gsl_vector_free(yy);
00295   (void) gsl_vector_free(ccoef);
00296   
00297   (void) free(ytmp);
00298 
00299   /* Success status */
00300   return 0;
00301 }

Generated on 12 May 2016 for DSCLIM by  doxygen 1.6.1