Include file for regression library. More...
#include <stdio.h>
#include <sys/types.h>
#include <signal.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <string.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_multifit.h>
#include <gsl/gsl_statistics_double.h>
Go to the source code of this file.
Defines | |
#define | _GNU_SOURCE |
GNU extensions. | |
Functions | |
int | regress (double *coef, double *x, double *y, double *cte, double *yreg, double *yerr, double *chisq, double *rsq, double *vif, double *autocor, int nterm, int npts) |
Compute regression coefficients with a regression constant given two vectors, having nterm variables (usually parameters) and npts dimension (usually time). | |
void | apply_regression (double *buf, double *reg, double *cst, double *dist, double *sup_dist, int npts, int ntime, int nclust, int nreg) |
Compute a field using regression coefficients and the field values. |
Include file for regression library.
Definition in file regress.h.
void apply_regression | ( | double * | buf, | |
double * | reg, | |||
double * | cst, | |||
double * | dist, | |||
double * | sup_dist, | |||
int | npts, | |||
int | ntime, | |||
int | nclust, | |||
int | nreg | |||
) |
Compute a field using regression coefficients and the field values.
[out] | buf | 2D field |
[in] | reg | Regression coefficients |
[in] | cst | Regression constant |
[in] | dist | Values of input vector |
[in] | sup_dist | If there is one supplemental regression coefficient, use a supplemental vector |
[in] | npts | Points dimension |
[in] | ntime | Time dimension |
[in] | nclust | Cluster dimension |
[in] | nreg | Regression dimension |
Definition at line 59 of file apply_regression.c.
Referenced by wt_downscaling().
00060 { 00073 int nt; /* Time loop counter */ 00074 int pts; /* Points loop counter */ 00075 int clust; /* Cluster loop counter */ 00076 00077 /* Loop over all regression points */ 00078 for (pts=0; pts<npts; pts++) { 00079 /* Loop over all times */ 00080 for (nt=0; nt<ntime; nt++) { 00081 /* Initialize value */ 00082 buf[pts+nt*npts] = 0.0; 00083 /* Loop over all clusters and add each value after regression is applied */ 00084 for (clust=0; clust<nclust; clust++) { 00085 buf[pts+nt*npts] += (dist[nt+clust*ntime] * reg[pts+clust*npts]); 00086 00087 // if (pts == 0 && nt == (ntime-5)) 00088 // printf("%d %lf %lf %lf\n",clust,buf[pts+nt*npts],(dist[nt+clust*ntime]),(reg[pts+clust*npts])); 00089 } 00090 /* Extra regression coefficient with a second supplemental vector */ 00091 if (nclust == (nreg-1)) { 00092 buf[pts+nt*npts] += (sup_dist[nt] * reg[pts+(nreg-1)*npts]); 00093 } 00094 00095 /* Add regression constant */ 00096 buf[pts+nt*npts] += cst[pts]; 00097 00098 // if (pts == 0 && nt == (ntime-5)) 00099 // printf("%lf %lf\n",buf[pts+nt*npts],cst[pts]); 00100 } 00101 } 00102 // nt = ntime-1; 00103 // pts = 0; 00104 // for (clust=0; clust<nclust; clust++) 00105 // printf("REGRESS %d %lf %lf %lf %lf\n",clust,buf[pts+nt*npts],dist[nt+clust*ntime],reg[pts+clust*npts],cst[pts]); 00106 }
int regress | ( | double * | coef, | |
double * | x, | |||
double * | y, | |||
double * | cte, | |||
double * | yreg, | |||
double * | yerr, | |||
double * | chisq, | |||
double * | rsq, | |||
double * | vif, | |||
double * | autocor, | |||
int | nterm, | |||
int | npts | |||
) |
Compute regression coefficients with a regression constant given two vectors, having nterm variables (usually parameters) and npts dimension (usually time).
[out] | coef | Regression coefficients |
[in] | x | X vectors (nterm X npts) npts is usually time, nterm the number of parameters |
[in] | y | Y vectors (npts) npts is usually time |
[out] | cte | Regression constant |
[out] | yreg | Y vector reconstructed with regression |
[out] | yerr | Y error vector when reconstructing Y vector with regression |
[out] | chisq | Chi-square diagnostic |
[out] | rsq | Coefficient of determination diagnostic R^2 = 1 - ^2 / TSS |
[out] | vif | Variance Inflation Factor for each parameter |
[out] | autocor | Autocorrelation of residuals (Durbin-Watson test) |
[in] | nterm | Variables dimension |
[in] | npts | Vector dimension |
Regression diagnostics
Definition at line 62 of file regress.c.
References alloc_error().
Referenced by main(), and wt_learning().
00063 { 00081 gsl_vector_view row; /* To retrieve vector rows */ 00082 00083 gsl_matrix *xx; /* X matrix */ 00084 gsl_matrix *cov; /* Covariance matrix */ 00085 00086 gsl_multifit_linear_workspace *work; /* Workspace */ 00087 00088 gsl_vector *yy; /* Y vector */ 00089 gsl_vector *ccoef; /* Coefficients vector */ 00090 gsl_vector *res; /* Residuals vector */ 00091 00092 int istat; /* Diagnostic status */ 00093 int term; /* Loop counter for variables dimension */ 00094 int vterm; 00095 int cterm; 00096 int pts; /* Loop counter for vector dimension */ 00097 00098 size_t stride = 1; /* Stride for GSL functions */ 00099 00100 double vchisq; /* Temporary value for chi^2 for VIF */ 00101 double *ytmp; /* Temporary vector for y for VIF */ 00102 double ystd; /* Standard deviation of fitted function */ 00103 00104 /* Allocate memory and create matrices and vectors */ 00105 xx = gsl_matrix_alloc(npts, nterm+1); 00106 yy = gsl_vector_alloc(npts); 00107 00108 ccoef = gsl_vector_alloc(nterm+1); 00109 cov = gsl_matrix_alloc(nterm+1, nterm+1); 00110 00111 /* Create X matrix */ 00112 for (term=0; term<nterm; term++) 00113 for (pts=0; pts<npts; pts++) 00114 (void) gsl_matrix_set(xx, pts, term+1, x[pts+term*npts]); 00115 00116 /* Create first column of matrix for regression constant */ 00117 for (pts=0; pts<npts; pts++) 00118 (void) gsl_matrix_set(xx, pts, 0, 1.0); 00119 00120 /* Create Y vector for all vector dimension */ 00121 for (pts=0; pts<npts; pts++) 00122 (void) gsl_vector_set(yy, pts, y[pts]); 00123 00124 /* Allocate workspace */ 00125 work = gsl_multifit_linear_alloc(npts, nterm+1); 00126 00127 /* Perform linear regression */ 00128 istat = gsl_multifit_linear(xx, yy, ccoef, cov, chisq, work); 00129 if (istat != GSL_SUCCESS) { 00130 (void) fprintf(stderr, "%s: Line %d: Error %d in multifitting algorithm!\n", __FILE__, __LINE__, istat); 00131 (void) gsl_multifit_linear_free(work); 00132 (void) gsl_matrix_free(xx); 00133 (void) gsl_matrix_free(cov); 00134 (void) gsl_vector_free(yy); 00135 (void) gsl_vector_free(ccoef); 00136 return istat; 00137 } 00138 /* Free workspace */ 00139 (void) gsl_multifit_linear_free(work); 00140 00141 /* Debug output */ 00142 #if DEBUG >= 7 00143 #define C(i) (gsl_vector_get(ccoef,(i))) 00144 #define COV(i,j) (gsl_matrix_get(cov,(i),(j))) 00145 #define X(i,j) (gsl_matrix_get(xx,(i),(j))) 00146 00147 { 00148 printf ("# best fit: Y = %g + %g X + %g X^2\n", 00149 C(0), C(1), C(2)); 00150 00151 printf ("# x matrix:\n"); 00152 00153 for (term=0; term<nterm+1; term++) 00154 for (pts=0; pts<npts; pts++) 00155 printf("term=%d pts=%d x=%lf\n", term, pts, X(pts,term)); 00156 00157 printf ("# covariance matrix:\n"); 00158 printf ("[ %+.5e, %+.5e, %+.5e \n", 00159 COV(0,0), COV(0,1), COV(0,2)); 00160 printf (" %+.5e, %+.5e, %+.5e \n", 00161 COV(1,0), COV(1,1), COV(1,2)); 00162 printf (" %+.5e, %+.5e, %+.5e ]\n", 00163 COV(2,0), COV(2,1), COV(2,2)); 00164 printf ("# chisq = %g\n", *chisq); 00165 } 00166 #endif 00167 00168 /* Retrieve regression coefficients */ 00169 for (term=0; term<nterm; term++) 00170 coef[term] = gsl_vector_get(ccoef, term+1); 00171 /* Retrieve regression constant */ 00172 *cte = gsl_vector_get(ccoef, 0); 00173 00174 /* Compute R^2 = 1 - \chi^2 / TSS */ 00175 *rsq = 1.0 - ( (*chisq) / gsl_stats_tss(y, stride, (size_t) pts) ); 00176 00177 /* Reconstruct vector using regression coefficients, and calculate its standard deviation */ 00178 #if DEBUG >= 7 00179 (void) fprintf(stdout, "%s: Vector reconstruction\n", __FILE__); 00180 #endif 00181 for (pts=0; pts<npts; pts++) { 00182 row = gsl_matrix_row(xx, pts); 00183 istat = gsl_multifit_linear_est(&row.vector, ccoef, cov, &(yreg[pts]), &ystd); 00184 if (istat != GSL_SUCCESS) { 00185 (void) fprintf(stderr, "%s: Line %d: Error %d in multifitting linear values estimation!\n", __FILE__, __LINE__, istat); 00186 (void) gsl_matrix_free(xx); 00187 (void) gsl_matrix_free(cov); 00188 (void) gsl_vector_free(yy); 00189 (void) gsl_vector_free(ccoef); 00190 return istat; 00191 } 00192 } 00193 /* Compute also residuals */ 00194 #if DEBUG >= 7 00195 (void) fprintf(stdout, "%s: Residuals calculation\n", __FILE__); 00196 #endif 00197 res = gsl_vector_alloc(npts); 00198 istat = gsl_multifit_linear_residuals(xx, yy, ccoef, res); 00199 if (istat != GSL_SUCCESS) { 00200 (void) fprintf(stderr, "%s: Line %d: Error %d in multifitting linear values residuals estimation!\n", __FILE__, __LINE__, istat); 00201 (void) gsl_matrix_free(xx); 00202 (void) gsl_matrix_free(cov); 00203 (void) gsl_vector_free(yy); 00204 (void) gsl_vector_free(res); 00205 (void) gsl_vector_free(ccoef); 00206 return istat; 00207 } 00208 /* Store residuals */ 00209 for (pts=0; pts<npts; pts++) 00210 yerr[pts] = gsl_vector_get(res, pts); 00211 00212 /* Compute autocorrelation of residuals (Durbin-Watson) */ 00213 *autocor = gsl_stats_lag1_autocorrelation(yerr, stride, npts); 00214 00215 /* Dealloc matrices and vectors memory */ 00216 (void) gsl_matrix_free(xx); 00217 (void) gsl_matrix_free(cov); 00218 (void) gsl_vector_free(yy); 00219 (void) gsl_vector_free(res); 00220 (void) gsl_vector_free(ccoef); 00221 00222 00225 /* VIF */ 00226 #if DEBUG >= 7 00227 (void) fprintf(stdout, "%s: VIF calculation\n", __FILE__); 00228 #endif 00229 xx = gsl_matrix_alloc(npts, nterm); 00230 yy = gsl_vector_alloc(npts); 00231 00232 ccoef = gsl_vector_alloc(nterm); 00233 cov = gsl_matrix_alloc(nterm, nterm); 00234 00235 ytmp = (double *) malloc(npts * sizeof(double)); 00236 if (ytmp == NULL) alloc_error(__FILE__, __LINE__); 00237 00238 /* Loop over parameters */ 00239 for (vterm=0; vterm<nterm; vterm++) { 00240 00241 (void) gsl_matrix_set_zero(xx); 00242 (void) gsl_matrix_set_zero(cov); 00243 00244 (void) gsl_vector_set_zero(yy); 00245 (void) gsl_vector_set_zero(ccoef); 00246 00247 /* Create X matrix */ 00248 cterm = 0; 00249 for (term=0; term<nterm; term++) { 00250 if (term != vterm) { 00251 for (pts=0; pts<npts; pts++) 00252 (void) gsl_matrix_set(xx, pts, cterm+1, x[pts+term*npts]); 00253 cterm++; 00254 } 00255 } 00256 00257 /* Create first column of matrix for regression constant */ 00258 for (pts=0; pts<npts; pts++) 00259 (void) gsl_matrix_set(xx, pts, 0, 1.0); 00260 00261 /* Create Y vector for all vector dimension, using the vterm X values */ 00262 for (pts=0; pts<npts; pts++) { 00263 ytmp[pts] = x[pts+vterm*npts]; 00264 (void) gsl_vector_set(yy, pts, ytmp[pts]); 00265 } 00266 00267 /* Allocate workspace */ 00268 work = gsl_multifit_linear_alloc(npts, nterm); 00269 00270 /* Perform linear regression just to get chi^2 */ 00271 istat = gsl_multifit_linear(xx, yy, ccoef, cov, &vchisq, work); 00272 if (istat != GSL_SUCCESS) { 00273 (void) fprintf(stderr, "%s: Line %d: Error %d in multifitting algorithm!\n", __FILE__, __LINE__, istat); 00274 (void) gsl_multifit_linear_free(work); 00275 (void) gsl_matrix_free(xx); 00276 (void) gsl_matrix_free(cov); 00277 (void) gsl_vector_free(yy); 00278 (void) gsl_vector_free(ccoef); 00279 (void) free(ytmp); 00280 return istat; 00281 } 00282 00283 /* Free workspace */ 00284 (void) gsl_multifit_linear_free(work); 00285 00286 /* Compute R^2 = 1 - \chi^2 / TSS */ 00287 /* and finally VIF = 1.0 / (1.0 - R^2) */ 00288 vif[vterm] = 1.0 / (1.0 - (1.0 - ( vchisq / gsl_stats_tss(ytmp, stride, (size_t) pts) ) )); 00289 } 00290 00291 /* Dealloc matrices and vectors memory */ 00292 (void) gsl_matrix_free(xx); 00293 (void) gsl_matrix_free(cov); 00294 (void) gsl_vector_free(yy); 00295 (void) gsl_vector_free(ccoef); 00296 00297 (void) free(ytmp); 00298 00299 /* Success status */ 00300 return 0; 00301 }