Compute or read learning data needed for downscaling climate scenarios using weather typing. More...
#include <dsclim.h>
Go to the source code of this file.
Functions | |
int | wt_learning (data_struct *data) |
Compute or read learning data needed for downscaling climate scenarios using weather typing. |
Compute or read learning data needed for downscaling climate scenarios using weather typing.
Definition in file wt_learning.c.
int wt_learning | ( | data_struct * | data | ) |
Compute or read learning data needed for downscaling climate scenarios using weather typing.
[in] | data | MASTER data structure. |
Return status.
Return status solid precipitation.
Read learning data
Compute learning data
Assume EOFs are already pre-computed
Normalize secondary large-scale fields for re-analysis learning data
Construct time vectors
Merge observation and reanalysis principal components for clustering algorithm and normalize using first Singular Value
Definition at line 71 of file wt_learning.c.
References alloc_error(), best_clusters(), learning_data_struct::class_clusters, class_days_pc_clusters(), conf_struct::classif_type, data_struct::conf, learning_struct::data, time_vect_struct::day, reg_struct::dist, dist_clusters_normctrl(), distance_point(), learning_eof_struct::eof, extract_subdomain(), extract_subperiod_months(), FALSE, mask_struct::field, learning_struct::filename_rea_sup, time_vect_struct::hour, mask_struct::lat, reg_struct::lat, learning_struct::lat, data_struct::learning, conf_struct::learning_mask_latitude_max, conf_struct::learning_mask_latitude_min, conf_struct::learning_mask_longitude_max, conf_struct::learning_mask_longitude_min, conf_struct::learning_maskfile, learning_struct::learning_provided, learning_struct::learning_save, mask_struct::lon, reg_struct::lon, learning_struct::lon, mask_points(), mask_region(), mean_field_spatial(), time_vect_struct::minutes, season_struct::month, time_vect_struct::month, conf_struct::nclassifications, season_struct::nclusters, mask_struct::nlat, learning_struct::nlat, mask_struct::nlon, learning_struct::nlon, season_struct::nmonths, learning_struct::nomvar_rea_sup, normalize_field(), normalize_field_2d(), conf_struct::npartitions, reg_struct::npts, season_struct::nreg, conf_struct::nseasons, learning_data_struct::ntime, learning_eof_struct::ntime, learning_struct::obs, learning_struct::obs_neof, learning_struct::pc_normalized_var, learning_data_struct::precip_index, learning_data_struct::precip_index_obs, learning_data_struct::precip_reg, learning_data_struct::precip_reg_autocor, learning_data_struct::precip_reg_cst, learning_data_struct::precip_reg_dist, learning_data_struct::precip_reg_err, learning_data_struct::precip_reg_rsq, learning_data_struct::precip_reg_vif, learning_struct::rea, learning_struct::rea_coords, learning_struct::rea_dimxname, learning_struct::rea_dimyname, learning_struct::rea_gridname, learning_struct::rea_latname, learning_struct::rea_lonname, learning_struct::rea_neof, learning_struct::rea_timename, read_field_subdomain_period(), read_learning_fields(), read_learning_obs_eof(), read_learning_rea_eof(), read_obs_period(), data_struct::reg, regress(), conf_struct::season, conf_struct::secondary_latitude_max, conf_struct::secondary_latitude_min, conf_struct::secondary_longitude_max, conf_struct::secondary_longitude_min, data_struct::secondary_mask, time_vect_struct::seconds, learning_eof_struct::sing, sub_period_common(), learning_data_struct::sup_index, learning_data_struct::sup_index_mean, learning_data_struct::sup_index_var, learning_struct::sup_lat, learning_struct::sup_lon, learning_struct::sup_nlat, learning_struct::sup_nlon, learning_data_struct::sup_val, learning_data_struct::time, time_mean_variance_field_2d(), learning_data_struct::time_s, learning_struct::time_s, learning_eof_struct::time_s, conf_struct::time_units, TRUE, mask_struct::use_mask, learning_data_struct::weight, write_learning_fields(), and time_vect_struct::year.
Referenced by main().
00071 { 00078 double *buf_learn = NULL; 00079 double *buf_weight = NULL; 00080 double *buf_learn_obs = NULL; 00081 double *buf_learn_rea = NULL; 00082 double *buf_learn_obs_sub = NULL; 00083 double *buf_learn_rea_sub = NULL; 00084 double *buf_learn_pc = NULL; 00085 double *buf_learn_pc_sub = NULL; 00086 00087 double *precip_liquid_obs = NULL; 00088 double *precip_solid_obs = NULL; 00089 double *precip_obs = NULL; 00090 double *mean_precip = NULL; 00091 double *mean_precip_sub = NULL; 00092 00093 double *precip_reg = NULL; 00094 double *precip_err = NULL; 00095 double *precip_index = NULL; 00096 double *dist_reg = NULL; 00097 double *vif = NULL; 00098 double chisq; 00099 double rsq; 00100 double autocor; 00101 00102 double obs_first_sing; 00103 double rea_sing; 00104 double obs_sing; 00105 double *rea_var = NULL; 00106 double rea_first_sing; 00107 00108 double *tas_rea = NULL; 00109 double *tas_rea_sub = NULL; 00110 double *tas_rea_mean = NULL; 00111 double *tas_rea_mean_sub = NULL; 00112 00113 double missing_value; 00114 double missing_value_precip; 00115 00116 double *mean_dist = NULL; 00117 double *var_dist = NULL; 00118 double *dist = NULL; 00119 double dist_pt; 00120 00121 double *mask_subd = NULL; 00122 short int *mask_sub = NULL; 00123 int nlon_mask; 00124 int nlat_mask; 00125 double *lon_mask = NULL; 00126 double *lat_mask = NULL; 00127 00128 int ntime_learn_all; 00129 int *ntime_sub = NULL; 00130 00131 double *sup_mean = NULL; 00132 double *sup_var = NULL; 00133 00134 double meanvif = 0.0; 00135 00136 int eof; 00137 int clust; 00138 int nt; 00139 int ntt; 00140 int t; 00141 int s; 00142 int i; 00143 int j; 00144 int pt; 00145 int term; 00146 int *npt = NULL; 00147 short int allpt; 00148 00149 /* udunits variables */ 00150 ut_system *unitSystem = NULL; /* Unit System (udunits) */ 00151 ut_unit *dataunits = NULL; /* Data units (udunits) */ 00152 00153 int niter = 2; 00154 00155 int istat; 00156 int istat_solid; 00158 if (data->learning->learning_provided == TRUE) { 00160 istat = read_learning_fields(data); 00161 if (istat != 0) return istat; 00162 } 00163 else { 00167 /* Read re-analysis pre-computed EOF and Singular Values */ 00168 istat = read_learning_rea_eof(data); 00169 if (istat != 0) return istat; 00170 00171 /* Read observations pre-computed EOF and Singular Values */ 00172 istat = read_learning_obs_eof(data); 00173 if (istat != 0) return istat; 00174 00175 /* Select common time period between the re-analysis and the observation data periods */ 00176 if (data->learning->obs_neof != 0) { 00177 istat = sub_period_common(&buf_learn_obs, &ntime_learn_all, data->learning->obs->eof, 00178 data->learning->obs->time_s->year, data->learning->obs->time_s->month, data->learning->obs->time_s->day, 00179 data->learning->rea->time_s->year, data->learning->rea->time_s->month, data->learning->rea->time_s->day, 00180 1, data->learning->obs_neof, 1, data->learning->obs->ntime, data->learning->rea->ntime); 00181 if (istat != 0) return istat; 00182 } 00183 istat = sub_period_common(&buf_learn_rea, &ntime_learn_all, data->learning->rea->eof, 00184 data->learning->rea->time_s->year, data->learning->rea->time_s->month, data->learning->rea->time_s->day, 00185 data->learning->obs->time_s->year, data->learning->obs->time_s->month, data->learning->obs->time_s->day, 00186 1, data->learning->rea_neof, 1, data->learning->rea->ntime, data->learning->obs->ntime); 00187 if (istat != 0) return istat; 00188 00189 rea_var = (double *) malloc(data->learning->rea_neof * sizeof(double)); 00190 if (rea_var == NULL) alloc_error(__FILE__, __LINE__); 00191 00192 /* Compute normalisation factor of EOF of large-scale field for the whole period */ 00193 00194 data->learning->pc_normalized_var = (double *) malloc(data->learning->rea_neof * sizeof(double)); 00195 if (data->learning->pc_normalized_var == NULL) alloc_error(__FILE__, __LINE__); 00196 buf_learn_pc = (double *) malloc(data->learning->rea_neof * ntime_learn_all * sizeof(double)); 00197 if (buf_learn_pc == NULL) alloc_error(__FILE__, __LINE__); 00198 00199 for (eof=0; eof<data->learning->rea_neof; eof++) { 00200 00201 for (nt=0; nt<ntime_learn_all; nt++) 00202 buf_learn_pc[nt+eof*ntime_learn_all] = buf_learn_rea[nt+eof*ntime_learn_all] * data->learning->rea->sing[eof]; 00203 00204 rea_var[eof] = gsl_stats_variance(&(buf_learn_pc[eof*ntime_learn_all]), 1, ntime_learn_all); 00205 if (rea_var[eof] == 0.0) { 00206 (void) fprintf(stderr, "%s: ERROR: Variance of the projection of the large-scale field onto EOF is 0.0. You probably have too many EOFs for your field. EOF number=%d. Variance=%f. Must abort...\n", 00207 __FILE__, eof, rea_var[eof]); 00208 return -1; 00209 } 00210 00211 /* Renormalize EOF of large-scale field for the whole period using the first EOF norm and the Singular Value */ 00212 for (nt=0; nt<ntime_learn_all; nt++) 00213 buf_learn_pc[nt+eof*ntime_learn_all] = buf_learn_pc[nt+eof*ntime_learn_all] / sqrt(rea_var[0]); 00214 00215 /* Recompute normalization factor using normalized field */ 00216 data->learning->pc_normalized_var[eof] = gsl_stats_variance(&(buf_learn_pc[eof*ntime_learn_all]), 1, ntime_learn_all); 00217 if (data->learning->pc_normalized_var[eof] == 0.0) { 00218 (void) fprintf(stderr, "%s: ERROR: Normalized variance of the projection of the large-scale field onto EOF is 0.0. You probably have too many EOFs for your field. EOF number=%d. Variance=%f. Must abort...\n", 00219 __FILE__, eof, data->learning->pc_normalized_var[eof]); 00220 return -1; 00221 } 00222 } 00223 00224 ntime_sub = (int *) malloc(data->conf->nseasons * sizeof(int)); 00225 if (ntime_sub == NULL) alloc_error(__FILE__, __LINE__); 00226 00227 /* Read observed precipitation (liquid and solid) */ 00228 istat_solid = read_obs_period(&precip_solid_obs, &(data->learning->lon), &(data->learning->lat), &missing_value_precip, 00229 data, "prsn", data->learning->obs->time_s->year, data->learning->obs->time_s->month, 00230 data->learning->obs->time_s->day, &(data->learning->nlon), &(data->learning->nlat), 00231 data->learning->obs->ntime); 00232 if (istat_solid == -1) return -1; 00233 if (istat_solid >= 0) { 00234 (void) free(data->learning->lon); 00235 (void) free(data->learning->lat); 00236 } 00237 istat = read_obs_period(&precip_liquid_obs, &(data->learning->lon), &(data->learning->lat), &missing_value_precip, data, "prr", 00238 data->learning->obs->time_s->year, data->learning->obs->time_s->month, data->learning->obs->time_s->day, 00239 &(data->learning->nlon), &(data->learning->nlat), data->learning->obs->ntime); 00240 if (istat == -1) return -1; 00241 00242 /* Calculate total precipitation */ 00243 precip_obs = (double *) malloc(data->learning->nlon*data->learning->nlat*data->learning->obs->ntime * sizeof(double)); 00244 if (precip_obs == NULL) alloc_error(__FILE__, __LINE__); 00245 00246 if (istat_solid == -2) { 00247 fprintf(stderr, "%s: WARNING: Snow observation variable not found in dsclim XML config file. Will assume that you don't have snow observations, and set it to zero.\n", __FILE__); 00248 for (t=0; t<data->learning->obs->ntime; t++) 00249 for (j=0; j<data->learning->nlat; j++) 00250 for (i=0; i<data->learning->nlon; i++) 00251 if (precip_liquid_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat] != missing_value_precip) 00252 precip_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat] = 00253 precip_liquid_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat] * 86400; 00254 else 00255 precip_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat] = missing_value_precip; 00256 (void) free(precip_liquid_obs); 00257 } 00258 else { 00259 (void) printf("%s: Calculating total precipitation from solid and liquid.\n", __FILE__); 00260 for (t=0; t<data->learning->obs->ntime; t++) 00261 for (j=0; j<data->learning->nlat; j++) 00262 for (i=0; i<data->learning->nlon; i++) 00263 if (precip_liquid_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat] != missing_value_precip) 00264 precip_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat] = 00265 (precip_liquid_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat] + 00266 precip_solid_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat]) * 86400.0; 00267 else 00268 precip_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat] = missing_value_precip; 00269 (void) free(precip_liquid_obs); 00270 (void) free(precip_solid_obs); 00271 } 00272 00273 /* Apply mask for learning data */ 00274 if (data->conf->learning_maskfile->use_mask == TRUE) { 00275 /* Allocate memory */ 00276 mask_sub = (short int *) malloc(data->learning->nlat*data->learning->nlon * sizeof(short int)); 00277 if (mask_sub == NULL) alloc_error(__FILE__, __LINE__); 00278 for (i=0; i<data->learning->nlat*data->learning->nlon; i++) 00279 mask_sub[i] = (short int) data->conf->learning_maskfile->field[i]; 00280 /* Apply mask */ 00281 (void) printf("%s: Masking points using mask file for regression analysis.\n", __FILE__); 00282 (void) mask_points(precip_obs, missing_value_precip, mask_sub, 00283 data->learning->nlon, data->learning->nlat, data->learning->obs->ntime); 00284 /* Free memory of mask_sub */ 00285 (void) free(mask_sub); 00286 mask_sub = NULL; 00287 } 00288 00289 /* Mask region if needed using domain bounding box */ 00290 if (data->conf->learning_mask_longitude_min != -999.0 && 00291 data->conf->learning_mask_longitude_max != -999.0 && 00292 data->conf->learning_mask_latitude_min != -999.0 && 00293 data->conf->learning_mask_latitude_max != -999.0) { 00294 (void) printf("%s: Masking region for regression analysis.\n", __FILE__); 00295 (void) mask_region(precip_obs, missing_value_precip, data->learning->lon, data->learning->lat, 00296 data->conf->learning_mask_longitude_min, data->conf->learning_mask_longitude_max, 00297 data->conf->learning_mask_latitude_min, data->conf->learning_mask_latitude_max, 00298 data->learning->nlon, data->learning->nlat, data->learning->obs->ntime); 00299 } 00300 00301 /* Perform spatial mean of observed precipitation around regression points, normalize precip */ 00302 (void) printf("%s: Perform spatial mean of observed precipitation around regression points.\n", __FILE__); 00303 mean_precip = (double *) malloc(data->reg->npts * data->learning->obs->ntime * sizeof(double)); 00304 if (mean_precip == NULL) alloc_error(__FILE__, __LINE__); 00305 npt = (int *) malloc(data->learning->obs->ntime * sizeof(int)); 00306 if (npt == NULL) alloc_error(__FILE__, __LINE__); 00307 for (pt=0; pt<data->reg->npts; pt++) { 00308 for (t=0; t<data->learning->obs->ntime; t++) { 00309 mean_precip[t+pt*data->learning->obs->ntime] = 0.0; 00310 npt[t] = 0; 00311 } 00312 for (j=0; j<data->learning->nlat; j++) 00313 for (i=0; i<data->learning->nlon; i++) { 00314 dist_pt = distance_point(data->reg->lon[pt], data->reg->lat[pt], 00315 data->learning->lon[i+j*data->learning->nlon], data->learning->lat[i+j*data->learning->nlon]); 00316 if (dist_pt <= data->reg->dist) 00317 for (t=0; t<data->learning->obs->ntime; t++) 00318 if (precip_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat] != missing_value_precip) { 00319 mean_precip[t+pt*data->learning->obs->ntime] += 00320 precip_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat]; 00321 npt[t]++; 00322 } 00323 } 00324 allpt = FALSE; 00325 for (t=0; t<data->learning->obs->ntime; t++) 00326 if (npt[t] == 0) allpt = TRUE; 00327 if (allpt == TRUE) { 00328 (void) fprintf(stderr, "%s: WARNING: There are no point of observation in the vicinity of the regression point #%d at a minimum distance of at least %f meters! Verify your regression points, or the configuration of your coordinate variable names in your configuration file, or that you don't have all missing values in your observations in the vicinity of the regression point. Time=%d. lon=%lf lat=%lf. WARNING: Will desactivate this regression point.\n", 00329 __FILE__, pt, data->reg->dist, t, data->reg->lon[pt], data->reg->lat[pt]); 00330 for (t=0; t<data->learning->obs->ntime; t++) 00331 mean_precip[t+pt*data->learning->obs->ntime] = missing_value_precip; 00332 } 00333 else 00334 for (t=0; t<data->learning->obs->ntime; t++) 00335 mean_precip[t+pt*data->learning->obs->ntime] = sqrt(mean_precip[t+pt*data->learning->obs->ntime] / (double) npt[t]); 00336 } 00337 (void) free(npt); 00338 (void) free(precip_obs); 00339 00340 /* Select common time period between the re-analysis and the observation data periods for */ 00341 /* secondary large-scale field and extract subdomain */ 00342 istat = read_field_subdomain_period(&tas_rea, &(data->learning->sup_lon), &(data->learning->sup_lat), 00343 &missing_value, data->learning->nomvar_rea_sup, 00344 data->learning->obs->time_s->year, data->learning->obs->time_s->month, 00345 data->learning->obs->time_s->day, 00346 data->conf->secondary_longitude_min, data->conf->secondary_longitude_max, 00347 data->conf->secondary_latitude_min, data->conf->secondary_latitude_max, 00348 data->learning->rea_coords, data->learning->rea_gridname, 00349 data->learning->rea_lonname, data->learning->rea_latname, 00350 data->learning->rea_dimxname, data->learning->rea_dimyname, 00351 data->learning->rea_timename, data->learning->filename_rea_sup, 00352 &(data->learning->sup_nlon), &(data->learning->sup_nlat), data->learning->obs->ntime); 00353 00354 /* Perform spatial mean of secondary large-scale fields */ 00355 tas_rea_mean = (double *) malloc(data->learning->obs->ntime * sizeof(double)); 00356 if (tas_rea_mean == NULL) alloc_error(__FILE__, __LINE__); 00357 /* Prepare mask */ 00358 if (data->secondary_mask->use_mask == TRUE) { 00359 (void) extract_subdomain(&mask_subd, &lon_mask, &lat_mask, &nlon_mask, &nlat_mask, data->secondary_mask->field, 00360 data->secondary_mask->lon, data->secondary_mask->lat, 00361 data->conf->secondary_longitude_min, data->conf->secondary_longitude_max, 00362 data->conf->secondary_latitude_min, data->conf->secondary_latitude_max, 00363 data->secondary_mask->nlon, data->secondary_mask->nlat, 1); 00364 if (data->learning->sup_nlon != nlon_mask || data->learning->sup_nlat != nlat_mask) { 00365 (void) fprintf(stderr, "%s: IMPORTANT WARNING: The mask for secondary large-scale fields after selecting subdomain has invalid dimensions: nlon=%d nlat=%d. Expected: nlon=%d nlat=%d\nReverting to no-mask processing.", __FILE__, nlon_mask, nlat_mask, 00366 data->learning->sup_nlon, data->learning->sup_nlat); 00367 mask_sub = (short int *) NULL; 00368 } 00369 else { 00370 mask_sub = (short int *) malloc(data->learning->sup_nlat*data->learning->sup_nlon * sizeof(short int)); 00371 if (mask_sub == NULL) alloc_error(__FILE__, __LINE__); 00372 for (i=0; i<data->learning->sup_nlat*data->learning->sup_nlon; i++) 00373 mask_sub[i] = (short int) mask_subd[i]; 00374 } 00375 (void) free(mask_subd); 00376 (void) free(lon_mask); 00377 (void) free(lat_mask); 00378 } 00379 else 00380 mask_sub = (short int *) NULL; 00381 00382 if (mask_sub != NULL) 00383 printf("%s: Using a mask for secondary large-scale fields.\n", __FILE__); 00384 00385 (void) mean_field_spatial(tas_rea_mean, tas_rea, mask_sub, data->learning->sup_nlon, data->learning->sup_nlat, 00386 data->learning->obs->ntime); 00387 if (mask_sub != NULL) 00388 (void) free(mask_sub); 00389 00390 /* Loop over each season */ 00391 (void) printf("Extract data for each season separately and process each season.\n"); 00392 00393 for (s=0; s<data->conf->nseasons; s++) { 00394 /* Process separately each season */ 00395 00396 /* Select season months in the whole time period and create sub-period fields */ 00397 if (data->learning->obs_neof != 0) { 00398 (void) extract_subperiod_months(&buf_learn_obs_sub, &(ntime_sub[s]), buf_learn_obs, 00399 data->learning->time_s->year, data->learning->time_s->month, data->learning->time_s->day, 00400 data->conf->season[s].month, 00401 1, 1, data->learning->obs_neof, ntime_learn_all, 00402 data->conf->season[s].nmonths); 00403 } 00404 (void) extract_subperiod_months(&buf_learn_rea_sub, &(ntime_sub[s]), buf_learn_rea, 00405 data->learning->time_s->year, data->learning->time_s->month, data->learning->time_s->day, 00406 data->conf->season[s].month, 00407 1, 1, data->learning->rea_neof, ntime_learn_all, 00408 data->conf->season[s].nmonths); 00409 (void) extract_subperiod_months(&buf_learn_pc_sub, &(ntime_sub[s]), buf_learn_pc, 00410 data->learning->time_s->year, data->learning->time_s->month, data->learning->time_s->day, 00411 data->conf->season[s].month, 00412 1, 1, data->learning->rea_neof, ntime_learn_all, 00413 data->conf->season[s].nmonths); 00414 (void) extract_subperiod_months(&tas_rea_mean_sub, &(ntime_sub[s]), tas_rea_mean, 00415 data->learning->time_s->year, data->learning->time_s->month, data->learning->time_s->day, 00416 data->conf->season[s].month, 00417 1, 1, 1, ntime_learn_all, 00418 data->conf->season[s].nmonths); 00419 (void) extract_subperiod_months(&tas_rea_sub, &(ntime_sub[s]), tas_rea, 00420 data->learning->time_s->year, data->learning->time_s->month, data->learning->time_s->day, 00421 data->conf->season[s].month, 00422 1, data->learning->sup_nlon, data->learning->sup_nlat, ntime_learn_all, 00423 data->conf->season[s].nmonths); 00424 (void) extract_subperiod_months(&mean_precip_sub, &(ntime_sub[s]), mean_precip, 00425 data->learning->time_s->year, data->learning->time_s->month, data->learning->time_s->day, 00426 data->conf->season[s].month, 00427 1, 1, data->reg->npts, ntime_learn_all, 00428 data->conf->season[s].nmonths); 00429 00431 data->learning->data[s].sup_index = (double *) malloc(ntime_sub[s] * sizeof(double)); 00432 if (data->learning->data[s].sup_index == NULL) alloc_error(__FILE__, __LINE__); 00433 data->learning->data[s].sup_val = (double *) malloc(data->learning->sup_nlon*data->learning->sup_nlat*ntime_sub[s] * sizeof(double)); 00434 if (data->learning->data[s].sup_val == NULL) alloc_error(__FILE__, __LINE__); 00435 00436 /* Compute mean and variance over time */ 00437 data->learning->data[s].sup_index_mean = gsl_stats_mean(tas_rea_mean_sub, 1, ntime_sub[s]); 00438 data->learning->data[s].sup_index_var = gsl_stats_variance(tas_rea_mean_sub, 1, ntime_sub[s]); 00439 00440 /* Normalize using mean and variance */ 00441 (void) normalize_field(data->learning->data[s].sup_index, tas_rea_mean_sub, data->learning->data[s].sup_index_mean, 00442 data->learning->data[s].sup_index_var, 1, 1, ntime_sub[s]); 00443 00444 /* Compute mean and variance over time for each point */ 00445 sup_mean = (double *) malloc(data->learning->sup_nlon*data->learning->sup_nlat*ntime_sub[s] * sizeof(double)); 00446 if (sup_mean == NULL) alloc_error(__FILE__, __LINE__); 00447 sup_var = (double *) malloc(data->learning->sup_nlon*data->learning->sup_nlat*ntime_sub[s] * sizeof(double)); 00448 if (sup_var == NULL) alloc_error(__FILE__, __LINE__); 00449 (void) time_mean_variance_field_2d(sup_mean, sup_var, tas_rea_sub, data->learning->sup_nlon, data->learning->sup_nlat, ntime_sub[s]); 00450 00451 /* Normalize whole secondary 2D field using mean and variance at each point */ 00452 (void) normalize_field_2d(data->learning->data[s].sup_val, tas_rea_sub, sup_mean, 00453 sup_var, data->learning->sup_nlon, data->learning->sup_nlat, ntime_sub[s]); 00454 00455 (void) free(sup_mean); 00456 sup_mean = NULL; 00457 (void) free(sup_var); 00458 sup_var = NULL; 00459 00461 data->learning->data[s].ntime = ntime_sub[s]; 00462 data->learning->data[s].time = (double *) malloc(ntime_sub[s] * sizeof(double)); 00463 if (data->learning->data[s].time == NULL) alloc_error(__FILE__, __LINE__); 00464 data->learning->data[s].time_s->year = (int *) malloc(ntime_sub[s] * sizeof(int)); 00465 if (data->learning->data[s].time_s->year == NULL) alloc_error(__FILE__, __LINE__); 00466 data->learning->data[s].time_s->month = (int *) malloc(ntime_sub[s] * sizeof(int)); 00467 if (data->learning->data[s].time_s->month == NULL) alloc_error(__FILE__, __LINE__); 00468 data->learning->data[s].time_s->day = (int *) malloc(ntime_sub[s] * sizeof(int)); 00469 if (data->learning->data[s].time_s->day == NULL) alloc_error(__FILE__, __LINE__); 00470 data->learning->data[s].time_s->hour = (int *) malloc(ntime_sub[s] * sizeof(int)); 00471 if (data->learning->data[s].time_s->hour == NULL) alloc_error(__FILE__, __LINE__); 00472 data->learning->data[s].time_s->minutes = (int *) malloc(ntime_sub[s] * sizeof(int)); 00473 if (data->learning->data[s].time_s->minutes == NULL) alloc_error(__FILE__, __LINE__); 00474 data->learning->data[s].time_s->seconds = (double *) malloc(ntime_sub[s] * sizeof(double)); 00475 if (data->learning->data[s].time_s->seconds == NULL) alloc_error(__FILE__, __LINE__); 00476 00477 /* Retrieve time index spanning selected months and assign time structure values */ 00478 t = 0; 00479 00480 /* Initialize udunits */ 00481 ut_set_error_message_handler(ut_ignore); 00482 unitSystem = ut_read_xml(NULL); 00483 ut_set_error_message_handler(ut_write_to_stderr); 00484 dataunits = ut_parse(unitSystem, data->conf->time_units, UT_ASCII); 00485 00486 for (nt=0; nt<ntime_learn_all; nt++) 00487 for (ntt=0; ntt<data->conf->season[s].nmonths; ntt++) 00488 if (data->learning->time_s->month[nt] == data->conf->season[s].month[ntt]) { 00489 data->learning->data[s].time_s->year[t] = data->learning->time_s->year[nt]; 00490 data->learning->data[s].time_s->month[t] = data->learning->time_s->month[nt]; 00491 data->learning->data[s].time_s->day[t] = data->learning->time_s->day[nt]; 00492 data->learning->data[s].time_s->hour[t] = data->learning->time_s->hour[nt]; 00493 data->learning->data[s].time_s->minutes[t] = data->learning->time_s->minutes[nt]; 00494 data->learning->data[s].time_s->seconds[t] = data->learning->time_s->seconds[nt]; 00495 istat = utInvCalendar2(data->learning->data[s].time_s->year[t], data->learning->data[s].time_s->month[t], 00496 data->learning->data[s].time_s->day[t], data->learning->data[s].time_s->hour[t], 00497 data->learning->data[s].time_s->minutes[t], data->learning->data[s].time_s->seconds[t], 00498 dataunits, &(data->learning->data[s].time[t])); 00499 t++; 00500 } 00501 00502 (void) ut_free(dataunits); 00503 (void) ut_free_system(unitSystem); 00504 00507 buf_learn = (double *) realloc(buf_learn, ntime_sub[s] * (data->learning->rea_neof + data->learning->obs_neof) * sizeof(double)); 00508 if (buf_learn == NULL) alloc_error(__FILE__, __LINE__); 00509 00510 /* Normalisation by the first Singular Value */ 00511 rea_first_sing = data->learning->rea->sing[0]; 00512 for (eof=0; eof<data->learning->rea_neof; eof++) { 00513 rea_sing = data->learning->rea->sing[eof]; 00514 for (nt=0; nt<ntime_sub[s]; nt++) { 00515 buf_learn_rea_sub[nt+eof*ntime_sub[s]] = buf_learn_rea_sub[nt+eof*ntime_sub[s]] * rea_sing / rea_first_sing; 00516 buf_learn[nt+eof*ntime_sub[s]] = buf_learn_rea_sub[nt+eof*ntime_sub[s]]; 00517 } 00518 } 00519 if (data->learning->obs_neof != 0) { 00520 obs_first_sing = data->learning->obs->sing[0]; 00521 for (eof=0; eof<data->learning->obs_neof; eof++) { 00522 obs_sing = data->learning->obs->sing[eof]; 00523 for (nt=0; nt<ntime_sub[s]; nt++) { 00524 buf_learn_obs_sub[nt+eof*ntime_sub[s]] = buf_learn_obs_sub[nt+eof*ntime_sub[s]] * obs_sing / obs_first_sing; 00525 buf_learn[nt+(eof+data->learning->rea_neof)*ntime_sub[s]] = buf_learn_obs_sub[nt+eof*ntime_sub[s]]; 00526 } 00527 } 00528 } 00529 00530 /* Compute best clusters */ 00531 buf_weight = (double *) realloc(buf_weight, data->conf->season[s].nclusters * (data->learning->rea_neof + data->learning->obs_neof) * 00532 sizeof(double)); 00533 if (buf_weight == NULL) alloc_error(__FILE__, __LINE__); 00534 niter = best_clusters(buf_weight, buf_learn, data->conf->classif_type, data->conf->npartitions, 00535 data->conf->nclassifications, data->learning->rea_neof + data->learning->obs_neof, 00536 data->conf->season[s].nclusters, ntime_sub[s]); 00537 00538 /* Keep only first data->learning->rea_neof EOFs */ 00539 data->learning->data[s].weight = (double *) 00540 malloc(data->conf->season[s].nclusters*data->learning->rea_neof * sizeof(double)); 00541 if (data->learning->data[s].weight == NULL) alloc_error(__FILE__, __LINE__); 00542 for (clust=0; clust<data->conf->season[s].nclusters; clust++) 00543 for (eof=0; eof<data->learning->rea_neof; eof++) 00544 data->learning->data[s].weight[eof+clust*data->learning->rea_neof] = 00545 buf_weight[eof+clust*(data->learning->rea_neof+data->learning->obs_neof)]; 00546 00547 /* Classify each day in the current clusters */ 00548 data->learning->data[s].class_clusters = (int *) malloc(ntime_sub[s] * sizeof(int)); 00549 if (data->learning->data[s].class_clusters == NULL) alloc_error(__FILE__, __LINE__); 00550 (void) class_days_pc_clusters(data->learning->data[s].class_clusters, buf_learn, 00551 data->learning->data[s].weight, data->conf->classif_type, 00552 data->learning->rea_neof, data->conf->season[s].nclusters, 00553 ntime_sub[s]); 00554 00555 /* Set mean and variance of distances to clusters to 1.0 because we first need to compute distances and */ 00556 /* we don't have a control run in learning mode */ 00557 mean_dist = (double *) realloc(mean_dist, data->conf->season[s].nclusters * sizeof(double)); 00558 if (mean_dist == NULL) alloc_error(__FILE__, __LINE__); 00559 var_dist = (double *) realloc(var_dist, data->conf->season[s].nclusters * sizeof(double)); 00560 if (var_dist == NULL) alloc_error(__FILE__, __LINE__); 00561 for (clust=0; clust<data->conf->season[s].nclusters; clust++) { 00562 mean_dist[clust] = 1.0; 00563 var_dist[clust] = 1.0; 00564 } 00565 00566 /* Compute distances to clusters using normalization */ 00567 dist = (double *) realloc(dist, data->conf->season[s].nclusters*ntime_sub[s] * sizeof(double)); 00568 if (dist == NULL) alloc_error(__FILE__, __LINE__); 00569 (void) dist_clusters_normctrl(dist, buf_learn_pc_sub, data->learning->data[s].weight, 00570 data->learning->pc_normalized_var, data->learning->pc_normalized_var, mean_dist, var_dist, 00571 data->learning->rea_neof, data->conf->season[s].nclusters, ntime_sub[s]); 00572 /* Normalize */ 00573 for (clust=0; clust<data->conf->season[s].nclusters; clust++) { 00574 /* Calculate mean over time */ 00575 mean_dist[clust] = gsl_stats_mean(&(dist[clust*ntime_sub[s]]), 1, ntime_sub[s]); 00576 /* Calculate variance over time */ 00577 var_dist[clust] = gsl_stats_variance(&(dist[clust*ntime_sub[s]]), 1, ntime_sub[s]); 00578 /* Normalize */ 00579 for (nt=0; nt<ntime_sub[s]; nt++) 00580 dist[nt+clust*ntime_sub[s]] = ( dist[nt+clust*ntime_sub[s]] - mean_dist[clust] ) / sqrt(var_dist[clust]); 00581 } 00582 00583 /* Classify each day in the current clusters */ 00584 /* data->learning->data[s].class_clusters */ 00585 /* data->learning->data[s].class_clusters = (int *) malloc(ntime_sub[s] * sizeof(int)); 00586 if (data->learning->data[s].class_clusters == NULL) alloc_error(__FILE__, __LINE__); 00587 (void) class_days_pc_clusters(data->learning->data[s].class_clusters, buf_learn, 00588 data->learning->data[s].weight, data->conf->classif_type, 00589 data->learning->rea_neof, data->conf->season[s].nclusters, ntime_sub[s]);*/ 00590 00591 /* Allocate memory for regression */ 00592 precip_reg = (double *) malloc(data->conf->season[s].nreg * sizeof(double)); 00593 if (precip_reg == NULL) alloc_error(__FILE__, __LINE__); 00594 precip_index = (double *) malloc(ntime_sub[s] * sizeof(double)); 00595 if (precip_index == NULL) alloc_error(__FILE__, __LINE__); 00596 precip_err = (double *) malloc(ntime_sub[s] * sizeof(double)); 00597 if (precip_err == NULL) alloc_error(__FILE__, __LINE__); 00598 dist_reg = (double *) malloc(data->conf->season[s].nreg*ntime_sub[s] * sizeof(double)); 00599 if (dist_reg == NULL) alloc_error(__FILE__, __LINE__); 00600 vif = (double *) malloc(data->conf->season[s].nreg * sizeof(double)); 00601 if (vif == NULL) alloc_error(__FILE__, __LINE__); 00602 00603 /* Create variable to hold values of x vector for regression */ 00604 /* Begin with distances to clusters */ 00605 for (clust=0; clust<data->conf->season[s].nclusters; clust++) 00606 for (t=0; t<ntime_sub[s]; t++) 00607 dist_reg[t+clust*ntime_sub[s]] = dist[t+clust*ntime_sub[s]]; 00608 00609 /* For special seasons using secondary field in the regression, append values to x vector for regression */ 00610 if (data->conf->season[s].nreg == (data->conf->season[s].nclusters+1)) { 00611 clust = data->conf->season[s].nclusters; 00612 for (t=0; t<ntime_sub[s]; t++) 00613 dist_reg[t+clust*ntime_sub[s]] = data->learning->data[s].sup_index[t]; 00614 } 00615 00616 data->learning->data[s].precip_reg_cst = (double *) malloc(data->reg->npts * sizeof(double)); 00617 if (data->learning->data[s].precip_reg_cst == NULL) alloc_error(__FILE__, __LINE__); 00618 data->learning->data[s].precip_reg = (double *) malloc(data->reg->npts*data->conf->season[s].nreg * sizeof(double)); 00619 if (data->learning->data[s].precip_reg == NULL) alloc_error(__FILE__, __LINE__); 00620 data->learning->data[s].precip_reg_dist = (double *) 00621 malloc(data->conf->season[s].nclusters*ntime_sub[s] * sizeof(double)); 00622 if (data->learning->data[s].precip_reg_dist == NULL) alloc_error(__FILE__, __LINE__); 00623 data->learning->data[s].precip_index = (double *) malloc(data->reg->npts*ntime_sub[s] * sizeof(double)); 00624 if (data->learning->data[s].precip_index == NULL) alloc_error(__FILE__, __LINE__); 00625 data->learning->data[s].precip_index_obs = (double *) malloc(data->reg->npts*ntime_sub[s] * sizeof(double)); 00626 if (data->learning->data[s].precip_index_obs == NULL) alloc_error(__FILE__, __LINE__); 00627 data->learning->data[s].precip_reg_err = (double *) malloc(data->reg->npts*ntime_sub[s] * sizeof(double)); 00628 if (data->learning->data[s].precip_reg_err == NULL) alloc_error(__FILE__, __LINE__); 00629 data->learning->data[s].precip_reg_rsq = (double *) malloc(data->reg->npts * sizeof(double)); 00630 if (data->learning->data[s].precip_reg_rsq == NULL) alloc_error(__FILE__, __LINE__); 00631 data->learning->data[s].precip_reg_vif = (double *) malloc(data->conf->season[s].nreg * sizeof(double)); 00632 if (data->learning->data[s].precip_reg_vif == NULL) alloc_error(__FILE__, __LINE__); 00633 data->learning->data[s].precip_reg_autocor = (double *) malloc(data->reg->npts * sizeof(double)); 00634 if (data->learning->data[s].precip_reg_autocor == NULL) alloc_error(__FILE__, __LINE__); 00635 00636 /* Save distances */ 00637 for (t=0; t<ntime_sub[s]; t++) 00638 for (clust=0; clust<data->conf->season[s].nclusters; clust++) 00639 data->learning->data[s].precip_reg_dist[clust+t*data->conf->season[s].nclusters] = dist[t+clust*ntime_sub[s]]; 00640 00641 for (pt=0; pt<data->reg->npts; pt++) { 00642 /* Compute regression and save regression constant */ 00643 istat = regress(precip_reg, dist_reg, &(mean_precip_sub[pt*ntime_sub[s]]), &(data->learning->data[s].precip_reg_cst[pt]), 00644 precip_index, precip_err, &chisq, &rsq, vif, &autocor, data->conf->season[s].nreg, ntime_sub[s]); 00645 /* Save R^2 */ 00646 data->learning->data[s].precip_reg_rsq[pt] = rsq; 00647 /* Save residuals */ 00648 for (t=0; t<ntime_sub[s]; t++) 00649 data->learning->data[s].precip_reg_err[pt+t*data->reg->npts] = precip_err[t]; 00650 /* Save autocorrelation of residuals */ 00651 data->learning->data[s].precip_reg_autocor[pt] = autocor; 00652 /* Save Variance Inflation Factor VIF, and compute mean VIF */ 00653 if (pt == 0) { 00654 meanvif = 0.0; 00655 for (term=0; term<data->conf->season[s].nreg; term++) { 00656 data->learning->data[s].precip_reg_vif[term] = vif[term]; 00657 meanvif += vif[term]; 00658 } 00659 meanvif = meanvif / (double) data->conf->season[s].nreg; 00660 } 00661 00662 // (void) fprintf(stdout, "%s: pt=%d R^2=%lf CHI^2=%lf ACOR=%lf\n", __FILE__, pt, rsq, chisq, autocor); 00663 00664 /* Save regression coefficients */ 00665 for (clust=0; clust<data->conf->season[s].nreg; clust++) 00666 data->learning->data[s].precip_reg[pt+clust*data->reg->npts] = precip_reg[clust]; 00667 00668 /* Save precipitation index */ 00669 for (t=0; t<ntime_sub[s]; t++) 00670 data->learning->data[s].precip_index[pt+t*data->reg->npts] = precip_index[t]; 00671 00672 /* Save observed precipitation index */ 00673 for (t=0; t<ntime_sub[s]; t++) 00674 data->learning->data[s].precip_index_obs[pt+t*data->reg->npts] = mean_precip_sub[t+pt*ntime_sub[s]]; 00675 } 00676 00677 (void) fprintf(stdout, "%s: MeanVIF=%lf\n", __FILE__, meanvif); 00678 00679 (void) free(precip_reg); 00680 (void) free(precip_index); 00681 (void) free(precip_err); 00682 (void) free(dist_reg); 00683 (void) free(vif); 00684 00685 (void) free(buf_learn_rea_sub); 00686 buf_learn_rea_sub = NULL; 00687 if (data->learning->obs_neof != 0) { 00688 (void) free(buf_learn_obs_sub); 00689 buf_learn_obs_sub = NULL; 00690 } 00691 (void) free(buf_learn_pc_sub); 00692 buf_learn_pc_sub = NULL; 00693 (void) free(tas_rea_mean_sub); 00694 tas_rea_mean_sub = NULL; 00695 (void) free(tas_rea_sub); 00696 tas_rea_sub = NULL; 00697 (void) free(mean_precip_sub); 00698 mean_precip_sub = NULL; 00699 } 00700 00701 (void) free(tas_rea); 00702 (void) free(tas_rea_mean); 00703 (void) free(mean_precip); 00704 (void) free(buf_weight); 00705 (void) free(buf_learn); 00706 (void) free(buf_learn_rea); 00707 if (data->learning->obs_neof != 0) (void) free(buf_learn_obs); 00708 (void) free(buf_learn_pc); 00709 (void) free(ntime_sub); 00710 (void) free(rea_var); 00711 (void) free(mean_dist); 00712 (void) free(var_dist); 00713 (void) free(dist); 00714 00715 /* If wanted, write learning data to files for later use */ 00716 if (data->learning->learning_save == TRUE) { 00717 (void) printf("Writing learning fields.\n"); 00718 istat = write_learning_fields(data); 00719 } 00720 if (niter == 1) { 00721 (void) fprintf(stderr, "%s: ERROR: In one classification, only 1 iteration was needed! Probably an error in your EOF data or configuration. Must abort...\n", 00722 __FILE__); 00723 return -1; 00724 } 00725 } 00726 00727 /* Success status */ 00728 return 0; 00729 }