generate_clusters.c

Go to the documentation of this file.
00001 /* ***************************************************** */
00002 /* Algorithm to generate clusters.                       */
00003 /* generate_clusters.c                                   */
00004 /* ***************************************************** */
00005 /* Author: Christian Page, CERFACS, Toulouse, France.    */
00006 /* ***************************************************** */
00011 /* LICENSE BEGIN
00012 
00013 Copyright Cerfacs (Christian Page) (2015)
00014 
00015 christian.page@cerfacs.fr
00016 
00017 This software is a computer program whose purpose is to downscale climate
00018 scenarios using a statistical methodology based on weather regimes.
00019 
00020 This software is governed by the CeCILL license under French law and
00021 abiding by the rules of distribution of free software. You can use, 
00022 modify and/ or redistribute the software under the terms of the CeCILL
00023 license as circulated by CEA, CNRS and INRIA at the following URL
00024 "http://www.cecill.info". 
00025 
00026 As a counterpart to the access to the source code and rights to copy,
00027 modify and redistribute granted by the license, users are provided only
00028 with a limited warranty and the software's author, the holder of the
00029 economic rights, and the successive licensors have only limited
00030 liability. 
00031 
00032 In this respect, the user's attention is drawn to the risks associated
00033 with loading, using, modifying and/or developing or reproducing the
00034 software by the user in light of its specific status of free software,
00035 that may mean that it is complicated to manipulate, and that also
00036 therefore means that it is reserved for developers and experienced
00037 professionals having in-depth computer knowledge. Users are therefore
00038 encouraged to load and test the software's suitability as regards their
00039 requirements in conditions enabling the security of their systems and/or 
00040 data to be ensured and, more generally, to use and operate it in the 
00041 same conditions as regards security. 
00042 
00043 The fact that you are presently reading this means that you have had
00044 knowledge of the CeCILL license and that you accept its terms.
00045 
00046 LICENSE END */
00047 
00048 
00049 
00050 
00051 
00052 
00053 
00054 #include <classif.h>
00055 
00057 int
00058 generate_clusters(double *clusters, double *pc_eof_days, char *type, int nclassif,
00059                   int neof, int ncluster, int ndays) {
00072   unsigned long int *random_num = NULL; /* Vector of random numbers for random choice of initial points. */
00073   int eof; /* Loop counter for EOF. */
00074   int clust; /* Loop counter for clusters. */
00075   int day; /* Loop counter for days. */
00076   int classif; /* Loop counter for classifications. */
00077   int ndays_cluster; /* Number of days in the current cluster. */
00078 
00079   const gsl_rng_type *T; /* For random number generation type. */
00080   gsl_rng *rng; /* For random number generation. */
00081 
00082   double cluster_bary; /* Cluster barycentre. */
00083   double ndiff_cluster_bary; /* Distance between current cluster barycenter and previous value in the iteration. */
00084   double mean_days; /* Mean of the days (PC-space) for a cluster. */
00085   double *eof_days_cluster = NULL; /* Vector of clusters' barycenter positions (PC-space). */
00086   int *days_class_cluster = NULL; /* Vector of classification of days into each cluster. */
00087 
00088   static unsigned long int seed = 0;
00089 
00090   (void) fprintf(stdout, "%s:: BEGIN: Find clusters among data points.\n", __FILE__);
00091 
00092   /***********************************/
00094   /***********************************/
00095 
00096   (void) fprintf(stdout, "%s:: Choosing %d random points.\n", __FILE__, ncluster);
00097 
00098   /* Initialize random number generator */
00099   T = gsl_rng_default;
00100   rng = gsl_rng_alloc(T);
00103   if (seed == 0) seed = time(NULL);
00104   (void) gsl_rng_set(rng, seed++);
00105   
00106   /* Generate ncluster random days and initialize cluster PC array */
00107   random_num = (unsigned long int *) calloc(ncluster, sizeof(unsigned long int));
00108   if (random_num == NULL) alloc_error(__FILE__, __LINE__);
00109   for (clust=0; clust<ncluster; clust++)
00110     random_num[clust] = gsl_rng_uniform_int(rng, ndays);  
00111   
00112   /* Free random number generator */
00113   (void) gsl_rng_free(rng);
00114   
00115   /********************/
00117   /********************/
00118 
00119   /* Allocate memory */
00120   eof_days_cluster = (double *) calloc(neof*ncluster, sizeof(double));
00121   if (eof_days_cluster == NULL) alloc_error(__FILE__, __LINE__);
00122   days_class_cluster = (int *) calloc(ndays, sizeof(int));
00123   if (days_class_cluster == NULL) alloc_error(__FILE__, __LINE__);
00124   
00125   /* Initialize cluster PC array randomly */
00126   (void) fprintf(stdout, "%s:: Initializing cluster array.\n", __FILE__);
00127   for (eof=0; eof<neof; eof++)
00128     for (clust=0; clust<ncluster; clust++) {
00129       eof_days_cluster[eof+clust*neof] = pc_eof_days[random_num[clust]+eof*ndays];
00130 
00131 #if DEBUG >= 7
00132       (void) fprintf(stderr, "eof=%d cluster=%d eof_days_cluster=%lf\n", eof, clust, eof_days_cluster[eof+clust*neof]);
00133 #endif
00134     }
00135 
00136   (void) free(random_num);
00137 
00138   /* Iterate by performing up to nclassif classifications. Stop if same cluster center positions in two consecutive iterations. */
00139   cluster_bary = -9999999999.9;
00140   ndiff_cluster_bary = -9999999999.9;
00141   classif = 0;
00142 
00143   (void) fprintf(stdout, "%s:: Iterate up to %d classifications or when classification is stable.\n", __FILE__, nclassif);
00144 
00145   while (ndiff_cluster_bary != 0.0 && classif < nclassif) {
00146 
00147 #if DEBUG >= 7
00148     (void) fprintf(stderr, "classif=%d cluster_bary=%lf\n", classif, cluster_bary);
00149 #endif
00150 
00151     /* Classify each day (pc_eof_days) in the current clusters (eof_days_cluster) = days_class_cluster */
00152     (void) class_days_pc_clusters(days_class_cluster, pc_eof_days, eof_days_cluster, type, neof, ncluster, ndays);
00153 
00154     /* For each cluster, perform a mean of all points falling in that cluster.
00155        Compare to the current clusters by calculating the 'coordinates' (PC-space) of the 'new' cluster center. */
00156     cluster_bary = -9999999999.9;
00157 
00158     /* Loop over clusters and EOFs */
00159     for (clust=0; clust<ncluster; clust++) {
00160       for (eof=0; eof<neof; eof++) {
00161         mean_days = 0.0;
00162         ndays_cluster = 0;
00163 
00164         /* Compute the mean (PC-space) of all the days of the current cluster */
00165         for (day=0; day<ndays; day++)
00166           if (days_class_cluster[day] == clust) {
00167             mean_days += pc_eof_days[day+eof*ndays];
00168             ndays_cluster++;
00169           }
00170 
00171         if (ndays_cluster > 0) {
00172 
00173           mean_days = mean_days / (double) ndays_cluster;
00174 
00177 #if DEBUG >= 7
00178           (void) fprintf(stderr, "eof=%d cluster=%d diff_cluster_bary=%lf mean_days=%lf eof_days_cluster=%lf\n",
00179                          eof, clust, fabs(mean_days - eof_days_cluster[eof+clust*neof]), mean_days, eof_days_cluster[eof+clust*neof]);
00180 #endif
00181 
00182           /* Compute the difference between the new cluster center position and the previous one */
00183           ndiff_cluster_bary = fabs(mean_days - eof_days_cluster[eof+clust*neof]);
00184 
00185           /* If this new cluster center position is further away than the other EOF's ones, choose this new cluster center */
00186           if ( ndiff_cluster_bary > cluster_bary )
00187             cluster_bary = mean_days;
00188 
00189           /* Store the new cluster center value */
00190           clusters[eof+clust*neof] = mean_days;
00191 
00192 #if DEBUG >= 7
00193           if (classif == 0 || cluster_bary == 0.0)
00194             (void) fprintf(stderr, "eof=%d cluster=%d mean_pc_days=%lf ndays_cluster=%d cluster_bary=%lf\n",
00195                            eof, clust, mean_days, ndays_cluster, cluster_bary);
00196 #endif
00197         }
00198         else
00199           clusters[eof+clust*neof] = 0;
00200       }
00201     }
00202 
00203     classif++;
00204 
00205     /* Update the cluster center matrix with the new values */
00206     if (cluster_bary != 0.0 && classif < nclassif)
00207       for (clust=0; clust<ncluster; clust++)
00208         for (eof=0; eof<neof; eof++)
00209           eof_days_cluster[eof+clust*neof] = clusters[eof+clust*neof];
00210   }
00211 
00212   /* Free memory */
00213   (void) free(eof_days_cluster);
00214   (void) free(days_class_cluster);
00215 
00216   (void) fprintf(stdout, "%s:: END: Find clusters among data points. %d iterations needed.\n", __FILE__, classif);
00217 
00218   return classif;
00219 }

Generated on 12 May 2016 for DSCLIM by  doxygen 1.6.1