generate_clusters.c File Reference

Algorithm to generate clusters. More...

#include <classif.h>
Include dependency graph for generate_clusters.c:

Go to the source code of this file.

Functions

int generate_clusters (double *clusters, double *pc_eof_days, char *type, int nclassif, int neof, int ncluster, int ndays)
 Algorithm to generate clusters based on the Michelangeli et al (1995) methodology.

Detailed Description

Algorithm to generate clusters.

Definition in file generate_clusters.c.


Function Documentation

int generate_clusters ( double *  clusters,
double *  pc_eof_days,
char *  type,
int  nclassif,
int  neof,
int  ncluster,
int  ndays 
)

Algorithm to generate clusters based on the Michelangeli et al (1995) methodology.

Parameters:
[out] clusters Clusters' positions.
[in] pc_eof_days Principal Components of EOF (daily data).
[in] type Type of distance used. Possible values: euclidian.
[in] nclassif Maximum number of classifications to perform in the iterative algorithm.
[in] neof Number of EOFs.
[in] ncluster Number of clusters.
[in] ndays Number of days in the pc_eof_days vector.
Returns:
Number of iterations.

Generate ncluster random days

Warning: we are using time() as the seed. Don't run this subroutine twice with the same time. If you do you will get the exact same sequence.

Main algorithm

Try to find the maximum distance (PC-space) between the new cluster center and the previous value

Definition at line 58 of file generate_clusters.c.

References alloc_error(), and class_days_pc_clusters().

Referenced by best_clusters(), and main().

00059                                                      {
00072   unsigned long int *random_num = NULL; /* Vector of random numbers for random choice of initial points. */
00073   int eof; /* Loop counter for EOF. */
00074   int clust; /* Loop counter for clusters. */
00075   int day; /* Loop counter for days. */
00076   int classif; /* Loop counter for classifications. */
00077   int ndays_cluster; /* Number of days in the current cluster. */
00078 
00079   const gsl_rng_type *T; /* For random number generation type. */
00080   gsl_rng *rng; /* For random number generation. */
00081 
00082   double cluster_bary; /* Cluster barycentre. */
00083   double ndiff_cluster_bary; /* Distance between current cluster barycenter and previous value in the iteration. */
00084   double mean_days; /* Mean of the days (PC-space) for a cluster. */
00085   double *eof_days_cluster = NULL; /* Vector of clusters' barycenter positions (PC-space). */
00086   int *days_class_cluster = NULL; /* Vector of classification of days into each cluster. */
00087 
00088   static unsigned long int seed = 0;
00089 
00090   (void) fprintf(stdout, "%s:: BEGIN: Find clusters among data points.\n", __FILE__);
00091 
00092   /***********************************/
00094   /***********************************/
00095 
00096   (void) fprintf(stdout, "%s:: Choosing %d random points.\n", __FILE__, ncluster);
00097 
00098   /* Initialize random number generator */
00099   T = gsl_rng_default;
00100   rng = gsl_rng_alloc(T);
00103   if (seed == 0) seed = time(NULL);
00104   (void) gsl_rng_set(rng, seed++);
00105   
00106   /* Generate ncluster random days and initialize cluster PC array */
00107   random_num = (unsigned long int *) calloc(ncluster, sizeof(unsigned long int));
00108   if (random_num == NULL) alloc_error(__FILE__, __LINE__);
00109   for (clust=0; clust<ncluster; clust++)
00110     random_num[clust] = gsl_rng_uniform_int(rng, ndays);  
00111   
00112   /* Free random number generator */
00113   (void) gsl_rng_free(rng);
00114   
00115   /********************/
00117   /********************/
00118 
00119   /* Allocate memory */
00120   eof_days_cluster = (double *) calloc(neof*ncluster, sizeof(double));
00121   if (eof_days_cluster == NULL) alloc_error(__FILE__, __LINE__);
00122   days_class_cluster = (int *) calloc(ndays, sizeof(int));
00123   if (days_class_cluster == NULL) alloc_error(__FILE__, __LINE__);
00124   
00125   /* Initialize cluster PC array randomly */
00126   (void) fprintf(stdout, "%s:: Initializing cluster array.\n", __FILE__);
00127   for (eof=0; eof<neof; eof++)
00128     for (clust=0; clust<ncluster; clust++) {
00129       eof_days_cluster[eof+clust*neof] = pc_eof_days[random_num[clust]+eof*ndays];
00130 
00131 #if DEBUG >= 7
00132       (void) fprintf(stderr, "eof=%d cluster=%d eof_days_cluster=%lf\n", eof, clust, eof_days_cluster[eof+clust*neof]);
00133 #endif
00134     }
00135 
00136   (void) free(random_num);
00137 
00138   /* Iterate by performing up to nclassif classifications. Stop if same cluster center positions in two consecutive iterations. */
00139   cluster_bary = -9999999999.9;
00140   ndiff_cluster_bary = -9999999999.9;
00141   classif = 0;
00142 
00143   (void) fprintf(stdout, "%s:: Iterate up to %d classifications or when classification is stable.\n", __FILE__, nclassif);
00144 
00145   while (ndiff_cluster_bary != 0.0 && classif < nclassif) {
00146 
00147 #if DEBUG >= 7
00148     (void) fprintf(stderr, "classif=%d cluster_bary=%lf\n", classif, cluster_bary);
00149 #endif
00150 
00151     /* Classify each day (pc_eof_days) in the current clusters (eof_days_cluster) = days_class_cluster */
00152     (void) class_days_pc_clusters(days_class_cluster, pc_eof_days, eof_days_cluster, type, neof, ncluster, ndays);
00153 
00154     /* For each cluster, perform a mean of all points falling in that cluster.
00155        Compare to the current clusters by calculating the 'coordinates' (PC-space) of the 'new' cluster center. */
00156     cluster_bary = -9999999999.9;
00157 
00158     /* Loop over clusters and EOFs */
00159     for (clust=0; clust<ncluster; clust++) {
00160       for (eof=0; eof<neof; eof++) {
00161         mean_days = 0.0;
00162         ndays_cluster = 0;
00163 
00164         /* Compute the mean (PC-space) of all the days of the current cluster */
00165         for (day=0; day<ndays; day++)
00166           if (days_class_cluster[day] == clust) {
00167             mean_days += pc_eof_days[day+eof*ndays];
00168             ndays_cluster++;
00169           }
00170 
00171         if (ndays_cluster > 0) {
00172 
00173           mean_days = mean_days / (double) ndays_cluster;
00174 
00177 #if DEBUG >= 7
00178           (void) fprintf(stderr, "eof=%d cluster=%d diff_cluster_bary=%lf mean_days=%lf eof_days_cluster=%lf\n",
00179                          eof, clust, fabs(mean_days - eof_days_cluster[eof+clust*neof]), mean_days, eof_days_cluster[eof+clust*neof]);
00180 #endif
00181 
00182           /* Compute the difference between the new cluster center position and the previous one */
00183           ndiff_cluster_bary = fabs(mean_days - eof_days_cluster[eof+clust*neof]);
00184 
00185           /* If this new cluster center position is further away than the other EOF's ones, choose this new cluster center */
00186           if ( ndiff_cluster_bary > cluster_bary )
00187             cluster_bary = mean_days;
00188 
00189           /* Store the new cluster center value */
00190           clusters[eof+clust*neof] = mean_days;
00191 
00192 #if DEBUG >= 7
00193           if (classif == 0 || cluster_bary == 0.0)
00194             (void) fprintf(stderr, "eof=%d cluster=%d mean_pc_days=%lf ndays_cluster=%d cluster_bary=%lf\n",
00195                            eof, clust, mean_days, ndays_cluster, cluster_bary);
00196 #endif
00197         }
00198         else
00199           clusters[eof+clust*neof] = 0;
00200       }
00201     }
00202 
00203     classif++;
00204 
00205     /* Update the cluster center matrix with the new values */
00206     if (cluster_bary != 0.0 && classif < nclassif)
00207       for (clust=0; clust<ncluster; clust++)
00208         for (eof=0; eof<neof; eof++)
00209           eof_days_cluster[eof+clust*neof] = clusters[eof+clust*neof];
00210   }
00211 
00212   /* Free memory */
00213   (void) free(eof_days_cluster);
00214   (void) free(days_class_cluster);
00215 
00216   (void) fprintf(stdout, "%s:: END: Find clusters among data points. %d iterations needed.\n", __FILE__, classif);
00217 
00218   return classif;
00219 }


Generated on 12 May 2016 for DSCLIM by  doxygen 1.6.1