Algorithm to generate clusters. More...
#include <classif.h>
Go to the source code of this file.
Functions | |
int | generate_clusters (double *clusters, double *pc_eof_days, char *type, int nclassif, int neof, int ncluster, int ndays) |
Algorithm to generate clusters based on the Michelangeli et al (1995) methodology. |
Algorithm to generate clusters.
Definition in file generate_clusters.c.
int generate_clusters | ( | double * | clusters, | |
double * | pc_eof_days, | |||
char * | type, | |||
int | nclassif, | |||
int | neof, | |||
int | ncluster, | |||
int | ndays | |||
) |
Algorithm to generate clusters based on the Michelangeli et al (1995) methodology.
[out] | clusters | Clusters' positions. |
[in] | pc_eof_days | Principal Components of EOF (daily data). |
[in] | type | Type of distance used. Possible values: euclidian. |
[in] | nclassif | Maximum number of classifications to perform in the iterative algorithm. |
[in] | neof | Number of EOFs. |
[in] | ncluster | Number of clusters. |
[in] | ndays | Number of days in the pc_eof_days vector. |
Generate ncluster random days
Warning: we are using time() as the seed. Don't run this subroutine twice with the same time. If you do you will get the exact same sequence.
Main algorithm
Try to find the maximum distance (PC-space) between the new cluster center and the previous value
Definition at line 58 of file generate_clusters.c.
References alloc_error(), and class_days_pc_clusters().
Referenced by best_clusters(), and main().
00059 { 00072 unsigned long int *random_num = NULL; /* Vector of random numbers for random choice of initial points. */ 00073 int eof; /* Loop counter for EOF. */ 00074 int clust; /* Loop counter for clusters. */ 00075 int day; /* Loop counter for days. */ 00076 int classif; /* Loop counter for classifications. */ 00077 int ndays_cluster; /* Number of days in the current cluster. */ 00078 00079 const gsl_rng_type *T; /* For random number generation type. */ 00080 gsl_rng *rng; /* For random number generation. */ 00081 00082 double cluster_bary; /* Cluster barycentre. */ 00083 double ndiff_cluster_bary; /* Distance between current cluster barycenter and previous value in the iteration. */ 00084 double mean_days; /* Mean of the days (PC-space) for a cluster. */ 00085 double *eof_days_cluster = NULL; /* Vector of clusters' barycenter positions (PC-space). */ 00086 int *days_class_cluster = NULL; /* Vector of classification of days into each cluster. */ 00087 00088 static unsigned long int seed = 0; 00089 00090 (void) fprintf(stdout, "%s:: BEGIN: Find clusters among data points.\n", __FILE__); 00091 00092 /***********************************/ 00094 /***********************************/ 00095 00096 (void) fprintf(stdout, "%s:: Choosing %d random points.\n", __FILE__, ncluster); 00097 00098 /* Initialize random number generator */ 00099 T = gsl_rng_default; 00100 rng = gsl_rng_alloc(T); 00103 if (seed == 0) seed = time(NULL); 00104 (void) gsl_rng_set(rng, seed++); 00105 00106 /* Generate ncluster random days and initialize cluster PC array */ 00107 random_num = (unsigned long int *) calloc(ncluster, sizeof(unsigned long int)); 00108 if (random_num == NULL) alloc_error(__FILE__, __LINE__); 00109 for (clust=0; clust<ncluster; clust++) 00110 random_num[clust] = gsl_rng_uniform_int(rng, ndays); 00111 00112 /* Free random number generator */ 00113 (void) gsl_rng_free(rng); 00114 00115 /********************/ 00117 /********************/ 00118 00119 /* Allocate memory */ 00120 eof_days_cluster = (double *) calloc(neof*ncluster, sizeof(double)); 00121 if (eof_days_cluster == NULL) alloc_error(__FILE__, __LINE__); 00122 days_class_cluster = (int *) calloc(ndays, sizeof(int)); 00123 if (days_class_cluster == NULL) alloc_error(__FILE__, __LINE__); 00124 00125 /* Initialize cluster PC array randomly */ 00126 (void) fprintf(stdout, "%s:: Initializing cluster array.\n", __FILE__); 00127 for (eof=0; eof<neof; eof++) 00128 for (clust=0; clust<ncluster; clust++) { 00129 eof_days_cluster[eof+clust*neof] = pc_eof_days[random_num[clust]+eof*ndays]; 00130 00131 #if DEBUG >= 7 00132 (void) fprintf(stderr, "eof=%d cluster=%d eof_days_cluster=%lf\n", eof, clust, eof_days_cluster[eof+clust*neof]); 00133 #endif 00134 } 00135 00136 (void) free(random_num); 00137 00138 /* Iterate by performing up to nclassif classifications. Stop if same cluster center positions in two consecutive iterations. */ 00139 cluster_bary = -9999999999.9; 00140 ndiff_cluster_bary = -9999999999.9; 00141 classif = 0; 00142 00143 (void) fprintf(stdout, "%s:: Iterate up to %d classifications or when classification is stable.\n", __FILE__, nclassif); 00144 00145 while (ndiff_cluster_bary != 0.0 && classif < nclassif) { 00146 00147 #if DEBUG >= 7 00148 (void) fprintf(stderr, "classif=%d cluster_bary=%lf\n", classif, cluster_bary); 00149 #endif 00150 00151 /* Classify each day (pc_eof_days) in the current clusters (eof_days_cluster) = days_class_cluster */ 00152 (void) class_days_pc_clusters(days_class_cluster, pc_eof_days, eof_days_cluster, type, neof, ncluster, ndays); 00153 00154 /* For each cluster, perform a mean of all points falling in that cluster. 00155 Compare to the current clusters by calculating the 'coordinates' (PC-space) of the 'new' cluster center. */ 00156 cluster_bary = -9999999999.9; 00157 00158 /* Loop over clusters and EOFs */ 00159 for (clust=0; clust<ncluster; clust++) { 00160 for (eof=0; eof<neof; eof++) { 00161 mean_days = 0.0; 00162 ndays_cluster = 0; 00163 00164 /* Compute the mean (PC-space) of all the days of the current cluster */ 00165 for (day=0; day<ndays; day++) 00166 if (days_class_cluster[day] == clust) { 00167 mean_days += pc_eof_days[day+eof*ndays]; 00168 ndays_cluster++; 00169 } 00170 00171 if (ndays_cluster > 0) { 00172 00173 mean_days = mean_days / (double) ndays_cluster; 00174 00177 #if DEBUG >= 7 00178 (void) fprintf(stderr, "eof=%d cluster=%d diff_cluster_bary=%lf mean_days=%lf eof_days_cluster=%lf\n", 00179 eof, clust, fabs(mean_days - eof_days_cluster[eof+clust*neof]), mean_days, eof_days_cluster[eof+clust*neof]); 00180 #endif 00181 00182 /* Compute the difference between the new cluster center position and the previous one */ 00183 ndiff_cluster_bary = fabs(mean_days - eof_days_cluster[eof+clust*neof]); 00184 00185 /* If this new cluster center position is further away than the other EOF's ones, choose this new cluster center */ 00186 if ( ndiff_cluster_bary > cluster_bary ) 00187 cluster_bary = mean_days; 00188 00189 /* Store the new cluster center value */ 00190 clusters[eof+clust*neof] = mean_days; 00191 00192 #if DEBUG >= 7 00193 if (classif == 0 || cluster_bary == 0.0) 00194 (void) fprintf(stderr, "eof=%d cluster=%d mean_pc_days=%lf ndays_cluster=%d cluster_bary=%lf\n", 00195 eof, clust, mean_days, ndays_cluster, cluster_bary); 00196 #endif 00197 } 00198 else 00199 clusters[eof+clust*neof] = 0; 00200 } 00201 } 00202 00203 classif++; 00204 00205 /* Update the cluster center matrix with the new values */ 00206 if (cluster_bary != 0.0 && classif < nclassif) 00207 for (clust=0; clust<ncluster; clust++) 00208 for (eof=0; eof<neof; eof++) 00209 eof_days_cluster[eof+clust*neof] = clusters[eof+clust*neof]; 00210 } 00211 00212 /* Free memory */ 00213 (void) free(eof_days_cluster); 00214 (void) free(days_class_cluster); 00215 00216 (void) fprintf(stdout, "%s:: END: Find clusters among data points. %d iterations needed.\n", __FILE__, classif); 00217 00218 return classif; 00219 }