Algorithm to generate best clusters among many tries. More...
#include <classif.h>
Go to the source code of this file.
Functions | |
int | best_clusters (double *best_clusters, double *pc_eof_days, char *type, int npart, int nclassif, int neof, int ncluster, int ndays) |
Algorithm to generate best clusters among many tries. |
Algorithm to generate best clusters among many tries.
Definition in file best_clusters.c.
int best_clusters | ( | double * | best_clusters, | |
double * | pc_eof_days, | |||
char * | type, | |||
int | npart, | |||
int | nclassif, | |||
int | neof, | |||
int | ncluster, | |||
int | ndays | |||
) |
Algorithm to generate best clusters among many tries.
[out] | best_clusters | Best clusters' positions. |
[in] | pc_eof_days | Principal Components of EOF (daily data). |
[in] | type | Type of distance used. Possible values: euclidian. |
[in] | npart | Number of classification partitions to try. |
[in] | nclassif | Maximum number of classifications to perform in the iterative algorithm. |
[in] | neof | Number of EOFs. |
[in] | ncluster | Number of clusters. |
[in] | ndays | Number of days in the pc_eof_days vector. |
Tries to find the best partition (clustering), i.e. the one that is closest to all the other partitions (which corresponds to the partition closest to the barycenter of the partitions).
Definition at line 58 of file best_clusters.c.
References alloc_error(), and generate_clusters().
Referenced by main(), and wt_learning().
/**
 * Distance between two partitions of clusters.
 *
 * For every cluster of \a other, the closest cluster of \a ref is searched
 * (euclidian distance summed over all EOFs). The value returned is the
 * largest of those minimum distances. Both vectors are laid out as
 * [eof + clust*neof].
 *
 * \param[in] ref      Clusters of the partition being evaluated.
 * \param[in] other    Clusters of the partition it is compared against.
 * \param[in] neof     Number of EOFs.
 * \param[in] ncluster Number of clusters.
 * \return Maximum, over the clusters of \a other, of the distance to the
 *         closest cluster of \a ref.
 */
static double
best_clusters_partition_dist(const double *ref, const double *other, int neof, int ncluster)
{
  double maxdistval = -HUGE_VAL; /* Maximum distance over all clusters for the two partitions comparison. */
  int clust1; /* Loop counter for clusters of the other partition */
  int clust2; /* Loop counter for clusters of the reference partition */
  int eof; /* Loop counter for eofs */

  for (clust1=0; clust1<ncluster; clust1++) {
    const double *pos1 = other + (size_t) clust1 * (size_t) neof;
    double minval = HUGE_VAL; /* Minimum distance to find a corresponding closest cluster. */
    int min_cluster = -1; /* Closest cluster found so far. */

    /* Find closest cluster to current one (in terms of distance summed over all EOF). */
    for (clust2=0; clust2<ncluster; clust2++) {
      const double *pos2 = ref + (size_t) clust2 * (size_t) neof;
      double dist_bary = 0.0; /* Distance summed over all EOFs. */

      for (eof=0; eof<neof; eof++) {
        double val = pos1[eof] - pos2[eof];
        dist_bary += (val * val);
      }
      dist_bary = sqrt(dist_bary);

      /* Check for minimum distance. We want to find the corresponding closest cluster in another partition. */
      if (dist_bary < minval) {
        minval = dist_bary;
        min_cluster = clust2;
      }
    }

    /* Only reachable when every distance is infinite or NaN. */
    if (min_cluster == -1) {
      (void) fprintf(stderr, "best_clusters: ABORT: Error in algorithm. Cannot find best cluster!\n");
      (void) abort();
    }

    /* Save the maximum distance over all clusters for the two partitions comparison. */
    if (minval > maxdistval)
      maxdistval = minval;
  }

  return maxdistval;
}

/**
 * Algorithm to generate best clusters among many tries.
 *
 * Generates \a npart partitions with generate_clusters() and keeps the one
 * whose mean distance to all the other partitions is the smallest (which
 * corresponds to the partition closest to the barycenter of partitions).
 * When only one partition is requested it is selected directly.
 *
 * The process is aborted when \a npart is lower than 1, when \a type is not
 * a known distance type, or when no partition can be selected.
 *
 * \param[out] best_clusters Best clusters' positions, laid out as [eof + clust*neof].
 * \param[in]  pc_eof_days   Principal Components of EOF (daily data).
 * \param[in]  type          Type of distance used. Possible values: euclidian.
 * \param[in]  npart         Number of classification partitions to try.
 * \param[in]  nclassif      Maximum number of classifications to perform in the iterative algorithm.
 * \param[in]  neof          Number of EOFs.
 * \param[in]  ncluster      Number of clusters.
 * \param[in]  ndays         Number of days in the pc_eof_days vector.
 * \return Minimum number of iterations returned by generate_clusters() over all partitions.
 */
int
best_clusters(double *best_clusters, double *pc_eof_days, char *type, int npart, int nclassif, int neof, int ncluster, int ndays)
{
  double min_meandistval; /* Minimum distance between a partition and all other partitions */
  double meandistval; /* Mean distance value between each corresponding clusters for the comparison of two partitions. */

  double *tmpcluster = NULL; /* Temporary vector of clusters for one partition. */
  double *testclusters = NULL; /* Temporary vector of clusters for all partitions, one partition after the other. */

  size_t partsize; /* Number of values in one partition (neof*ncluster). */

  int min_partition = -1; /* Partition number used to find the partition which has the minimum distance to all other partitions. */

  int part; /* Loop counter for partitions */
  int part1; /* Loop counter for partitions inside loop */
  int part2; /* Loop counter for partitions inside loop */

  int niter; /* Number of iterations */
  int niter_min = 0; /* Minimum number of iterations */

  (void) fprintf(stdout, "%s:: BEGIN: Find the best partition of clusters.\n", __FILE__);

  /* Reject unusable arguments before doing any expensive work. */
  if (npart < 1) {
    (void) fprintf(stderr, "best_clusters: ABORT: Invalid number of partitions npart=%d!!\n", npart);
    (void) abort();
  }
  if ( strcmp(type, "euclidian") != 0 ) {
    (void) fprintf(stderr, "best_clusters: ABORT: Unknown distance type=%s!!\n", type);
    (void) abort();
  }

  /* Allocate memory. Sizes are computed in size_t, and calloc checks the final product for overflow. */
  partsize = (size_t) neof * (size_t) ncluster;
  tmpcluster = calloc(partsize, sizeof(double));
  if (tmpcluster == NULL) alloc_error(__FILE__, __LINE__);
  testclusters = calloc(partsize, (size_t) npart * sizeof(double));
  if (testclusters == NULL) alloc_error(__FILE__, __LINE__);

  /* Generate npart clusters (which will be used to find the best clustering). */
  (void) fprintf(stdout, "%s:: Generating %d partitions of clusters.\n", __FILE__, npart);
  for (part=0; part<npart; part++) {
#if DEBUG >= 1
    (void) fprintf(stdout, "%s:: Generating %d/%d partition of clusters.\n", __FILE__, part+1, npart);
#endif
    niter = generate_clusters(tmpcluster, pc_eof_days, type, nclassif, neof, ncluster, ndays);
    /* Seed the minimum with the first partition instead of an arbitrary cap. */
    if (part == 0 || niter < niter_min) niter_min = niter;
    (void) memcpy(testclusters + (size_t) part * partsize, tmpcluster, partsize * sizeof(double));
  }

  min_meandistval = HUGE_VAL;
  min_partition = -1;
  /* Loop over all partition and compute distance between each other partition. */
  (void) fprintf(stdout, "%s:: Computing distance between each partitions of clusters.\n", __FILE__);
  for (part1=0; part1<npart; part1++) {
#if DEBUG >= 1
    (void) fprintf(stdout, "%s:: Partition %d/%d.\n", __FILE__, part1+1, npart);
#endif
    meandistval = 0.0;
    for (part2=0; part2<npart; part2++) {
      /* Don't compute for the same partition number. */
      if (part1 != part2) {
        /* Sum the maximum distance of the clusters between each corresponding one over all the partitions.
           We want to compute the mean afterward. */
        meandistval += best_clusters_partition_dist(testclusters + (size_t) part1 * partsize,
                                                    testclusters + (size_t) part2 * partsize,
                                                    neof, ncluster);
      }
    }
    /* Compute the mean of the distances. With a single partition there is nothing to average:
       dividing by (npart-1) == 0 would give NaN and no partition would ever be selected. */
    if (npart > 1)
      meandistval = meandistval / (double) (npart-1);
    /* We want to keep the partition which has the minimum distance to all other partitions. */
    if (meandistval < min_meandistval) {
      min_meandistval = meandistval;
      min_partition = part1;
    }
  }

  if (min_partition == -1) {
    /* Failing algorithm */
    (void) fprintf(stderr, "best_clusters: ABORT: Error in algorithm. Cannot find best partition!\n");
    (void) abort();
  }

  /* Save data for the best selected partition of clusters. */
  (void) fprintf(stdout, "%s:: Save best partition of clusters.\n", __FILE__);
  (void) memcpy(best_clusters, testclusters + (size_t) min_partition * partsize, partsize * sizeof(double));

  /* Free memory. */
  (void) free(tmpcluster);
  (void) free(testclusters);

  (void) fprintf(stdout, "%s:: END: Find the best partition of clusters. Partition %d selected.\n", __FILE__, min_partition);

  return niter_min;
}