best_clusters.c File Reference

Algorithm to generate best clusters among many tries. More...

#include <classif.h>
Include dependency graph for best_clusters.c:

Go to the source code of this file.

Functions

int best_clusters (double *best_clusters, double *pc_eof_days, char *type, int npart, int nclassif, int neof, int ncluster, int ndays)
 Algorithm to generate best clusters among many tries.

Detailed Description

Algorithm to generate best clusters among many tries.

Definition in file best_clusters.c.


Function Documentation

int best_clusters ( double *  best_clusters,
double *  pc_eof_days,
char *  type,
int  npart,
int  nclassif,
int  neof,
int  ncluster,
int  ndays 
)

Algorithm to generate best clusters among many tries.

Parameters:
[out] best_clusters Best clusters' positions.
[in] pc_eof_days Principal Components of EOF (daily data).
[in] type Type of distance used. Possible values: euclidian.
[in] npart Number of classification partitions to try.
[in] nclassif Maximum number of classifications to perform in the iterative algorithm.
[in] neof Number of EOFs.
[in] ncluster Number of clusters.
[in] ndays Number of days in the pc_eof_days vector.
Returns:
Minimum number of iterations needed.

Try to find the best partition (clustering), i.e. the one that is closest to all the other partitions (which corresponds to the partition closest to the barycenter of the partitions).

Definition at line 58 of file best_clusters.c.

References alloc_error(), and generate_clusters().

Referenced by main(), and wt_learning().

00058                                                                                                                                   {
00072   double min_meandistval; /* Smallest mean distance found so far between one partition and all the other partitions. */
00073   double meandistval; /* For the current partition (part1): sum, then mean, over all other partitions of maxdistval. */
00074   double maxdistval; /* Largest value of minval over all clusters (clust1) when comparing two partitions. */
00075   double minval; /* Smallest dist_bary found over all clust2 for the current clust1. */
00076   double dist_bary; /* Square root of the sum over all EOFs of the squared position differences between two clusters of two partitions. */
00077   double val; /* Position difference between the two clusters being compared, for one particular EOF. */
00078 
00079   double *tmpcluster = NULL; /* Temporary vector of cluster positions for one partition (neof*ncluster values). */
00080   double *testclusters = NULL; /* Temporary vector of cluster positions for all partitions (neof*ncluster*npart values). */
00081   
00082   int min_cluster = -1; /* Index (clust2) of the closest cluster found so far; stays -1 while none has been found. */
00083   int min_partition = -1; /* Index of the partition with the smallest mean distance to all others; stays -1 while none has been found. */
00084 
00085   int part; /* Loop counter for partitions */
00086   int part1; /* Loop counter for partitions inside loop */
00087   int part2; /* Loop counter for partitions inside loop */
00088   int clust; /* Loop counter for clusters */
00089   int clust1; /* Loop counter for clusters inside loop */
00090   int clust2; /* Loop counter for clusters inside loop */
00091   int eof; /* Loop counter for eofs */
00092 
00093   int niter; /* Number of iterations returned by generate_clusters() for one partition */
00094   int niter_min; /* Minimum of niter over all generated partitions (the function's return value) */
00095 
00096   (void) fprintf(stdout, "%s:: BEGIN: Find the best partition of clusters.\n", __FILE__);
00097 /* Overview: generate npart partitions, score each one by its mean distance to the others, copy the lowest-scoring one into best_clusters and return niter_min. */
00098   niter_min = 99999; /* Large starting value for the running minimum. */
00099 
00100   /* Allocate memory (calloc zero-initializes both work arrays). */
00101   tmpcluster = (double *) calloc(neof*ncluster, sizeof(double));
00102   if (tmpcluster == NULL) alloc_error(__FILE__, __LINE__);
00103   testclusters = (double *) calloc(neof*ncluster*npart, sizeof(double));
00104   if (testclusters == NULL) alloc_error(__FILE__, __LINE__);
00105 
00106   /* Generate npart partitions of clusters (which will be used to find the best clustering). */
00107   (void) fprintf(stdout, "%s:: Generating %d partitions of clusters.\n", __FILE__, npart);
00108   for (part=0; part<npart; part++) {
00109 #if DEBUG >= 1
00110     (void) fprintf(stdout, "%s:: Generating %d/%d partition of clusters.\n", __FILE__, part+1, npart);
00111 #endif
00112     niter = generate_clusters(tmpcluster, pc_eof_days, type, nclassif, neof, ncluster, ndays);
00113     if (niter < niter_min) niter_min = niter;
00114     for (clust=0; clust<ncluster; clust++)
00115       for (eof=0; eof<neof; eof++)
00116         testclusters[part+eof*npart+clust*npart*neof] = tmpcluster[eof+clust*neof]; /* In testclusters the partition index varies fastest, then EOF, then cluster. */
00117   }
00118 
00121   min_meandistval = 9999999999.9;
00122   min_partition = -1;
00123   /* Loop over all partitions and compute, for each one, its distance to every other partition. */
00124   (void) fprintf(stdout, "%s:: Computing distance between each partitions of clusters.\n", __FILE__);
00125   for (part1=0; part1<npart; part1++) {
00126 #if DEBUG >= 1
00127     (void) fprintf(stdout, "%s:: Partition %d/%d.\n", __FILE__, part1+1, npart);
00128 #endif
00129     meandistval = 0.0;
00130     for (part2=0; part2<npart; part2++) {
00131 
00132       /* Do not compare a partition with itself. */
00133       if (part1 != part2) {
00134 
00135         maxdistval = -9999999999.9;
00136         
00137         for (clust1=0; clust1<ncluster; clust1++) {
00138           
00139           /* Find the cluster closest to the current one (in terms of distance summed over all EOFs). */
00140           minval = 9999999999.9;
00141           min_cluster = -1;
00142           for (clust2=0; clust2<ncluster; clust2++) {
00143 
00144             if ( !strcmp(type, "euclidian") ) {
00145               /* Sum the squared differences over all EOFs. */
00146               dist_bary = 0.0;
00147               for (eof=0; eof<neof; eof++) {
00148                 val = testclusters[part2+eof*npart+clust1*npart*neof] - testclusters[part1+eof*npart+clust2*npart*neof]; /* clust1 is read from partition part2, clust2 from partition part1. */
00149                 dist_bary += (val * val);
00150               }
00151               
00152               dist_bary = sqrt(dist_bary);
00153             }
00154             else {
00155               (void) fprintf(stderr, "best_clusters: ABORT: Unknown distance type=%s!!\n", type);
00156               (void) abort();
00157             }
00158             
00159             /* Keep the minimum distance: we want the corresponding closest cluster in the other partition. */
00160             if (dist_bary < minval) {
00161               minval = dist_bary;
00162               min_cluster = clust2;
00163             }
00164           }
00165 
00166           if (min_cluster == -1) {
00167             (void) fprintf(stderr, "best_clusters: ABORT: Error in algorithm. Cannot find best cluster!\n");
00168             (void) abort();
00169           }
00170           
00171           /* Keep the maximum, over all clusters, of the closest-cluster distance for this pair of partitions. */
00172           if (minval > maxdistval)
00173             maxdistval = minval;
00174         }
00175         /* Accumulate the maximum closest-cluster distance over all the other partitions.
00176            The mean is computed afterward. */
00177         meandistval += maxdistval;
00178       }
00179     }
00180     /* Turn the accumulated sum into a mean over the (npart-1) other partitions. */
00181     meandistval = meandistval / (double) (npart-1); /* NOTE(review): the divisor is zero when npart == 1 -- confirm callers always pass npart >= 2. */
00182     /* We want to keep the partition which has the minimum mean distance to all other partitions. */
00183     if (meandistval < min_meandistval) {
00184       min_meandistval = meandistval;
00185       min_partition = part1;
00186     }
00187   }
00188 
00189   if (min_partition == -1) {
00190     /* No partition was selected: the algorithm failed, abort. */
00191     (void) fprintf(stderr, "best_clusters: ABORT: Error in algorithm. Cannot find best partition!\n");
00192     (void) abort();
00193   }
00194 
00195   /* Copy the selected partition into the output array (EOF index varies fastest, then cluster). */
00196   (void) fprintf(stdout, "%s:: Save best partition of clusters.\n", __FILE__);
00197   for (clust=0; clust<ncluster; clust++)
00198     for (eof=0; eof<neof; eof++)
00199       best_clusters[eof+clust*neof] = testclusters[min_partition+eof*npart+clust*npart*neof];  
00200 
00201   /* Free memory. */
00202   (void) free(tmpcluster);
00203   (void) free(testclusters);
00204 
00205   (void) fprintf(stdout, "%s:: END: Find the best partition of clusters. Partition %d selected.\n", __FILE__, min_partition);
00206 
00207   return niter_min;
00208 }


Generated on 12 May 2016 for DSCLIM by  doxygen 1.6.1