You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
			
				
					219 lines
				
				6.5 KiB
			
		
		
			
		
	
	
					219 lines
				
				6.5 KiB
			| 
											6 years ago
										 | //
 | ||
|  | // C++ standalone verion of fastcluster by Daniel Müllner
 | ||
|  | //
 | ||
|  | // Copyright: Christoph Dalitz, 2018
 | ||
|  | //            Daniel Müllner, 2011
 | ||
|  | // License:   BSD style license
 | ||
|  | //            (see the file LICENSE for details)
 | ||
|  | //
 | ||
|  | 
 | ||
|  | 
 | ||
|  | #include <vector>
 | ||
|  | #include <algorithm>
 | ||
|  | #include <cmath>
 | ||
|  | 
 | ||
|  | 
 | ||
|  | extern "C" {
 | ||
|  | #include "fastcluster.h"
 | ||
|  | }
 | ||
|  | 
 | ||
|  | // Code by Daniel Müllner
 | ||
|  | // workaround to make it usable as a standalone version (without R)
 | ||
|  | bool fc_isnan(double x) { return false; }
 | ||
|  | #include "fastcluster_dm.cpp"
 | ||
|  | #include "fastcluster_R_dm.cpp"
 | ||
|  | 
 | ||
|  | extern "C" {
 | ||
|  | //
 | ||
|  | // Assigns cluster labels (0, ..., nclust-1) to the n points such
 | ||
|  | // that the cluster result is split into nclust clusters.
 | ||
|  | //
 | ||
|  | // Input arguments:
 | ||
|  | //   n      = number of observables
 | ||
|  | //   merge  = clustering result in R format
 | ||
|  | //   nclust = number of clusters
 | ||
|  | // Output arguments:
 | ||
|  | //   labels = allocated integer array of size n for result
 | ||
|  | //
 | ||
|  |   void cutree_k(int n, const int* merge, int nclust, int* labels) {
 | ||
|  | 
 | ||
|  |     int k,m1,m2,j,l;
 | ||
|  | 
 | ||
|  |     if (nclust > n || nclust < 2) {
 | ||
|  |       for (j=0; j<n; j++) labels[j] = 0;
 | ||
|  |       return;
 | ||
|  |     }
 | ||
|  | 
 | ||
|  |     // assign to each observable the number of its last merge step
 | ||
|  |     // beware: indices of observables in merge start at 1 (R convention)
 | ||
|  |     std::vector<int> last_merge(n, 0);
 | ||
|  |     for (k=1; k<=(n-nclust); k++) {
 | ||
|  |       // (m1,m2) = merge[k,]
 | ||
|  |       m1 = merge[k-1];
 | ||
|  |       m2 = merge[n-1+k-1];
 | ||
|  |       if (m1 < 0 && m2 < 0) { // both single observables
 | ||
|  |         last_merge[-m1-1] = last_merge[-m2-1] = k;
 | ||
|  |       }
 | ||
|  |       else if (m1 < 0 || m2 < 0) { // one is a cluster
 | ||
|  |         if(m1 < 0) { j = -m1; m1 = m2; } else j = -m2;
 | ||
|  |         // merging single observable and cluster
 | ||
|  |         for(l = 0; l < n; l++)
 | ||
|  |           if (last_merge[l] == m1)
 | ||
|  |             last_merge[l] = k;
 | ||
|  |         last_merge[j-1] = k;
 | ||
|  |       }
 | ||
|  |       else { // both cluster
 | ||
|  |         for(l=0; l < n; l++) {
 | ||
|  |           if( last_merge[l] == m1 || last_merge[l] == m2 )
 | ||
|  |             last_merge[l] = k;
 | ||
|  |         }
 | ||
|  |       }
 | ||
|  |     }
 | ||
|  | 
 | ||
|  |     // assign cluster labels
 | ||
|  |     int label = 0;
 | ||
|  |     std::vector<int> z(n,-1);
 | ||
|  |     for (j=0; j<n; j++) {
 | ||
|  |       if (last_merge[j] == 0) { // still singleton
 | ||
|  |         labels[j] = label++;
 | ||
|  |       } else {
 | ||
|  |         if (z[last_merge[j]] < 0) {
 | ||
|  |           z[last_merge[j]] = label++;
 | ||
|  |         }
 | ||
|  |         labels[j] = z[last_merge[j]];
 | ||
|  |       }
 | ||
|  |     }
 | ||
|  |   }
 | ||
|  | 
 | ||
|  |   //
 | ||
|  |   // Assigns cluster labels (0, ..., nclust-1) to the n points such
 | ||
|  |   // that the hierarchical clustering is stopped when cluster distance >= cdist
 | ||
|  |   //
 | ||
|  |   // Input arguments:
 | ||
|  |   //   n      = number of observables
 | ||
|  |   //   merge  = clustering result in R format
 | ||
|  |   //   height = cluster distance at each merge step
 | ||
|  |   //   cdist  = cutoff cluster distance
 | ||
|  |   // Output arguments:
 | ||
|  |   //   labels = allocated integer array of size n for result
 | ||
|  |   //
 | ||
|  |   void cutree_cdist(int n, const int* merge, double* height, double cdist, int* labels) {
 | ||
|  | 
 | ||
|  |     int k;
 | ||
|  | 
 | ||
|  |     for (k=0; k<(n-1); k++) {
 | ||
|  |       if (height[k] >= cdist) {
 | ||
|  |         break;
 | ||
|  |       }
 | ||
|  |     }
 | ||
|  |     cutree_k(n, merge, n-k, labels);
 | ||
|  |   }
 | ||
|  | 
 | ||
|  | 
 | ||
|  |   //
 | ||
|  |   // Hierarchical clustering with one of Daniel Muellner's fast algorithms
 | ||
|  |   //
 | ||
|  |   // Input arguments:
 | ||
|  |   //   n       = number of observables
 | ||
|  |   //   distmat = condensed distance matrix, i.e. an n*(n-1)/2 array representing
 | ||
|  |   //             the upper triangle (without diagonal elements) of the distance
 | ||
|  |   //             matrix, e.g. for n=4:
 | ||
|  |   //               d00 d01 d02 d03
 | ||
|  |   //               d10 d11 d12 d13   ->  d01 d02 d03 d12 d13 d23
 | ||
|  |   //               d20 d21 d22 d23
 | ||
|  |   //               d30 d31 d32 d33
 | ||
|  |   //   method  = cluster metric (see enum method_code)
 | ||
|  |   // Output arguments:
 | ||
|  |   //   merge   = allocated (n-1)x2 matrix (2*(n-1) array) for storing result.
 | ||
|  |   //             Result follows R hclust convention:
 | ||
|  |   //              - observabe indices start with one
 | ||
|  |   //              - merge[i][] contains the merged nodes in step i
 | ||
|  |   //              - merge[i][j] is negative when the node is an atom
 | ||
|  |   //   height  = allocated (n-1) array with distances at each merge step
 | ||
|  |   // Return code:
 | ||
|  |   //   0 = ok
 | ||
|  |   //   1 = invalid method
 | ||
|  |   //
 | ||
|  |   int hclust_fast(int n, double* distmat, int method, int* merge, double* height) {
 | ||
|  | 
 | ||
|  |     // call appropriate culstering function
 | ||
|  |     cluster_result Z2(n-1);
 | ||
|  |     if (method == HCLUST_METHOD_SINGLE) {
 | ||
|  |       // single link
 | ||
|  |       MST_linkage_core(n, distmat, Z2);
 | ||
|  |     }
 | ||
|  |     else if (method == HCLUST_METHOD_COMPLETE) {
 | ||
|  |       // complete link
 | ||
|  |       NN_chain_core<METHOD_METR_COMPLETE, t_float>(n, distmat, NULL, Z2);
 | ||
|  |     }
 | ||
|  |     else if (method == HCLUST_METHOD_AVERAGE) {
 | ||
|  |       // best average distance
 | ||
|  |       double* members = new double[n];
 | ||
|  |       for (int i=0; i<n; i++) members[i] = 1;
 | ||
|  |       NN_chain_core<METHOD_METR_AVERAGE, t_float>(n, distmat, members, Z2);
 | ||
|  |       delete[] members;
 | ||
|  |     }
 | ||
|  |     else if (method == HCLUST_METHOD_MEDIAN) {
 | ||
|  |       // best median distance (beware: O(n^3))
 | ||
|  |       generic_linkage<METHOD_METR_MEDIAN, t_float>(n, distmat, NULL, Z2);
 | ||
|  |     }
 | ||
|  |     else if (method == HCLUST_METHOD_CENTROID) {
 | ||
|  |       // best centroid distance (beware: O(n^3))
 | ||
|  |       double* members = new double[n];
 | ||
|  |       for (int i=0; i<n; i++) members[i] = 1;
 | ||
|  |       generic_linkage<METHOD_METR_CENTROID, t_float>(n, distmat, members, Z2);
 | ||
|  |       delete[] members;
 | ||
|  |     }
 | ||
|  |     else {
 | ||
|  |       return 1;
 | ||
|  |     }
 | ||
|  | 
 | ||
|  |     int* order = new int[n];
 | ||
|  |     if (method == HCLUST_METHOD_MEDIAN || method == HCLUST_METHOD_CENTROID) {
 | ||
|  |       generate_R_dendrogram<true>(merge, height, order, Z2, n);
 | ||
|  |     } else {
 | ||
|  |       generate_R_dendrogram<false>(merge, height, order, Z2, n);
 | ||
|  |     }
 | ||
|  |     delete[] order; // only needed for visualization
 | ||
|  | 
 | ||
|  |     return 0;
 | ||
|  |   }
 | ||
|  | 
 | ||
|  | 
 | ||
|  |   // Build condensed distance matrix
 | ||
|  |   // Input arguments:
 | ||
|  |   //   n  = number of observables
 | ||
|  |   //   m  = dimension of observable
 | ||
|  |   // Output arguments:
 | ||
|  |   //   out = allocated integer array of size n * (n - 1) / 2 for result
 | ||
|  |   void hclust_pdist(int n, int m, double* pts, double* out) {
 | ||
|  |     int ii = 0;
 | ||
| 
											4 years ago
										 |     for (int i = 0; i < n; i++) {
 | ||
|  |       for (int j = i + 1; j < n; j++) {
 | ||
| 
											6 years ago
										 |         // Compute euclidian distance
 | ||
|  |         double d = 0;
 | ||
| 
											4 years ago
										 |         for (int k = 0; k < m; k ++) {
 | ||
| 
											6 years ago
										 |           double error = pts[i * m + k] - pts[j * m + k];
 | ||
|  |           d += (error * error);
 | ||
|  |         }
 | ||
|  |         out[ii] = d;//sqrt(d);
 | ||
|  |         ii++;
 | ||
|  |       }
 | ||
|  |     }
 | ||
|  |   }
 | ||
|  | 
 | ||
|  |   void cluster_points_centroid(int n, int m, double* pts, double dist, int* idx) {
 | ||
|  |     double* pdist = new double[n * (n - 1) / 2];
 | ||
|  |     int* merge = new int[2 * (n - 1)];
 | ||
|  |     double* height = new double[n - 1];
 | ||
|  | 
 | ||
|  |     hclust_pdist(n, m, pts, pdist);
 | ||
|  |     hclust_fast(n, pdist, HCLUST_METHOD_CENTROID, merge, height);
 | ||
|  |     cutree_cdist(n, merge, height, dist, idx);
 | ||
|  | 
 | ||
|  |     delete[] pdist;
 | ||
|  |     delete[] merge;
 | ||
|  |     delete[] height;
 | ||
|  |   }
 | ||
|  | }
 |