remove fastcluster (#29352)
* remove fastcluster * lock * rm there * and from release filespull/29353/head^2
parent
0ced56b2ea
commit
00a11a1a2b
16 changed files with 1 additions and 2574 deletions
@ -1,140 +0,0 @@ |
||||
# pylint: skip-file |
||||
|
||||
import time |
||||
import unittest |
||||
import numpy as np |
||||
from fastcluster import linkage_vector |
||||
from scipy.cluster import _hierarchy |
||||
from scipy.spatial.distance import pdist |
||||
|
||||
from third_party.cluster.fastcluster_py import hclust, ffi |
||||
from third_party.cluster.fastcluster_py import cluster_points_centroid |
||||
|
||||
|
||||
def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None): |
||||
# supersimplified function to get fast clustering. Got it from scipy |
||||
Z = np.asarray(Z, order='c') |
||||
n = Z.shape[0] + 1 |
||||
T = np.zeros((n,), dtype='i') |
||||
_hierarchy.cluster_dist(Z, T, float(t), int(n)) |
||||
return T |
||||
|
||||
|
||||
TRACK_PTS = np.array([[59.26000137, -9.35999966, -5.42500019], |
||||
[91.61999817, -0.31999999, -2.75], |
||||
[31.38000031, 0.40000001, -0.2], |
||||
[89.57999725, -8.07999992, -18.04999924], |
||||
[53.42000122, 0.63999999, -0.175], |
||||
[31.38000031, 0.47999999, -0.2], |
||||
[36.33999939, 0.16, -0.2], |
||||
[53.33999939, 0.95999998, -0.175], |
||||
[59.26000137, -9.76000023, -5.44999981], |
||||
[33.93999977, 0.40000001, -0.22499999], |
||||
[106.74000092, -5.76000023, -18.04999924]]) |
||||
|
||||
CORRECT_LINK = np.array([[2., 5., 0.07999998, 2.], |
||||
[4., 7., 0.32984889, 2.], |
||||
[0., 8., 0.40078104, 2.], |
||||
[6., 9., 2.41209933, 2.], |
||||
[11., 14., 3.76342275, 4.], |
||||
[12., 13., 13.02297651, 4.], |
||||
[1., 3., 17.27626057, 2.], |
||||
[10., 17., 17.92918845, 3.], |
||||
[15., 16., 23.68525366, 8.], |
||||
[18., 19., 52.52351319, 11.]]) |
||||
|
||||
CORRECT_LABELS = np.array([7, 1, 4, 2, 6, 4, 5, 6, 7, 5, 3], dtype=np.int32) |
||||
|
||||
|
||||
def plot_cluster(pts, idx_old, idx_new): |
||||
import matplotlib.pyplot as plt |
||||
m = 'Set1' |
||||
|
||||
plt.figure() |
||||
plt.subplot(1, 2, 1) |
||||
plt.scatter(pts[:, 0], pts[:, 1], c=idx_old, cmap=m) |
||||
plt.title("Old") |
||||
plt.colorbar() |
||||
plt.subplot(1, 2, 2) |
||||
plt.scatter(pts[:, 0], pts[:, 1], c=idx_new, cmap=m) |
||||
plt.title("New") |
||||
plt.colorbar() |
||||
|
||||
plt.show() |
||||
|
||||
|
||||
def same_clusters(correct, other): |
||||
correct = np.asarray(correct) |
||||
other = np.asarray(other) |
||||
if len(correct) != len(other): |
||||
return False |
||||
|
||||
for i in range(len(correct)): |
||||
c = np.where(correct == correct[i]) |
||||
o = np.where(other == other[i]) |
||||
if not np.array_equal(c, o): |
||||
return False |
||||
return True |
||||
|
||||
|
||||
class TestClustering(unittest.TestCase): |
||||
def test_scipy_clustering(self): |
||||
old_link = linkage_vector(TRACK_PTS, method='centroid') |
||||
old_cluster_idxs = fcluster(old_link, 2.5, criterion='distance') |
||||
|
||||
np.testing.assert_allclose(old_link, CORRECT_LINK) |
||||
np.testing.assert_allclose(old_cluster_idxs, CORRECT_LABELS) |
||||
|
||||
def test_pdist(self): |
||||
pts = np.ascontiguousarray(TRACK_PTS, dtype=np.float64) |
||||
pts_ptr = ffi.cast("double *", pts.ctypes.data) |
||||
|
||||
n, m = pts.shape |
||||
out = np.zeros((n * (n - 1) // 2, ), dtype=np.float64) |
||||
out_ptr = ffi.cast("double *", out.ctypes.data) |
||||
hclust.hclust_pdist(n, m, pts_ptr, out_ptr) |
||||
|
||||
np.testing.assert_allclose(out, np.power(pdist(TRACK_PTS), 2)) |
||||
|
||||
def test_cpp_clustering(self): |
||||
pts = np.ascontiguousarray(TRACK_PTS, dtype=np.float64) |
||||
pts_ptr = ffi.cast("double *", pts.ctypes.data) |
||||
n, m = pts.shape |
||||
|
||||
labels = np.zeros((n, ), dtype=np.int32) |
||||
labels_ptr = ffi.cast("int *", labels.ctypes.data) |
||||
hclust.cluster_points_centroid(n, m, pts_ptr, 2.5**2, labels_ptr) |
||||
self.assertTrue(same_clusters(CORRECT_LABELS, labels)) |
||||
|
||||
def test_cpp_wrapper_clustering(self): |
||||
labels = cluster_points_centroid(TRACK_PTS, 2.5) |
||||
self.assertTrue(same_clusters(CORRECT_LABELS, labels)) |
||||
|
||||
def test_random_cluster(self): |
||||
np.random.seed(1337) |
||||
N = 1000 |
||||
|
||||
t_old = 0. |
||||
t_new = 0. |
||||
|
||||
for _ in range(N): |
||||
n = int(np.random.uniform(2, 32)) |
||||
x = np.random.uniform(-10, 50, (n, 1)) |
||||
y = np.random.uniform(-5, 5, (n, 1)) |
||||
vrel = np.random.uniform(-5, 5, (n, 1)) |
||||
pts = np.hstack([x, y, vrel]) |
||||
|
||||
t = time.time() |
||||
old_link = linkage_vector(pts, method='centroid') |
||||
old_cluster_idx = fcluster(old_link, 2.5, criterion='distance') |
||||
t_old += time.time() - t |
||||
|
||||
t = time.time() |
||||
cluster_idx = cluster_points_centroid(pts, 2.5) |
||||
t_new += time.time() - t |
||||
|
||||
self.assertTrue(same_clusters(old_cluster_idx, cluster_idx)) |
||||
|
||||
|
||||
if __name__ == "__main__": |
||||
unittest.main() |
@ -1 +0,0 @@ |
||||
test |
@ -1,79 +0,0 @@ |
||||
C++ interface to fast hierarchical clustering algorithms |
||||
======================================================== |
||||
|
||||
This is a simplified C++ interface to fast implementations of hierarchical |
||||
clustering by Daniel Müllner. The original library with interfaces to R |
||||
and Python is described in: |
||||
|
||||
Daniel Müllner: "fastcluster: Fast Hierarchical, Agglomerative Clustering |
||||
Routines for R and Python." Journal of Statistical Software 53 (2013), |
||||
no. 9, pp. 1–18, http://www.jstatsoft.org/v53/i09/ |
||||
|
||||
|
||||
Usage of the library |
||||
-------------------- |
||||
|
||||
For using the library, the following source files are needed: |
||||
|
||||
fastcluster_dm.cpp, fastcluster_R_dm.cpp |
||||
original code by Daniel Müllner |
||||
these are included by fastcluster.cpp via #include, and therefore |
||||
need not be compiled to object code |
||||
|
||||
fastcluster.[h|cpp] |
||||
simplified C++ interface |
||||
fastcluster.cpp is the only file that must be compiled |
||||
|
||||
The library provides the clustering function *hclust_fast* for |
||||
creating the dendrogram information in an encoding as used by the |
||||
R function *hclust*. For a description of the parameters, see fastcluster.h. |
||||
Its parameter *method* can be one of |
||||
|
||||
HCLUST_METHOD_SINGLE |
||||
single link with the minimum spanning tree algorithm (Rohlf, 1973) |
||||
|
||||
HHCLUST_METHOD_COMPLETE |
||||
complete link with the nearest-neighbor-chain algorithm (Murtagh, 1984) |
||||
|
||||
HCLUST_METHOD_AVERAGE |
||||
complete link with the nearest-neighbor-chain algorithm (Murtagh, 1984) |
||||
|
||||
HCLUST_METHOD_MEDIAN |
||||
median link with the generic algorithm (Müllner, 2011) |
||||
|
||||
For splitting the dendrogram into clusters, the two functions *cutree_k* |
||||
and *cutree_cdist* are provided. |
||||
|
||||
Note that output parameters must be allocated beforehand, e.g. |
||||
int* merge = new int[2*(npoints-1)]; |
||||
For a complete usage example, see lines 135-142 of demo.cpp. |
||||
|
||||
|
||||
Demonstration program |
||||
--------------------- |
||||
|
||||
A simple demo is implemented in demo.cpp, which can be compiled and run with |
||||
|
||||
make |
||||
./hclust-demo -m complete lines.csv |
||||
|
||||
It creates two clusters of line segments such that the segment angle between |
||||
line segments of different clusters have a maximum (cosine) dissimilarity. |
||||
For visualizing the result, plotresult.r can be used as follows |
||||
(requires R <https://r-project.org> to be installed): |
||||
|
||||
./hclust-demo -m complete lines.csv | Rscript plotresult.r |
||||
|
||||
|
||||
Authors & Copyright |
||||
------------------- |
||||
|
||||
Daniel Müllner, 2011, <http://danifold.net> |
||||
Christoph Dalitz, 2018, <http://www.hsnr.de/ipattern/> |
||||
|
||||
|
||||
License |
||||
------- |
||||
|
||||
This code is provided under a BSD-style license. |
||||
See the file LICENSE for details. |
@ -1,8 +0,0 @@ |
||||
Import('env') |
||||
|
||||
fc = env.SharedLibrary("fastcluster", "fastcluster.cpp") |
||||
|
||||
# TODO: how do I gate on test |
||||
#env.Program("test", ["test.cpp"], LIBS=[fc]) |
||||
#valgrind --leak-check=full ./test |
||||
|
@ -1,218 +0,0 @@ |
||||
//
|
||||
// C++ standalone verion of fastcluster by Daniel Müllner
|
||||
//
|
||||
// Copyright: Christoph Dalitz, 2018
|
||||
// Daniel Müllner, 2011
|
||||
// License: BSD style license
|
||||
// (see the file LICENSE for details)
|
||||
//
|
||||
|
||||
|
||||
#include <vector> |
||||
#include <algorithm> |
||||
#include <cmath> |
||||
|
||||
|
||||
extern "C" { |
||||
#include "fastcluster.h" |
||||
} |
||||
|
||||
// Code by Daniel Müllner
|
||||
// workaround to make it usable as a standalone version (without R)
|
||||
bool fc_isnan(double x) { return false; } |
||||
#include "fastcluster_dm.cpp" |
||||
#include "fastcluster_R_dm.cpp" |
||||
|
||||
extern "C" { |
||||
//
|
||||
// Assigns cluster labels (0, ..., nclust-1) to the n points such
|
||||
// that the cluster result is split into nclust clusters.
|
||||
//
|
||||
// Input arguments:
|
||||
// n = number of observables
|
||||
// merge = clustering result in R format
|
||||
// nclust = number of clusters
|
||||
// Output arguments:
|
||||
// labels = allocated integer array of size n for result
|
||||
//
|
||||
void cutree_k(int n, const int* merge, int nclust, int* labels) { |
||||
|
||||
int k,m1,m2,j,l; |
||||
|
||||
if (nclust > n || nclust < 2) { |
||||
for (j=0; j<n; j++) labels[j] = 0; |
||||
return; |
||||
} |
||||
|
||||
// assign to each observable the number of its last merge step
|
||||
// beware: indices of observables in merge start at 1 (R convention)
|
||||
std::vector<int> last_merge(n, 0); |
||||
for (k=1; k<=(n-nclust); k++) { |
||||
// (m1,m2) = merge[k,]
|
||||
m1 = merge[k-1]; |
||||
m2 = merge[n-1+k-1]; |
||||
if (m1 < 0 && m2 < 0) { // both single observables
|
||||
last_merge[-m1-1] = last_merge[-m2-1] = k; |
||||
} |
||||
else if (m1 < 0 || m2 < 0) { // one is a cluster
|
||||
if(m1 < 0) { j = -m1; m1 = m2; } else j = -m2; |
||||
// merging single observable and cluster
|
||||
for(l = 0; l < n; l++) |
||||
if (last_merge[l] == m1) |
||||
last_merge[l] = k; |
||||
last_merge[j-1] = k; |
||||
} |
||||
else { // both cluster
|
||||
for(l=0; l < n; l++) { |
||||
if( last_merge[l] == m1 || last_merge[l] == m2 ) |
||||
last_merge[l] = k; |
||||
} |
||||
} |
||||
} |
||||
|
||||
// assign cluster labels
|
||||
int label = 0; |
||||
std::vector<int> z(n,-1); |
||||
for (j=0; j<n; j++) { |
||||
if (last_merge[j] == 0) { // still singleton
|
||||
labels[j] = label++; |
||||
} else { |
||||
if (z[last_merge[j]] < 0) { |
||||
z[last_merge[j]] = label++; |
||||
} |
||||
labels[j] = z[last_merge[j]]; |
||||
} |
||||
} |
||||
} |
||||
|
||||
//
|
||||
// Assigns cluster labels (0, ..., nclust-1) to the n points such
|
||||
// that the hierarchical clustering is stopped when cluster distance >= cdist
|
||||
//
|
||||
// Input arguments:
|
||||
// n = number of observables
|
||||
// merge = clustering result in R format
|
||||
// height = cluster distance at each merge step
|
||||
// cdist = cutoff cluster distance
|
||||
// Output arguments:
|
||||
// labels = allocated integer array of size n for result
|
||||
//
|
||||
void cutree_cdist(int n, const int* merge, double* height, double cdist, int* labels) { |
||||
|
||||
int k; |
||||
|
||||
for (k=0; k<(n-1); k++) { |
||||
if (height[k] >= cdist) { |
||||
break; |
||||
} |
||||
} |
||||
cutree_k(n, merge, n-k, labels); |
||||
} |
||||
|
||||
|
||||
//
|
||||
// Hierarchical clustering with one of Daniel Muellner's fast algorithms
|
||||
//
|
||||
// Input arguments:
|
||||
// n = number of observables
|
||||
// distmat = condensed distance matrix, i.e. an n*(n-1)/2 array representing
|
||||
// the upper triangle (without diagonal elements) of the distance
|
||||
// matrix, e.g. for n=4:
|
||||
// d00 d01 d02 d03
|
||||
// d10 d11 d12 d13 -> d01 d02 d03 d12 d13 d23
|
||||
// d20 d21 d22 d23
|
||||
// d30 d31 d32 d33
|
||||
// method = cluster metric (see enum method_code)
|
||||
// Output arguments:
|
||||
// merge = allocated (n-1)x2 matrix (2*(n-1) array) for storing result.
|
||||
// Result follows R hclust convention:
|
||||
// - observabe indices start with one
|
||||
// - merge[i][] contains the merged nodes in step i
|
||||
// - merge[i][j] is negative when the node is an atom
|
||||
// height = allocated (n-1) array with distances at each merge step
|
||||
// Return code:
|
||||
// 0 = ok
|
||||
// 1 = invalid method
|
||||
//
|
||||
int hclust_fast(int n, double* distmat, int method, int* merge, double* height) { |
||||
|
||||
// call appropriate culstering function
|
||||
cluster_result Z2(n-1); |
||||
if (method == HCLUST_METHOD_SINGLE) { |
||||
// single link
|
||||
MST_linkage_core(n, distmat, Z2); |
||||
} |
||||
else if (method == HCLUST_METHOD_COMPLETE) { |
||||
// complete link
|
||||
NN_chain_core<METHOD_METR_COMPLETE, t_float>(n, distmat, NULL, Z2); |
||||
} |
||||
else if (method == HCLUST_METHOD_AVERAGE) { |
||||
// best average distance
|
||||
double* members = new double[n]; |
||||
for (int i=0; i<n; i++) members[i] = 1; |
||||
NN_chain_core<METHOD_METR_AVERAGE, t_float>(n, distmat, members, Z2); |
||||
delete[] members; |
||||
} |
||||
else if (method == HCLUST_METHOD_MEDIAN) { |
||||
// best median distance (beware: O(n^3))
|
||||
generic_linkage<METHOD_METR_MEDIAN, t_float>(n, distmat, NULL, Z2); |
||||
} |
||||
else if (method == HCLUST_METHOD_CENTROID) { |
||||
// best centroid distance (beware: O(n^3))
|
||||
double* members = new double[n]; |
||||
for (int i=0; i<n; i++) members[i] = 1; |
||||
generic_linkage<METHOD_METR_CENTROID, t_float>(n, distmat, members, Z2); |
||||
delete[] members; |
||||
} |
||||
else { |
||||
return 1; |
||||
} |
||||
|
||||
int* order = new int[n]; |
||||
if (method == HCLUST_METHOD_MEDIAN || method == HCLUST_METHOD_CENTROID) { |
||||
generate_R_dendrogram<true>(merge, height, order, Z2, n); |
||||
} else { |
||||
generate_R_dendrogram<false>(merge, height, order, Z2, n); |
||||
} |
||||
delete[] order; // only needed for visualization
|
||||
|
||||
return 0; |
||||
} |
||||
|
||||
|
||||
// Build condensed distance matrix
|
||||
// Input arguments:
|
||||
// n = number of observables
|
||||
// m = dimension of observable
|
||||
// Output arguments:
|
||||
// out = allocated integer array of size n * (n - 1) / 2 for result
|
||||
void hclust_pdist(int n, int m, double* pts, double* out) { |
||||
int ii = 0; |
||||
for (int i = 0; i < n; i++) { |
||||
for (int j = i + 1; j < n; j++) { |
||||
// Compute euclidian distance
|
||||
double d = 0; |
||||
for (int k = 0; k < m; k ++) { |
||||
double error = pts[i * m + k] - pts[j * m + k]; |
||||
d += (error * error); |
||||
} |
||||
out[ii] = d;//sqrt(d);
|
||||
ii++; |
||||
} |
||||
} |
||||
} |
||||
|
||||
void cluster_points_centroid(int n, int m, double* pts, double dist, int* idx) { |
||||
double* pdist = new double[n * (n - 1) / 2]; |
||||
int* merge = new int[2 * (n - 1)]; |
||||
double* height = new double[n - 1]; |
||||
|
||||
hclust_pdist(n, m, pts, pdist); |
||||
hclust_fast(n, pdist, HCLUST_METHOD_CENTROID, merge, height); |
||||
cutree_cdist(n, merge, height, dist, idx); |
||||
|
||||
delete[] pdist; |
||||
delete[] merge; |
||||
delete[] height; |
||||
} |
||||
} |
@ -1,77 +0,0 @@ |
||||
//
|
||||
// C++ standalone verion of fastcluster by Daniel Muellner
|
||||
//
|
||||
// Copyright: Daniel Muellner, 2011
|
||||
// Christoph Dalitz, 2018
|
||||
// License: BSD style license
|
||||
// (see the file LICENSE for details)
|
||||
//
|
||||
|
||||
#ifndef fastclustercpp_H |
||||
#define fastclustercpp_H |
||||
|
||||
//
|
||||
// Assigns cluster labels (0, ..., nclust-1) to the n points such
|
||||
// that the cluster result is split into nclust clusters.
|
||||
//
|
||||
// Input arguments:
|
||||
// n = number of observables
|
||||
// merge = clustering result in R format
|
||||
// nclust = number of clusters
|
||||
// Output arguments:
|
||||
// labels = allocated integer array of size n for result
|
||||
//
|
||||
void cutree_k(int n, const int* merge, int nclust, int* labels); |
||||
|
||||
//
|
||||
// Assigns cluster labels (0, ..., nclust-1) to the n points such
|
||||
// that the hierarchical clsutering is stopped at cluster distance cdist
|
||||
//
|
||||
// Input arguments:
|
||||
// n = number of observables
|
||||
// merge = clustering result in R format
|
||||
// height = cluster distance at each merge step
|
||||
// cdist = cutoff cluster distance
|
||||
// Output arguments:
|
||||
// labels = allocated integer array of size n for result
|
||||
//
|
||||
void cutree_cdist(int n, const int* merge, double* height, double cdist, int* labels); |
||||
|
||||
//
|
||||
// Hierarchical clustering with one of Daniel Muellner's fast algorithms
|
||||
//
|
||||
// Input arguments:
|
||||
// n = number of observables
|
||||
// distmat = condensed distance matrix, i.e. an n*(n-1)/2 array representing
|
||||
// the upper triangle (without diagonal elements) of the distance
|
||||
// matrix, e.g. for n=4:
|
||||
// d00 d01 d02 d03
|
||||
// d10 d11 d12 d13 -> d01 d02 d03 d12 d13 d23
|
||||
// d20 d21 d22 d23
|
||||
// d30 d31 d32 d33
|
||||
// method = cluster metric (see enum method_code)
|
||||
// Output arguments:
|
||||
// merge = allocated (n-1)x2 matrix (2*(n-1) array) for storing result.
|
||||
// Result follows R hclust convention:
|
||||
// - observabe indices start with one
|
||||
// - merge[i][] contains the merged nodes in step i
|
||||
// - merge[i][j] is negative when the node is an atom
|
||||
// height = allocated (n-1) array with distances at each merge step
|
||||
// Return code:
|
||||
// 0 = ok
|
||||
// 1 = invalid method
|
||||
//
|
||||
int hclust_fast(int n, double* distmat, int method, int* merge, double* height); |
||||
enum hclust_fast_methods { |
||||
HCLUST_METHOD_SINGLE = 0, |
||||
HCLUST_METHOD_COMPLETE = 1, |
||||
HCLUST_METHOD_AVERAGE = 2, |
||||
HCLUST_METHOD_MEDIAN = 3, |
||||
HCLUST_METHOD_CENTROID = 5, |
||||
}; |
||||
|
||||
void hclust_pdist(int n, int m, double* pts, double* out); |
||||
void cluster_points_centroid(int n, int m, double* pts, double dist, int* idx); |
||||
|
||||
|
||||
#endif |
@ -1,115 +0,0 @@ |
||||
//
|
||||
// Excerpt from fastcluster_R.cpp
|
||||
//
|
||||
// Copyright: Daniel Müllner, 2011 <http://danifold.net>
|
||||
//
|
||||
|
||||
struct pos_node { |
||||
t_index pos; |
||||
int node; |
||||
}; |
||||
|
||||
void order_nodes(const int N, const int * const merge, const t_index * const node_size, int * const order) { |
||||
/* Parameters:
|
||||
N : number of data points |
||||
merge : (N-1)×2 array which specifies the node indices which are |
||||
merged in each step of the clustering procedure. |
||||
Negative entries -1...-N point to singleton nodes, while |
||||
positive entries 1...(N-1) point to nodes which are themselves |
||||
parents of other nodes. |
||||
node_size : array of node sizes - makes it easier |
||||
order : output array of size N |
||||
|
||||
Runtime: Θ(N) |
||||
*/ |
||||
auto_array_ptr<pos_node> queue(N/2); |
||||
|
||||
int parent; |
||||
int child; |
||||
t_index pos = 0; |
||||
|
||||
queue[0].pos = 0; |
||||
queue[0].node = N-2; |
||||
t_index idx = 1; |
||||
|
||||
do { |
||||
--idx; |
||||
pos = queue[idx].pos; |
||||
parent = queue[idx].node; |
||||
|
||||
// First child
|
||||
child = merge[parent]; |
||||
if (child<0) { // singleton node, write this into the 'order' array.
|
||||
order[pos] = -child; |
||||
++pos; |
||||
} |
||||
else { /* compound node: put it on top of the queue and decompose it
|
||||
in a later iteration. */ |
||||
queue[idx].pos = pos; |
||||
queue[idx].node = child-1; // convert index-1 based to index-0 based
|
||||
++idx; |
||||
pos += node_size[child-1]; |
||||
} |
||||
// Second child
|
||||
child = merge[parent+N-1]; |
||||
if (child<0) { |
||||
order[pos] = -child; |
||||
} |
||||
else { |
||||
queue[idx].pos = pos; |
||||
queue[idx].node = child-1; |
||||
++idx; |
||||
} |
||||
} while (idx>0); |
||||
} |
||||
|
||||
#define size_(r_) ( ((r_<N) ? 1 : node_size[r_-N]) ) |
||||
|
||||
template <const bool sorted> |
||||
void generate_R_dendrogram(int * const merge, double * const height, int * const order, cluster_result & Z2, const int N) { |
||||
// The array "nodes" is a union-find data structure for the cluster
|
||||
// identites (only needed for unsorted cluster_result input).
|
||||
union_find nodes(sorted ? 0 : N); |
||||
if (!sorted) { |
||||
std::stable_sort(Z2[0], Z2[N-1]); |
||||
} |
||||
|
||||
t_index node1, node2; |
||||
auto_array_ptr<t_index> node_size(N-1); |
||||
|
||||
for (t_index i=0; i<N-1; ++i) { |
||||
// Get two data points whose clusters are merged in step i.
|
||||
// Find the cluster identifiers for these points.
|
||||
if (sorted) { |
||||
node1 = Z2[i]->node1; |
||||
node2 = Z2[i]->node2; |
||||
} |
||||
else { |
||||
node1 = nodes.Find(Z2[i]->node1); |
||||
node2 = nodes.Find(Z2[i]->node2); |
||||
// Merge the nodes in the union-find data structure by making them
|
||||
// children of a new node.
|
||||
nodes.Union(node1, node2); |
||||
} |
||||
// Sort the nodes in the output array.
|
||||
if (node1>node2) { |
||||
t_index tmp = node1; |
||||
node1 = node2; |
||||
node2 = tmp; |
||||
} |
||||
/* Conversion between labeling conventions.
|
||||
Input: singleton nodes 0,...,N-1 |
||||
compound nodes N,...,2N-2 |
||||
Output: singleton nodes -1,...,-N |
||||
compound nodes 1,...,N |
||||
*/ |
||||
merge[i] = (node1<N) ? -static_cast<int>(node1)-1 |
||||
: static_cast<int>(node1)-N+1; |
||||
merge[i+N-1] = (node2<N) ? -static_cast<int>(node2)-1 |
||||
: static_cast<int>(node2)-N+1; |
||||
height[i] = Z2[i]->dist; |
||||
node_size[i] = size_(node1) + size_(node2); |
||||
} |
||||
|
||||
order_nodes(N, merge, node_size, order); |
||||
} |
File diff suppressed because it is too large
Load Diff
@ -1,28 +0,0 @@ |
||||
import os |
||||
import numpy as np |
||||
|
||||
from cffi import FFI |
||||
from common.ffi_wrapper import suffix |
||||
|
||||
cluster_dir = os.path.join(os.path.dirname(os.path.abspath(__file__))) |
||||
cluster_fn = os.path.join(cluster_dir, "libfastcluster"+suffix()) |
||||
|
||||
ffi = FFI() |
||||
ffi.cdef(""" |
||||
int hclust_fast(int n, double* distmat, int method, int* merge, double* height); |
||||
void cutree_cdist(int n, const int* merge, double* height, double cdist, int* labels); |
||||
void hclust_pdist(int n, int m, double* pts, double* out); |
||||
void cluster_points_centroid(int n, int m, double* pts, double dist, int* idx); |
||||
""") |
||||
|
||||
hclust = ffi.dlopen(cluster_fn) |
||||
|
||||
|
||||
def cluster_points_centroid(pts, dist): |
||||
pts = np.ascontiguousarray(pts, dtype=np.float64) |
||||
pts_ptr = ffi.cast("double *", pts.ctypes.data) |
||||
n, m = pts.shape |
||||
|
||||
labels_ptr = ffi.new("int[]", n) |
||||
hclust.cluster_points_centroid(n, m, pts_ptr, dist**2, labels_ptr) |
||||
return list(labels_ptr) |
@ -1,35 +0,0 @@ |
||||
#include <cassert> |
||||
|
||||
extern "C" { |
||||
#include "fastcluster.h" |
||||
} |
||||
|
||||
|
||||
int main(int argc, const char* argv[]) { |
||||
const int n = 11; |
||||
const int m = 3; |
||||
double* pts = new double[n*m]{59.26000137, -9.35999966, -5.42500019, |
||||
91.61999817, -0.31999999, -2.75, |
||||
31.38000031, 0.40000001, -0.2, |
||||
89.57999725, -8.07999992, -18.04999924, |
||||
53.42000122, 0.63999999, -0.175, |
||||
31.38000031, 0.47999999, -0.2, |
||||
36.33999939, 0.16, -0.2, |
||||
53.33999939, 0.95999998, -0.175, |
||||
59.26000137, -9.76000023, -5.44999981, |
||||
33.93999977, 0.40000001, -0.22499999, |
||||
106.74000092, -5.76000023, -18.04999924}; |
||||
|
||||
int * idx = new int[n]; |
||||
int * correct_idx = new int[n]{0, 1, 2, 3, 4, 2, 5, 4, 0, 5, 6}; |
||||
|
||||
cluster_points_centroid(n, m, pts, 2.5 * 2.5, idx); |
||||
|
||||
for (int i = 0; i < n; i++) { |
||||
assert(idx[i] == correct_idx[i]); |
||||
} |
||||
|
||||
delete[] idx; |
||||
delete[] correct_idx; |
||||
delete[] pts; |
||||
} |
Loading…
Reference in new issue