You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
140 lines
4.5 KiB
140 lines
4.5 KiB
# pylint: skip-file
|
|
|
|
import time
|
|
import unittest
|
|
import numpy as np
|
|
from fastcluster import linkage_vector
|
|
from scipy.cluster import _hierarchy
|
|
from scipy.spatial.distance import pdist
|
|
|
|
from third_party.cluster.fastcluster_py import hclust, ffi
|
|
from third_party.cluster.fastcluster_py import cluster_points_centroid
|
|
|
|
|
|
def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None):
|
|
# supersimplified function to get fast clustering. Got it from scipy
|
|
Z = np.asarray(Z, order='c')
|
|
n = Z.shape[0] + 1
|
|
T = np.zeros((n,), dtype='i')
|
|
_hierarchy.cluster_dist(Z, T, float(t), int(n))
|
|
return T
|
|
|
|
|
|
TRACK_PTS = np.array([[59.26000137, -9.35999966, -5.42500019],
|
|
[91.61999817, -0.31999999, -2.75],
|
|
[31.38000031, 0.40000001, -0.2],
|
|
[89.57999725, -8.07999992, -18.04999924],
|
|
[53.42000122, 0.63999999, -0.175],
|
|
[31.38000031, 0.47999999, -0.2],
|
|
[36.33999939, 0.16, -0.2],
|
|
[53.33999939, 0.95999998, -0.175],
|
|
[59.26000137, -9.76000023, -5.44999981],
|
|
[33.93999977, 0.40000001, -0.22499999],
|
|
[106.74000092, -5.76000023, -18.04999924]])
|
|
|
|
CORRECT_LINK = np.array([[2., 5., 0.07999998, 2.],
|
|
[4., 7., 0.32984889, 2.],
|
|
[0., 8., 0.40078104, 2.],
|
|
[6., 9., 2.41209933, 2.],
|
|
[11., 14., 3.76342275, 4.],
|
|
[12., 13., 13.02297651, 4.],
|
|
[1., 3., 17.27626057, 2.],
|
|
[10., 17., 17.92918845, 3.],
|
|
[15., 16., 23.68525366, 8.],
|
|
[18., 19., 52.52351319, 11.]])
|
|
|
|
CORRECT_LABELS = np.array([7, 1, 4, 2, 6, 4, 5, 6, 7, 5, 3], dtype=np.int32)
|
|
|
|
|
|
def plot_cluster(pts, idx_old, idx_new):
|
|
import matplotlib.pyplot as plt
|
|
m = 'Set1'
|
|
|
|
plt.figure()
|
|
plt.subplot(1, 2, 1)
|
|
plt.scatter(pts[:, 0], pts[:, 1], c=idx_old, cmap=m)
|
|
plt.title("Old")
|
|
plt.colorbar()
|
|
plt.subplot(1, 2, 2)
|
|
plt.scatter(pts[:, 0], pts[:, 1], c=idx_new, cmap=m)
|
|
plt.title("New")
|
|
plt.colorbar()
|
|
|
|
plt.show()
|
|
|
|
|
|
def same_clusters(correct, other):
|
|
correct = np.asarray(correct)
|
|
other = np.asarray(other)
|
|
if len(correct) != len(other):
|
|
return False
|
|
|
|
for i in range(len(correct)):
|
|
c = np.where(correct == correct[i])
|
|
o = np.where(other == other[i])
|
|
if not np.array_equal(c, o):
|
|
return False
|
|
return True
|
|
|
|
|
|
class TestClustering(unittest.TestCase):
|
|
def test_scipy_clustering(self):
|
|
old_link = linkage_vector(TRACK_PTS, method='centroid')
|
|
old_cluster_idxs = fcluster(old_link, 2.5, criterion='distance')
|
|
|
|
np.testing.assert_allclose(old_link, CORRECT_LINK)
|
|
np.testing.assert_allclose(old_cluster_idxs, CORRECT_LABELS)
|
|
|
|
def test_pdist(self):
|
|
pts = np.ascontiguousarray(TRACK_PTS, dtype=np.float64)
|
|
pts_ptr = ffi.cast("double *", pts.ctypes.data)
|
|
|
|
n, m = pts.shape
|
|
out = np.zeros((n * (n - 1) // 2, ), dtype=np.float64)
|
|
out_ptr = ffi.cast("double *", out.ctypes.data)
|
|
hclust.hclust_pdist(n, m, pts_ptr, out_ptr)
|
|
|
|
np.testing.assert_allclose(out, np.power(pdist(TRACK_PTS), 2))
|
|
|
|
def test_cpp_clustering(self):
|
|
pts = np.ascontiguousarray(TRACK_PTS, dtype=np.float64)
|
|
pts_ptr = ffi.cast("double *", pts.ctypes.data)
|
|
n, m = pts.shape
|
|
|
|
labels = np.zeros((n, ), dtype=np.int32)
|
|
labels_ptr = ffi.cast("int *", labels.ctypes.data)
|
|
hclust.cluster_points_centroid(n, m, pts_ptr, 2.5**2, labels_ptr)
|
|
self.assertTrue(same_clusters(CORRECT_LABELS, labels))
|
|
|
|
def test_cpp_wrapper_clustering(self):
|
|
labels = cluster_points_centroid(TRACK_PTS, 2.5)
|
|
self.assertTrue(same_clusters(CORRECT_LABELS, labels))
|
|
|
|
def test_random_cluster(self):
|
|
np.random.seed(1337)
|
|
N = 1000
|
|
|
|
t_old = 0.
|
|
t_new = 0.
|
|
|
|
for _ in range(N):
|
|
n = int(np.random.uniform(2, 32))
|
|
x = np.random.uniform(-10, 50, (n, 1))
|
|
y = np.random.uniform(-5, 5, (n, 1))
|
|
vrel = np.random.uniform(-5, 5, (n, 1))
|
|
pts = np.hstack([x, y, vrel])
|
|
|
|
t = time.time()
|
|
old_link = linkage_vector(pts, method='centroid')
|
|
old_cluster_idx = fcluster(old_link, 2.5, criterion='distance')
|
|
t_old += time.time() - t
|
|
|
|
t = time.time()
|
|
cluster_idx = cluster_points_centroid(pts, 2.5)
|
|
t_new += time.time() - t
|
|
|
|
self.assertTrue(same_clusters(old_cluster_idx, cluster_idx))
|
|
|
|
|
|
if __name__ == "__main__":
|
|
unittest.main()
|
|
|