remove fastcluster (#29352)
* remove fastcluster
* lock
* rm there
* and from release files
parent 0ced56b2ea
commit 00a11a1a2b
16 changed files with 1 addition and 2574 deletions
@@ -1,140 +0,0 @@
# pylint: skip-file
import time
import unittest
import numpy as np
from fastcluster import linkage_vector
from scipy.cluster import _hierarchy
from scipy.spatial.distance import pdist

from third_party.cluster.fastcluster_py import hclust, ffi
from third_party.cluster.fastcluster_py import cluster_points_centroid


def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None):
  # supersimplified function to get fast clustering. Got it from scipy
  Z = np.asarray(Z, order='c')
  n = Z.shape[0] + 1
  T = np.zeros((n,), dtype='i')
  _hierarchy.cluster_dist(Z, T, float(t), int(n))
  return T


TRACK_PTS = np.array([[59.26000137, -9.35999966, -5.42500019],
                      [91.61999817, -0.31999999, -2.75],
                      [31.38000031, 0.40000001, -0.2],
                      [89.57999725, -8.07999992, -18.04999924],
                      [53.42000122, 0.63999999, -0.175],
                      [31.38000031, 0.47999999, -0.2],
                      [36.33999939, 0.16, -0.2],
                      [53.33999939, 0.95999998, -0.175],
                      [59.26000137, -9.76000023, -5.44999981],
                      [33.93999977, 0.40000001, -0.22499999],
                      [106.74000092, -5.76000023, -18.04999924]])

CORRECT_LINK = np.array([[2., 5., 0.07999998, 2.],
                         [4., 7., 0.32984889, 2.],
                         [0., 8., 0.40078104, 2.],
                         [6., 9., 2.41209933, 2.],
                         [11., 14., 3.76342275, 4.],
                         [12., 13., 13.02297651, 4.],
                         [1., 3., 17.27626057, 2.],
                         [10., 17., 17.92918845, 3.],
                         [15., 16., 23.68525366, 8.],
                         [18., 19., 52.52351319, 11.]])

CORRECT_LABELS = np.array([7, 1, 4, 2, 6, 4, 5, 6, 7, 5, 3], dtype=np.int32)


def plot_cluster(pts, idx_old, idx_new):
  import matplotlib.pyplot as plt
  m = 'Set1'

  plt.figure()
  plt.subplot(1, 2, 1)
  plt.scatter(pts[:, 0], pts[:, 1], c=idx_old, cmap=m)
  plt.title("Old")
  plt.colorbar()
  plt.subplot(1, 2, 2)
  plt.scatter(pts[:, 0], pts[:, 1], c=idx_new, cmap=m)
  plt.title("New")
  plt.colorbar()

  plt.show()


def same_clusters(correct, other):
  correct = np.asarray(correct)
  other = np.asarray(other)
  if len(correct) != len(other):
    return False

  for i in range(len(correct)):
    c = np.where(correct == correct[i])
    o = np.where(other == other[i])
    if not np.array_equal(c, o):
      return False
  return True


class TestClustering(unittest.TestCase):
  def test_scipy_clustering(self):
    old_link = linkage_vector(TRACK_PTS, method='centroid')
    old_cluster_idxs = fcluster(old_link, 2.5, criterion='distance')

    np.testing.assert_allclose(old_link, CORRECT_LINK)
    np.testing.assert_allclose(old_cluster_idxs, CORRECT_LABELS)

  def test_pdist(self):
    pts = np.ascontiguousarray(TRACK_PTS, dtype=np.float64)
    pts_ptr = ffi.cast("double *", pts.ctypes.data)

    n, m = pts.shape
    out = np.zeros((n * (n - 1) // 2, ), dtype=np.float64)
    out_ptr = ffi.cast("double *", out.ctypes.data)
    hclust.hclust_pdist(n, m, pts_ptr, out_ptr)

    np.testing.assert_allclose(out, np.power(pdist(TRACK_PTS), 2))

  def test_cpp_clustering(self):
    pts = np.ascontiguousarray(TRACK_PTS, dtype=np.float64)
    pts_ptr = ffi.cast("double *", pts.ctypes.data)
    n, m = pts.shape

    labels = np.zeros((n, ), dtype=np.int32)
    labels_ptr = ffi.cast("int *", labels.ctypes.data)
    hclust.cluster_points_centroid(n, m, pts_ptr, 2.5**2, labels_ptr)
    self.assertTrue(same_clusters(CORRECT_LABELS, labels))

  def test_cpp_wrapper_clustering(self):
    labels = cluster_points_centroid(TRACK_PTS, 2.5)
    self.assertTrue(same_clusters(CORRECT_LABELS, labels))

  def test_random_cluster(self):
    np.random.seed(1337)
    N = 1000

    t_old = 0.
    t_new = 0.

    for _ in range(N):
      n = int(np.random.uniform(2, 32))
      x = np.random.uniform(-10, 50, (n, 1))
      y = np.random.uniform(-5, 5, (n, 1))
      vrel = np.random.uniform(-5, 5, (n, 1))
      pts = np.hstack([x, y, vrel])

      t = time.time()
      old_link = linkage_vector(pts, method='centroid')
      old_cluster_idx = fcluster(old_link, 2.5, criterion='distance')
      t_old += time.time() - t

      t = time.time()
      cluster_idx = cluster_points_centroid(pts, 2.5)
      t_new += time.time() - t

      self.assertTrue(same_clusters(old_cluster_idx, cluster_idx))


if __name__ == "__main__":
  unittest.main()
@@ -1 +0,0 @@
test
@@ -1,79 +0,0 @@
C++ interface to fast hierarchical clustering algorithms
========================================================

This is a simplified C++ interface to fast implementations of hierarchical
clustering by Daniel Müllner. The original library with interfaces to R
and Python is described in:

Daniel Müllner: "fastcluster: Fast Hierarchical, Agglomerative Clustering
Routines for R and Python." Journal of Statistical Software 53 (2013),
no. 9, pp. 1–18, http://www.jstatsoft.org/v53/i09/


Usage of the library
--------------------

To use the library, the following source files are needed:

    fastcluster_dm.cpp, fastcluster_R_dm.cpp
        original code by Daniel Müllner; these are included by
        fastcluster.cpp via #include and therefore need not be
        compiled to object code

    fastcluster.[h|cpp]
        simplified C++ interface; fastcluster.cpp is the only file
        that must be compiled

The library provides the clustering function *hclust_fast* for
creating the dendrogram information in the encoding used by the
R function *hclust*. For a description of the parameters, see fastcluster.h.
Its parameter *method* can be one of

    HCLUST_METHOD_SINGLE
        single link with the minimum spanning tree algorithm (Rohlf, 1973)

    HCLUST_METHOD_COMPLETE
        complete link with the nearest-neighbor-chain algorithm (Murtagh, 1984)

    HCLUST_METHOD_AVERAGE
        average link with the nearest-neighbor-chain algorithm (Murtagh, 1984)

    HCLUST_METHOD_MEDIAN
        median link with the generic algorithm (Müllner, 2011)

For splitting the dendrogram into clusters, the two functions *cutree_k*
and *cutree_cdist* are provided.

Note that output parameters must be allocated beforehand, e.g.

    int* merge = new int[2*(npoints-1)];

For a complete usage example, see lines 135-142 of demo.cpp.
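As a rough sketch, assuming the caller has already filled the condensed
distance matrix and allocated an n-element label array (the helper name
cluster_example below is illustrative, not part of the library), centroid
clustering cut at a distance threshold might look like:

    extern "C" {
    #include "fastcluster.h"
    }

    // illustrative helper, not part of the library
    void cluster_example(int n, double* distmat, double cdist, int* labels) {
      // distmat: condensed distance matrix for n observables, filled by the caller
      // labels:  caller-allocated array of n ints receiving the cluster labels
      int* merge = new int[2*(n-1)];     // dendrogram in R hclust format
      double* height = new double[n-1];  // merge distance at each step

      hclust_fast(n, distmat, HCLUST_METHOD_CENTROID, merge, height);
      cutree_cdist(n, merge, height, cdist, labels);

      delete[] merge;
      delete[] height;
    }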

Demonstration program
---------------------

A simple demo is implemented in demo.cpp, which can be compiled and run with

    make
    ./hclust-demo -m complete lines.csv

It creates two clusters of line segments such that the angle between line
segments of different clusters has maximum (cosine) dissimilarity.
For visualizing the result, plotresult.r can be used as follows
(requires R <https://r-project.org> to be installed):

    ./hclust-demo -m complete lines.csv | Rscript plotresult.r


Authors & Copyright
-------------------

Daniel Müllner, 2011, <http://danifold.net>
Christoph Dalitz, 2018, <http://www.hsnr.de/ipattern/>


License
-------

This code is provided under a BSD-style license.
See the file LICENSE for details.
@@ -1,8 +0,0 @@
Import('env')

fc = env.SharedLibrary("fastcluster", "fastcluster.cpp")

# TODO: how do I gate on test
#env.Program("test", ["test.cpp"], LIBS=[fc])
#valgrind --leak-check=full ./test
@@ -1,218 +0,0 @@
//
// C++ standalone version of fastcluster by Daniel Müllner
//
// Copyright: Christoph Dalitz, 2018
//            Daniel Müllner, 2011
// License:   BSD style license
//            (see the file LICENSE for details)
//

#include <vector>
#include <algorithm>
#include <cmath>

extern "C" {
#include "fastcluster.h"
}

// Code by Daniel Müllner
// workaround to make it usable as a standalone version (without R)
bool fc_isnan(double x) { return false; }
#include "fastcluster_dm.cpp"
#include "fastcluster_R_dm.cpp"

extern "C" {
//
// Assigns cluster labels (0, ..., nclust-1) to the n points such
// that the cluster result is split into nclust clusters.
//
// Input arguments:
//   n      = number of observables
//   merge  = clustering result in R format
//   nclust = number of clusters
// Output arguments:
//   labels = allocated integer array of size n for result
//
void cutree_k(int n, const int* merge, int nclust, int* labels) {

  int k,m1,m2,j,l;

  if (nclust > n || nclust < 2) {
    for (j=0; j<n; j++) labels[j] = 0;
    return;
  }

  // assign to each observable the number of its last merge step
  // beware: indices of observables in merge start at 1 (R convention)
  std::vector<int> last_merge(n, 0);
  for (k=1; k<=(n-nclust); k++) {
    // (m1,m2) = merge[k,]
    m1 = merge[k-1];
    m2 = merge[n-1+k-1];
    if (m1 < 0 && m2 < 0) { // both single observables
      last_merge[-m1-1] = last_merge[-m2-1] = k;
    }
    else if (m1 < 0 || m2 < 0) { // one is a cluster
      if(m1 < 0) { j = -m1; m1 = m2; } else j = -m2;
      // merging single observable and cluster
      for(l = 0; l < n; l++)
        if (last_merge[l] == m1)
          last_merge[l] = k;
      last_merge[j-1] = k;
    }
    else { // both cluster
      for(l=0; l < n; l++) {
        if( last_merge[l] == m1 || last_merge[l] == m2 )
          last_merge[l] = k;
      }
    }
  }

  // assign cluster labels
  int label = 0;
  std::vector<int> z(n,-1);
  for (j=0; j<n; j++) {
    if (last_merge[j] == 0) { // still singleton
      labels[j] = label++;
    } else {
      if (z[last_merge[j]] < 0) {
        z[last_merge[j]] = label++;
      }
      labels[j] = z[last_merge[j]];
    }
  }
}

//
// Assigns cluster labels (0, ..., nclust-1) to the n points such
// that the hierarchical clustering is stopped when cluster distance >= cdist
//
// Input arguments:
//   n      = number of observables
//   merge  = clustering result in R format
//   height = cluster distance at each merge step
//   cdist  = cutoff cluster distance
// Output arguments:
//   labels = allocated integer array of size n for result
//
void cutree_cdist(int n, const int* merge, double* height, double cdist, int* labels) {

  int k;

  for (k=0; k<(n-1); k++) {
    if (height[k] >= cdist) {
      break;
    }
  }
  cutree_k(n, merge, n-k, labels);
}


//
// Hierarchical clustering with one of Daniel Müllner's fast algorithms
//
// Input arguments:
//   n       = number of observables
//   distmat = condensed distance matrix, i.e. an n*(n-1)/2 array representing
//             the upper triangle (without diagonal elements) of the distance
//             matrix, e.g. for n=4:
//               d00 d01 d02 d03
//               d10 d11 d12 d13   ->   d01 d02 d03 d12 d13 d23
//               d20 d21 d22 d23
//               d30 d31 d32 d33
//   method  = cluster metric (see enum method_code)
// Output arguments:
//   merge   = allocated (n-1)x2 matrix (2*(n-1) array) for storing result.
//             Result follows R hclust convention:
//              - observable indices start with one
//              - merge[i][] contains the merged nodes in step i
//              - merge[i][j] is negative when the node is an atom
//   height  = allocated (n-1) array with distances at each merge step
// Return code:
//   0 = ok
//   1 = invalid method
//
int hclust_fast(int n, double* distmat, int method, int* merge, double* height) {

  // call appropriate clustering function
  cluster_result Z2(n-1);
  if (method == HCLUST_METHOD_SINGLE) {
    // single link
    MST_linkage_core(n, distmat, Z2);
  }
  else if (method == HCLUST_METHOD_COMPLETE) {
    // complete link
    NN_chain_core<METHOD_METR_COMPLETE, t_float>(n, distmat, NULL, Z2);
  }
  else if (method == HCLUST_METHOD_AVERAGE) {
    // best average distance
    double* members = new double[n];
    for (int i=0; i<n; i++) members[i] = 1;
    NN_chain_core<METHOD_METR_AVERAGE, t_float>(n, distmat, members, Z2);
    delete[] members;
  }
  else if (method == HCLUST_METHOD_MEDIAN) {
    // best median distance (beware: O(n^3))
    generic_linkage<METHOD_METR_MEDIAN, t_float>(n, distmat, NULL, Z2);
  }
  else if (method == HCLUST_METHOD_CENTROID) {
    // best centroid distance (beware: O(n^3))
    double* members = new double[n];
    for (int i=0; i<n; i++) members[i] = 1;
    generic_linkage<METHOD_METR_CENTROID, t_float>(n, distmat, members, Z2);
    delete[] members;
  }
  else {
    return 1;
  }

  int* order = new int[n];
  if (method == HCLUST_METHOD_MEDIAN || method == HCLUST_METHOD_CENTROID) {
    generate_R_dendrogram<true>(merge, height, order, Z2, n);
  } else {
    generate_R_dendrogram<false>(merge, height, order, Z2, n);
  }
  delete[] order; // only needed for visualization

  return 0;
}


// Build condensed distance matrix
// Input arguments:
//   n   = number of observables
//   m   = dimension of observable
// Output arguments:
//   out = allocated double array of size n * (n - 1) / 2 for result
void hclust_pdist(int n, int m, double* pts, double* out) {
  int ii = 0;
  for (int i = 0; i < n; i++) {
    for (int j = i + 1; j < n; j++) {
      // compute squared Euclidean distance
      double d = 0;
      for (int k = 0; k < m; k ++) {
        double error = pts[i * m + k] - pts[j * m + k];
        d += (error * error);
      }
      out[ii] = d;  // squared distance; sqrt(d) intentionally omitted
      ii++;
    }
  }
}

void cluster_points_centroid(int n, int m, double* pts, double dist, int* idx) {
  double* pdist = new double[n * (n - 1) / 2];
  int* merge = new int[2 * (n - 1)];
  double* height = new double[n - 1];

  hclust_pdist(n, m, pts, pdist);
  hclust_fast(n, pdist, HCLUST_METHOD_CENTROID, merge, height);
  cutree_cdist(n, merge, height, dist, idx);

  delete[] pdist;
  delete[] merge;
  delete[] height;
}
}
@@ -1,77 +0,0 @@
//
// C++ standalone version of fastcluster by Daniel Müllner
//
// Copyright: Daniel Müllner, 2011
//            Christoph Dalitz, 2018
// License:   BSD style license
//            (see the file LICENSE for details)
//

#ifndef fastclustercpp_H
#define fastclustercpp_H

//
// Assigns cluster labels (0, ..., nclust-1) to the n points such
// that the cluster result is split into nclust clusters.
//
// Input arguments:
//   n      = number of observables
//   merge  = clustering result in R format
//   nclust = number of clusters
// Output arguments:
//   labels = allocated integer array of size n for result
//
void cutree_k(int n, const int* merge, int nclust, int* labels);

//
// Assigns cluster labels (0, ..., nclust-1) to the n points such
// that the hierarchical clustering is stopped at cluster distance cdist
//
// Input arguments:
//   n      = number of observables
//   merge  = clustering result in R format
//   height = cluster distance at each merge step
//   cdist  = cutoff cluster distance
// Output arguments:
//   labels = allocated integer array of size n for result
//
void cutree_cdist(int n, const int* merge, double* height, double cdist, int* labels);

//
// Hierarchical clustering with one of Daniel Müllner's fast algorithms
//
// Input arguments:
//   n       = number of observables
//   distmat = condensed distance matrix, i.e. an n*(n-1)/2 array representing
//             the upper triangle (without diagonal elements) of the distance
//             matrix, e.g. for n=4:
//               d00 d01 d02 d03
//               d10 d11 d12 d13   ->   d01 d02 d03 d12 d13 d23
//               d20 d21 d22 d23
//               d30 d31 d32 d33
//   method  = cluster metric (see enum method_code)
// Output arguments:
//   merge   = allocated (n-1)x2 matrix (2*(n-1) array) for storing result.
//             Result follows R hclust convention:
//              - observable indices start with one
//              - merge[i][] contains the merged nodes in step i
//              - merge[i][j] is negative when the node is an atom
//   height  = allocated (n-1) array with distances at each merge step
// Return code:
//   0 = ok
//   1 = invalid method
//
int hclust_fast(int n, double* distmat, int method, int* merge, double* height);
enum hclust_fast_methods {
  HCLUST_METHOD_SINGLE = 0,
  HCLUST_METHOD_COMPLETE = 1,
  HCLUST_METHOD_AVERAGE = 2,
  HCLUST_METHOD_MEDIAN = 3,
  HCLUST_METHOD_CENTROID = 5,
};

void hclust_pdist(int n, int m, double* pts, double* out);
void cluster_points_centroid(int n, int m, double* pts, double dist, int* idx);

#endif
@@ -1,115 +0,0 @@
//
// Excerpt from fastcluster_R.cpp
//
// Copyright: Daniel Müllner, 2011 <http://danifold.net>
//

struct pos_node {
  t_index pos;
  int node;
};

void order_nodes(const int N, const int * const merge, const t_index * const node_size, int * const order) {
  /* Parameters:
     N         : number of data points
     merge     : (N-1)×2 array which specifies the node indices which are
                 merged in each step of the clustering procedure.
                 Negative entries -1...-N point to singleton nodes, while
                 positive entries 1...(N-1) point to nodes which are themselves
                 parents of other nodes.
     node_size : array of node sizes - makes it easier
     order     : output array of size N

     Runtime: Θ(N)
  */
  auto_array_ptr<pos_node> queue(N/2);

  int parent;
  int child;
  t_index pos = 0;

  queue[0].pos = 0;
  queue[0].node = N-2;
  t_index idx = 1;

  do {
    --idx;
    pos = queue[idx].pos;
    parent = queue[idx].node;

    // First child
    child = merge[parent];
    if (child<0) { // singleton node, write this into the 'order' array.
      order[pos] = -child;
      ++pos;
    }
    else { /* compound node: put it on top of the queue and decompose it
              in a later iteration. */
      queue[idx].pos = pos;
      queue[idx].node = child-1; // convert index-1 based to index-0 based
      ++idx;
      pos += node_size[child-1];
    }
    // Second child
    child = merge[parent+N-1];
    if (child<0) {
      order[pos] = -child;
    }
    else {
      queue[idx].pos = pos;
      queue[idx].node = child-1;
      ++idx;
    }
  } while (idx>0);
}

#define size_(r_) ( ((r_<N) ? 1 : node_size[r_-N]) )

template <const bool sorted>
void generate_R_dendrogram(int * const merge, double * const height, int * const order, cluster_result & Z2, const int N) {
  // The array "nodes" is a union-find data structure for the cluster
  // identities (only needed for unsorted cluster_result input).
  union_find nodes(sorted ? 0 : N);
  if (!sorted) {
    std::stable_sort(Z2[0], Z2[N-1]);
  }

  t_index node1, node2;
  auto_array_ptr<t_index> node_size(N-1);

  for (t_index i=0; i<N-1; ++i) {
    // Get two data points whose clusters are merged in step i.
    // Find the cluster identifiers for these points.
    if (sorted) {
      node1 = Z2[i]->node1;
      node2 = Z2[i]->node2;
    }
    else {
      node1 = nodes.Find(Z2[i]->node1);
      node2 = nodes.Find(Z2[i]->node2);
      // Merge the nodes in the union-find data structure by making them
      // children of a new node.
      nodes.Union(node1, node2);
    }
    // Sort the nodes in the output array.
    if (node1>node2) {
      t_index tmp = node1;
      node1 = node2;
      node2 = tmp;
    }
    /* Conversion between labeling conventions.
       Input:  singleton nodes 0,...,N-1
               compound nodes  N,...,2N-2
       Output: singleton nodes -1,...,-N
               compound nodes  1,...,N
    */
    merge[i]     = (node1<N) ? -static_cast<int>(node1)-1
                             :  static_cast<int>(node1)-N+1;
    merge[i+N-1] = (node2<N) ? -static_cast<int>(node2)-1
                             :  static_cast<int>(node2)-N+1;
    height[i] = Z2[i]->dist;
    node_size[i] = size_(node1) + size_(node2);
  }

  order_nodes(N, merge, node_size, order);
}
File diff suppressed because it is too large
@@ -1,28 +0,0 @@
import os
import numpy as np

from cffi import FFI
from common.ffi_wrapper import suffix

cluster_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)))
cluster_fn = os.path.join(cluster_dir, "libfastcluster"+suffix())

ffi = FFI()
ffi.cdef("""
int hclust_fast(int n, double* distmat, int method, int* merge, double* height);
void cutree_cdist(int n, const int* merge, double* height, double cdist, int* labels);
void hclust_pdist(int n, int m, double* pts, double* out);
void cluster_points_centroid(int n, int m, double* pts, double dist, int* idx);
""")

hclust = ffi.dlopen(cluster_fn)


def cluster_points_centroid(pts, dist):
  pts = np.ascontiguousarray(pts, dtype=np.float64)
  pts_ptr = ffi.cast("double *", pts.ctypes.data)
  n, m = pts.shape

  labels_ptr = ffi.new("int[]", n)
  # the C library works on squared distances, so square the threshold
  hclust.cluster_points_centroid(n, m, pts_ptr, dist**2, labels_ptr)
  return list(labels_ptr)
@@ -1,35 +0,0 @@
#include <cassert>

extern "C" {
#include "fastcluster.h"
}


int main(int argc, const char* argv[]) {
  const int n = 11;
  const int m = 3;
  double* pts = new double[n*m]{59.26000137, -9.35999966, -5.42500019,
                                91.61999817, -0.31999999, -2.75,
                                31.38000031, 0.40000001, -0.2,
                                89.57999725, -8.07999992, -18.04999924,
                                53.42000122, 0.63999999, -0.175,
                                31.38000031, 0.47999999, -0.2,
                                36.33999939, 0.16, -0.2,
                                53.33999939, 0.95999998, -0.175,
                                59.26000137, -9.76000023, -5.44999981,
                                33.93999977, 0.40000001, -0.22499999,
                                106.74000092, -5.76000023, -18.04999924};

  int * idx = new int[n];
  int * correct_idx = new int[n]{0, 1, 2, 3, 4, 2, 5, 4, 0, 5, 6};

  cluster_points_centroid(n, m, pts, 2.5 * 2.5, idx);

  for (int i = 0; i < n; i++) {
    assert(idx[i] == correct_idx[i]);
  }

  delete[] idx;
  delete[] correct_idx;
  delete[] pts;
}