You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
			
				
					1795 lines
				
				51 KiB
			
		
		
			
		
	
	
					1795 lines
				
				51 KiB
			| 
								 
											6 years ago
										 
									 | 
							
								/*
							 | 
						||
| 
								 | 
							
								  fastcluster: Fast hierarchical clustering routines for R and Python
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  Copyright © 2011 Daniel Müllner
							 | 
						||
| 
								 | 
							
								  <http://danifold.net>
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  This library implements various fast algorithms for hierarchical,
							 | 
						||
| 
								 | 
							
								  agglomerative clustering methods:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  (1) Algorithms for the "stored matrix approach": the input is the array of
							 | 
						||
| 
								 | 
							
								      pairwise dissimilarities.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      MST_linkage_core: single linkage clustering with the "minimum spanning
							 | 
						||
| 
								 | 
							
								      tree algorithm (Rohlfs)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      NN_chain_core: nearest-neighbor-chain algorithm, suitable for single,
							 | 
						||
| 
								 | 
							
								      complete, average, weighted and Ward linkage (Murtagh)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      generic_linkage: generic algorithm, suitable for all distance update
							 | 
						||
| 
								 | 
							
								      formulas (Müllner)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  (2) Algorithms for the "stored data approach": the input are points in a
							 | 
						||
| 
								 | 
							
								      vector space.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      MST_linkage_core_vector: single linkage clustering for vector data
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      generic_linkage_vector: generic algorithm for vector data, suitable for
							 | 
						||
| 
								 | 
							
								      the Ward, centroid and median methods.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      generic_linkage_vector_alternative: alternative scheme for updating the
							 | 
						||
| 
								 | 
							
								      nearest neighbors. This method seems faster than "generic_linkage_vector"
							 | 
						||
| 
								 | 
							
								      for the centroid and median methods but slower for the Ward method.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  All these implementation treat infinity values correctly. They throw an
							 | 
						||
| 
								 | 
							
								  exception if a NaN distance value occurs.
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Older versions of Microsoft Visual Studio do not have the fenv header.
							 | 
						||
| 
								 | 
							
								#ifdef _MSC_VER
							 | 
						||
| 
								 | 
							
								#if (_MSC_VER == 1500 || _MSC_VER == 1600)
							 | 
						||
| 
								 | 
							
								#define NO_INCLUDE_FENV
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								// NaN detection via fenv might not work on systems with software
							 | 
						||
| 
								 | 
							
								// floating-point emulation (bug report for Debian armel).
							 | 
						||
| 
								 | 
							
								#ifdef __SOFTFP__
							 | 
						||
| 
								 | 
							
								#define NO_INCLUDE_FENV
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								#ifdef NO_INCLUDE_FENV
							 | 
						||
| 
								 | 
							
								#pragma message("Do not use fenv header.")
							 | 
						||
| 
								 | 
							
								#else
							 | 
						||
| 
								 
											6 years ago
										 
									 | 
							
								//#pragma message("Use fenv header. If there is a warning about unknown #pragma STDC FENV_ACCESS, this can be ignored.")
							 | 
						||
| 
								 | 
							
								//#pragma STDC FENV_ACCESS on
							 | 
						||
| 
								 
											6 years ago
										 
									 | 
							
								#include <fenv.h>
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#include <cmath> // for std::pow, std::sqrt
							 | 
						||
| 
								 | 
							
								#include <cstddef> // for std::ptrdiff_t
							 | 
						||
| 
								 | 
							
								#include <limits> // for std::numeric_limits<...>::infinity()
							 | 
						||
| 
								 | 
							
								#include <algorithm> // for std::fill_n
							 | 
						||
| 
								 | 
							
								#include <stdexcept> // for std::runtime_error
							 | 
						||
| 
								 | 
							
								#include <string> // for std::string
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#include <cfloat> // also for DBL_MAX, DBL_MIN
							 | 
						||
| 
								 | 
							
								#ifndef DBL_MANT_DIG
							 | 
						||
| 
								 | 
							
								#error The constant DBL_MANT_DIG could not be defined.
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								#define T_FLOAT_MANT_DIG DBL_MANT_DIG
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifndef LONG_MAX
							 | 
						||
| 
								 | 
							
								#include <climits>
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								#ifndef LONG_MAX
							 | 
						||
| 
								 | 
							
								#error The constant LONG_MAX could not be defined.
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								#ifndef INT_MAX
							 | 
						||
| 
								 | 
							
								#error The constant INT_MAX could not be defined.
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifndef INT32_MAX
							 | 
						||
| 
								 | 
							
								#ifdef _MSC_VER
							 | 
						||
| 
								 | 
							
								#if _MSC_VER >= 1600
							 | 
						||
| 
								 | 
							
								#define __STDC_LIMIT_MACROS
							 | 
						||
| 
								 | 
							
								#include <stdint.h>
							 | 
						||
| 
								 | 
							
								#else
							 | 
						||
| 
								 | 
							
								typedef __int32 int_fast32_t;
							 | 
						||
| 
								 | 
							
								typedef __int64 int64_t;
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								#else
							 | 
						||
| 
								 | 
							
								#define __STDC_LIMIT_MACROS
							 | 
						||
| 
								 | 
							
								#include <stdint.h>
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#define FILL_N std::fill_n
							 | 
						||
| 
								 | 
							
								#ifdef _MSC_VER
							 | 
						||
| 
								 | 
							
								#if _MSC_VER < 1600
							 | 
						||
| 
								 | 
							
								#undef FILL_N
							 | 
						||
| 
								 | 
							
								#define FILL_N stdext::unchecked_fill_n
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Suppress warnings about (potentially) uninitialized variables.
							 | 
						||
| 
								 | 
							
								#ifdef _MSC_VER
							 | 
						||
| 
								 | 
							
									#pragma warning (disable:4700)
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifndef HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#if __GNUC__ > 4 || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 6))
							 | 
						||
| 
								 | 
							
								#define HAVE_DIAGNOSTIC 1
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifndef HAVE_VISIBILITY
							 | 
						||
| 
								 | 
							
								#if __GNUC__ >= 4
							 | 
						||
| 
								 | 
							
								#define HAVE_VISIBILITY 1
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Since the public interface is given by the Python respectively R interface,
							 | 
						||
| 
								 | 
							
								 * we do not want other symbols than the interface initalization routines to be
							 | 
						||
| 
								 | 
							
								 * visible in the shared object file. The "visibility" switch is a GCC concept.
							 | 
						||
| 
								 | 
							
								 * Hiding symbols keeps the relocation table small and decreases startup time.
							 | 
						||
| 
								 | 
							
								 * See http://gcc.gnu.org/wiki/Visibility
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
								#if HAVE_VISIBILITY
							 | 
						||
| 
								 | 
							
								#pragma GCC visibility push(hidden)
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								typedef int_fast32_t t_index;
							 | 
						||
| 
								 | 
							
								#ifndef INT32_MAX
							 | 
						||
| 
								 | 
							
								#define MAX_INDEX 0x7fffffffL
							 | 
						||
| 
								 | 
							
								#else
							 | 
						||
| 
								 | 
							
								#define MAX_INDEX INT32_MAX
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								#if (LONG_MAX < MAX_INDEX)
							 | 
						||
| 
								 | 
							
								#error The integer format "t_index" must not have a greater range than "long int".
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								#if (INT_MAX > MAX_INDEX)
							 | 
						||
| 
								 | 
							
								#error The integer format "int" must not have a greater range than "t_index".
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								typedef double t_float;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Method codes.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								   These codes must agree with the METHODS array in fastcluster.R and the
							 | 
						||
| 
								 | 
							
								   dictionary mthidx in fastcluster.py.
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								enum method_codes {
							 | 
						||
| 
								 | 
							
								  // non-Euclidean methods
							 | 
						||
| 
								 | 
							
								  METHOD_METR_SINGLE           = 0,
							 | 
						||
| 
								 | 
							
								  METHOD_METR_COMPLETE         = 1,
							 | 
						||
| 
								 | 
							
								  METHOD_METR_AVERAGE          = 2,
							 | 
						||
| 
								 | 
							
								  METHOD_METR_WEIGHTED         = 3,
							 | 
						||
| 
								 | 
							
								  METHOD_METR_WARD             = 4,
							 | 
						||
| 
								 | 
							
								  METHOD_METR_WARD_D           = METHOD_METR_WARD,
							 | 
						||
| 
								 | 
							
								  METHOD_METR_CENTROID         = 5,
							 | 
						||
| 
								 | 
							
								  METHOD_METR_MEDIAN           = 6,
							 | 
						||
| 
								 | 
							
								  METHOD_METR_WARD_D2          = 7,
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  MIN_METHOD_CODE              = 0,
							 | 
						||
| 
								 | 
							
								  MAX_METHOD_CODE              = 7
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								enum method_codes_vector {
							 | 
						||
| 
								 | 
							
								  // Euclidean methods
							 | 
						||
| 
								 | 
							
								  METHOD_VECTOR_SINGLE         = 0,
							 | 
						||
| 
								 | 
							
								  METHOD_VECTOR_WARD           = 1,
							 | 
						||
| 
								 | 
							
								  METHOD_VECTOR_CENTROID       = 2,
							 | 
						||
| 
								 | 
							
								  METHOD_VECTOR_MEDIAN         = 3,
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  MIN_METHOD_VECTOR_CODE       = 0,
							 | 
						||
| 
								 | 
							
								  MAX_METHOD_VECTOR_CODE       = 3
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// self-destructing array pointer
							 | 
						||
| 
								 | 
							
								template <typename type>
							 | 
						||
| 
								 | 
							
								class auto_array_ptr{
							 | 
						||
| 
								 | 
							
								private:
							 | 
						||
| 
								 | 
							
								  type * ptr;
							 | 
						||
| 
								 | 
							
								  auto_array_ptr(auto_array_ptr const &); // non construction-copyable
							 | 
						||
| 
								 | 
							
								  auto_array_ptr& operator=(auto_array_ptr const &); // non copyable
							 | 
						||
| 
								 | 
							
								public:
							 | 
						||
| 
								 | 
							
								  auto_array_ptr()
							 | 
						||
| 
								 | 
							
								    : ptr(NULL)
							 | 
						||
| 
								 | 
							
								  { }
							 | 
						||
| 
								 | 
							
								  template <typename index>
							 | 
						||
| 
								 | 
							
								  auto_array_ptr(index const size)
							 | 
						||
| 
								 | 
							
								    : ptr(new type[size])
							 | 
						||
| 
								 | 
							
								  { }
							 | 
						||
| 
								 | 
							
								  template <typename index, typename value>
							 | 
						||
| 
								 | 
							
								  auto_array_ptr(index const size, value const val)
							 | 
						||
| 
								 | 
							
								    : ptr(new type[size])
							 | 
						||
| 
								 | 
							
								  {
							 | 
						||
| 
								 | 
							
								    FILL_N(ptr, size, val);
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								  ~auto_array_ptr() {
							 | 
						||
| 
								 | 
							
								    delete [] ptr; }
							 | 
						||
| 
								 | 
							
								  void free() {
							 | 
						||
| 
								 | 
							
								    delete [] ptr;
							 | 
						||
| 
								 | 
							
								    ptr = NULL;
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								  template <typename index>
							 | 
						||
| 
								 | 
							
								  void init(index const size) {
							 | 
						||
| 
								 | 
							
								    ptr = new type [size];
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								  template <typename index, typename value>
							 | 
						||
| 
								 | 
							
								  void init(index const size, value const val) {
							 | 
						||
| 
								 | 
							
								    init(size);
							 | 
						||
| 
								 | 
							
								    FILL_N(ptr, size, val);
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								  inline operator type *() const { return ptr; }
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								struct node {
							 | 
						||
| 
								 | 
							
								  t_index node1, node2;
							 | 
						||
| 
								 | 
							
								  t_float dist;
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								inline bool operator< (const node a, const node b) {
							 | 
						||
| 
								 | 
							
								  return (a.dist < b.dist);
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class cluster_result {
							 | 
						||
| 
								 | 
							
								private:
							 | 
						||
| 
								 | 
							
								  auto_array_ptr<node> Z;
							 | 
						||
| 
								 | 
							
								  t_index pos;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								public:
							 | 
						||
| 
								 | 
							
								  cluster_result(const t_index size)
							 | 
						||
| 
								 | 
							
								    : Z(size)
							 | 
						||
| 
								 | 
							
								    , pos(0)
							 | 
						||
| 
								 | 
							
								  {}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  void append(const t_index node1, const t_index node2, const t_float dist) {
							 | 
						||
| 
								 | 
							
								    Z[pos].node1 = node1;
							 | 
						||
| 
								 | 
							
								    Z[pos].node2 = node2;
							 | 
						||
| 
								 | 
							
								    Z[pos].dist  = dist;
							 | 
						||
| 
								 | 
							
								    ++pos;
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  node * operator[] (const t_index idx) const { return Z + idx; }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  /* Define several methods to postprocess the distances. All these functions
							 | 
						||
| 
								 | 
							
								     are monotone, so they do not change the sorted order of distances. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  void sqrt() const {
							 | 
						||
| 
								 | 
							
								    for (node * ZZ=Z; ZZ!=Z+pos; ++ZZ) {
							 | 
						||
| 
								 | 
							
								      ZZ->dist = std::sqrt(ZZ->dist);
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  void sqrt(const t_float) const { // ignore the argument
							 | 
						||
| 
								 | 
							
								    sqrt();
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  void sqrtdouble(const t_float) const { // ignore the argument
							 | 
						||
| 
								 | 
							
								    for (node * ZZ=Z; ZZ!=Z+pos; ++ZZ) {
							 | 
						||
| 
								 | 
							
								      ZZ->dist = std::sqrt(2*ZZ->dist);
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  #ifdef R_pow
							 | 
						||
| 
								 | 
							
								  #define my_pow R_pow
							 | 
						||
| 
								 | 
							
								  #else
							 | 
						||
| 
								 | 
							
								  #define my_pow std::pow
							 | 
						||
| 
								 | 
							
								  #endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  void power(const t_float p) const {
							 | 
						||
| 
								 | 
							
								    t_float const q = 1/p;
							 | 
						||
| 
								 | 
							
								    for (node * ZZ=Z; ZZ!=Z+pos; ++ZZ) {
							 | 
						||
| 
								 | 
							
								      ZZ->dist = my_pow(ZZ->dist,q);
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  void plusone(const t_float) const { // ignore the argument
							 | 
						||
| 
								 | 
							
								    for (node * ZZ=Z; ZZ!=Z+pos; ++ZZ) {
							 | 
						||
| 
								 | 
							
								      ZZ->dist += 1;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  void divide(const t_float denom) const {
							 | 
						||
| 
								 | 
							
								    for (node * ZZ=Z; ZZ!=Z+pos; ++ZZ) {
							 | 
						||
| 
								 | 
							
								      ZZ->dist /= denom;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class doubly_linked_list {
							 | 
						||
| 
								 | 
							
								  /*
							 | 
						||
| 
								 | 
							
								    Class for a doubly linked list. Initially, the list is the integer range
							 | 
						||
| 
								 | 
							
								    [0, size]. We provide a forward iterator and a method to delete an index
							 | 
						||
| 
								 | 
							
								    from the list.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    Typical use: for (i=L.start; L<size; i=L.succ[I])
							 | 
						||
| 
								 | 
							
								    or
							 | 
						||
| 
								 | 
							
								    for (i=somevalue; L<size; i=L.succ[I])
							 | 
						||
| 
								 | 
							
								  */
							 | 
						||
| 
								 | 
							
								public:
							 | 
						||
| 
								 | 
							
								  t_index start;
							 | 
						||
| 
								 | 
							
								  auto_array_ptr<t_index> succ;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								private:
							 | 
						||
| 
								 | 
							
								  auto_array_ptr<t_index> pred;
							 | 
						||
| 
								 | 
							
								  // Not necessarily private, we just do not need it in this instance.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								public:
							 | 
						||
| 
								 | 
							
								  doubly_linked_list(const t_index size)
							 | 
						||
| 
								 | 
							
								    // Initialize to the given size.
							 | 
						||
| 
								 | 
							
								    : start(0)
							 | 
						||
| 
								 | 
							
								    , succ(size+1)
							 | 
						||
| 
								 | 
							
								    , pred(size+1)
							 | 
						||
| 
								 | 
							
								  {
							 | 
						||
| 
								 | 
							
								    for (t_index i=0; i<size; ++i) {
							 | 
						||
| 
								 | 
							
								      pred[i+1] = i;
							 | 
						||
| 
								 | 
							
								      succ[i] = i+1;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    // pred[0] is never accessed!
							 | 
						||
| 
								 | 
							
								    //succ[size] is never accessed!
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  ~doubly_linked_list() {}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  void remove(const t_index idx) {
							 | 
						||
| 
								 | 
							
								    // Remove an index from the list.
							 | 
						||
| 
								 | 
							
								    if (idx==start) {
							 | 
						||
| 
								 | 
							
								      start = succ[idx];
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    else {
							 | 
						||
| 
								 | 
							
								      succ[pred[idx]] = succ[idx];
							 | 
						||
| 
								 | 
							
								      pred[succ[idx]] = pred[idx];
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    succ[idx] = 0; // Mark as inactive
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  bool is_inactive(t_index idx) const {
							 | 
						||
| 
								 | 
							
								    return (succ[idx]==0);
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Indexing functions
							 | 
						||
| 
								 | 
							
								// D is the upper triangular part of a symmetric (NxN)-matrix
							 | 
						||
| 
								 | 
							
								// We require r_ < c_ !
							 | 
						||
| 
								 | 
							
								#define D_(r_,c_) ( D[(static_cast<std::ptrdiff_t>(2*N-3-(r_))*(r_)>>1)+(c_)-1] )
							 | 
						||
| 
								 | 
							
								// Z is an ((N-1)x4)-array
							 | 
						||
| 
								 | 
							
								#define Z_(_r, _c) (Z[(_r)*4 + (_c)])
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								  Lookup function for a union-find data structure.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  The function finds the root of idx by going iteratively through all
							 | 
						||
| 
								 | 
							
								  parent elements until a root is found. An element i is a root if
							 | 
						||
| 
								 | 
							
								  nodes[i] is zero. To make subsequent searches faster, the entry for
							 | 
						||
| 
								 | 
							
								  idx and all its parents is updated with the root element.
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
								class union_find {
							 | 
						||
| 
								 | 
							
								private:
							 | 
						||
| 
								 | 
							
								  auto_array_ptr<t_index> parent;
							 | 
						||
| 
								 | 
							
								  t_index nextparent;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								public:
							 | 
						||
| 
								 | 
							
								  union_find(const t_index size)
							 | 
						||
| 
								 | 
							
								    : parent(size>0 ? 2*size-1 : 0, 0)
							 | 
						||
| 
								 | 
							
								    , nextparent(size)
							 | 
						||
| 
								 | 
							
								  { }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  t_index Find (t_index idx) const {
							 | 
						||
| 
								 | 
							
								    if (parent[idx] != 0 ) { // a → b
							 | 
						||
| 
								 | 
							
								      t_index p = idx;
							 | 
						||
| 
								 | 
							
								      idx = parent[idx];
							 | 
						||
| 
								 | 
							
								      if (parent[idx] != 0 ) { // a → b → c
							 | 
						||
| 
								 | 
							
								        do {
							 | 
						||
| 
								 | 
							
								          idx = parent[idx];
							 | 
						||
| 
								 | 
							
								        } while (parent[idx] != 0);
							 | 
						||
| 
								 | 
							
								        do {
							 | 
						||
| 
								 | 
							
								          t_index tmp = parent[p];
							 | 
						||
| 
								 | 
							
								          parent[p] = idx;
							 | 
						||
| 
								 | 
							
								          p = tmp;
							 | 
						||
| 
								 | 
							
								        } while (parent[p] != idx);
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    return idx;
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  void Union (const t_index node1, const t_index node2) {
							 | 
						||
| 
								 | 
							
								    parent[node1] = parent[node2] = nextparent++;
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class nan_error{};
							 | 
						||
| 
								 | 
							
								#ifdef FE_INVALID
							 | 
						||
| 
								 | 
							
								class fenv_error{};
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								static void MST_linkage_core(const t_index N, const t_float * const D,
							 | 
						||
| 
								 | 
							
								                             cluster_result & Z2) {
							 | 
						||
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								    N: integer, number of data points
							 | 
						||
| 
								 | 
							
								    D: condensed distance matrix N*(N-1)/2
							 | 
						||
| 
								 | 
							
								    Z2: output data structure
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    The basis of this algorithm is an algorithm by Rohlf:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    F. James Rohlf, Hierarchical clustering using the minimum spanning tree,
							 | 
						||
| 
								 | 
							
								    The Computer Journal, vol. 16, 1973, p. 93–95.
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								  t_index i;
							 | 
						||
| 
								 | 
							
								  t_index idx2;
							 | 
						||
| 
								 | 
							
								  doubly_linked_list active_nodes(N);
							 | 
						||
| 
								 | 
							
								  auto_array_ptr<t_float> d(N);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  t_index prev_node;
							 | 
						||
| 
								 | 
							
								  t_float min;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  // first iteration
							 | 
						||
| 
								 | 
							
								  idx2 = 1;
							 | 
						||
| 
								 | 
							
								  min = std::numeric_limits<t_float>::infinity();
							 | 
						||
| 
								 | 
							
								  for (i=1; i<N; ++i) {
							 | 
						||
| 
								 | 
							
								    d[i] = D[i-1];
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic push
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic ignored "-Wfloat-equal"
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								    if (d[i] < min) {
							 | 
						||
| 
								 | 
							
								      min = d[i];
							 | 
						||
| 
								 | 
							
								      idx2 = i;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    else if (fc_isnan(d[i]))
							 | 
						||
| 
								 | 
							
								      throw (nan_error());
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic pop
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								  Z2.append(0, idx2, min);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  for (t_index j=1; j<N-1; ++j) {
							 | 
						||
| 
								 | 
							
								    prev_node = idx2;
							 | 
						||
| 
								 | 
							
								    active_nodes.remove(prev_node);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    idx2 = active_nodes.succ[0];
							 | 
						||
| 
								 | 
							
								    min = d[idx2];
							 | 
						||
| 
								 | 
							
								    for (i=idx2; i<prev_node; i=active_nodes.succ[i]) {
							 | 
						||
| 
								 | 
							
								      t_float tmp = D_(i, prev_node);
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic push
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic ignored "-Wfloat-equal"
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								      if (tmp < d[i])
							 | 
						||
| 
								 | 
							
								        d[i] = tmp;
							 | 
						||
| 
								 | 
							
								      else if (fc_isnan(tmp))
							 | 
						||
| 
								 | 
							
								        throw (nan_error());
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic pop
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								      if (d[i] < min) {
							 | 
						||
| 
								 | 
							
								        min = d[i];
							 | 
						||
| 
								 | 
							
								        idx2 = i;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    for (; i<N; i=active_nodes.succ[i]) {
							 | 
						||
| 
								 | 
							
								      t_float tmp = D_(prev_node, i);
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic push
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic ignored "-Wfloat-equal"
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								      if (d[i] > tmp)
							 | 
						||
| 
								 | 
							
								        d[i] = tmp;
							 | 
						||
| 
								 | 
							
								      else if (fc_isnan(tmp))
							 | 
						||
| 
								 | 
							
								        throw (nan_error());
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic pop
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								      if (d[i] < min) {
							 | 
						||
| 
								 | 
							
								        min = d[i];
							 | 
						||
| 
								 | 
							
								        idx2 = i;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    Z2.append(prev_node, idx2, min);
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Functions for the update of the dissimilarity array */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								inline static void f_single( t_float * const b, const t_float a ) {
							 | 
						||
| 
								 | 
							
								  if (*b > a) *b = a;
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								inline static void f_complete( t_float * const b, const t_float a ) {
							 | 
						||
| 
								 | 
							
								  if (*b < a) *b = a;
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								inline static void f_average( t_float * const b, const t_float a, const t_float s, const t_float t) {
							 | 
						||
| 
								 | 
							
								  *b = s*a + t*(*b);
							 | 
						||
| 
								 | 
							
								  #ifndef FE_INVALID
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic push
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic ignored "-Wfloat-equal"
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								  if (fc_isnan(*b)) {
							 | 
						||
| 
								 | 
							
								    throw(nan_error());
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic pop
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								  #endif
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								inline static void f_weighted( t_float * const b, const t_float a) {
							 | 
						||
| 
								 | 
							
								  *b = (a+*b)*.5;
							 | 
						||
| 
								 | 
							
								  #ifndef FE_INVALID
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic push
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic ignored "-Wfloat-equal"
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								  if (fc_isnan(*b)) {
							 | 
						||
| 
								 | 
							
								    throw(nan_error());
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic pop
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								  #endif
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								inline static void f_ward( t_float * const b, const t_float a, const t_float c, const t_float s, const t_float t, const t_float v) {
							 | 
						||
| 
								 | 
							
								  *b = ( (v+s)*a - v*c + (v+t)*(*b) ) / (s+t+v);
							 | 
						||
| 
								 | 
							
								  //*b = a+(*b)-(t*a+s*(*b)+v*c)/(s+t+v);
							 | 
						||
| 
								 | 
							
								  #ifndef FE_INVALID
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic push
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic ignored "-Wfloat-equal"
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								  if (fc_isnan(*b)) {
							 | 
						||
| 
								 | 
							
								    throw(nan_error());
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic pop
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								  #endif
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								inline static void f_centroid( t_float * const b, const t_float a, const t_float stc, const t_float s, const t_float t) {
							 | 
						||
| 
								 | 
							
								  *b = s*a - stc + t*(*b);
							 | 
						||
| 
								 | 
							
								  #ifndef FE_INVALID
							 | 
						||
| 
								 | 
							
								  if (fc_isnan(*b)) {
							 | 
						||
| 
								 | 
							
								    throw(nan_error());
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic pop
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								  #endif
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								inline static void f_median( t_float * const b, const t_float a, const t_float c_4) {
							 | 
						||
| 
								 | 
							
								  *b = (a+(*b))*.5 - c_4;
							 | 
						||
| 
								 | 
							
								  #ifndef FE_INVALID
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic push
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic ignored "-Wfloat-equal"
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								  if (fc_isnan(*b)) {
							 | 
						||
| 
								 | 
							
								    throw(nan_error());
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic pop
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								  #endif
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								template <method_codes method, typename t_members>
							 | 
						||
| 
								 | 
							
								static void NN_chain_core(const t_index N, t_float * const D, t_members * const members, cluster_result & Z2) {
							 | 
						||
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								    N: integer
							 | 
						||
| 
								 | 
							
								    D: condensed distance matrix N*(N-1)/2
							 | 
						||
| 
								 | 
							
								    Z2: output data structure
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    This is the NN-chain algorithm, described on page 86 in the following book:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    Fionn Murtagh, Multidimensional Clustering Algorithms,
							 | 
						||
| 
								 | 
							
								    Vienna, Würzburg: Physica-Verlag, 1985.
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								  t_index i;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  auto_array_ptr<t_index> NN_chain(N);
							 | 
						||
| 
								 | 
							
								  t_index NN_chain_tip = 0;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  t_index idx1, idx2;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  t_float size1, size2;
							 | 
						||
| 
								 | 
							
								  doubly_linked_list active_nodes(N);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  t_float min;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  for (t_float const * DD=D; DD!=D+(static_cast<std::ptrdiff_t>(N)*(N-1)>>1);
							 | 
						||
| 
								 | 
							
								       ++DD) {
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic push
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic ignored "-Wfloat-equal"
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								    if (fc_isnan(*DD)) {
							 | 
						||
| 
								 | 
							
								      throw(nan_error());
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic pop
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  #ifdef FE_INVALID
							 | 
						||
| 
								 | 
							
								  if (feclearexcept(FE_INVALID)) throw fenv_error();
							 | 
						||
| 
								 | 
							
								  #endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  for (t_index j=0; j<N-1; ++j) {
							 | 
						||
| 
								 | 
							
								    if (NN_chain_tip <= 3) {
							 | 
						||
| 
								 | 
							
								      NN_chain[0] = idx1 = active_nodes.start;
							 | 
						||
| 
								 | 
							
								      NN_chain_tip = 1;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      idx2 = active_nodes.succ[idx1];
							 | 
						||
| 
								 | 
							
								      min = D_(idx1,idx2);
							 | 
						||
| 
								 | 
							
								      for (i=active_nodes.succ[idx2]; i<N; i=active_nodes.succ[i]) {
							 | 
						||
| 
								 | 
							
								        if (D_(idx1,i) < min) {
							 | 
						||
| 
								 | 
							
								          min = D_(idx1,i);
							 | 
						||
| 
								 | 
							
								          idx2 = i;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								    }  // a: idx1   b: idx2
							 | 
						||
| 
								 | 
							
								    else {
							 | 
						||
| 
								 | 
							
								      NN_chain_tip -= 3;
							 | 
						||
| 
								 | 
							
								      idx1 = NN_chain[NN_chain_tip-1];
							 | 
						||
| 
								 | 
							
								      idx2 = NN_chain[NN_chain_tip];
							 | 
						||
| 
								 | 
							
								      min = idx1<idx2 ? D_(idx1,idx2) : D_(idx2,idx1);
							 | 
						||
| 
								 | 
							
								    }  // a: idx1   b: idx2
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    do {
							 | 
						||
| 
								 | 
							
								      NN_chain[NN_chain_tip] = idx2;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      for (i=active_nodes.start; i<idx2; i=active_nodes.succ[i]) {
							 | 
						||
| 
								 | 
							
								        if (D_(i,idx2) < min) {
							 | 
						||
| 
								 | 
							
								          min = D_(i,idx2);
							 | 
						||
| 
								 | 
							
								          idx1 = i;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      for (i=active_nodes.succ[idx2]; i<N; i=active_nodes.succ[i]) {
							 | 
						||
| 
								 | 
							
								        if (D_(idx2,i) < min) {
							 | 
						||
| 
								 | 
							
								          min = D_(idx2,i);
							 | 
						||
| 
								 | 
							
								          idx1 = i;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      idx2 = idx1;
							 | 
						||
| 
								 | 
							
								      idx1 = NN_chain[NN_chain_tip++];
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    } while (idx2 != NN_chain[NN_chain_tip-2]);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    Z2.append(idx1, idx2, min);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    if (idx1>idx2) {
							 | 
						||
| 
								 | 
							
								      t_index tmp = idx1;
							 | 
						||
| 
								 | 
							
								      idx1 = idx2;
							 | 
						||
| 
								 | 
							
								      idx2 = tmp;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    if (method==METHOD_METR_AVERAGE ||
							 | 
						||
| 
								 | 
							
								        method==METHOD_METR_WARD) {
							 | 
						||
| 
								 | 
							
								      size1 = static_cast<t_float>(members[idx1]);
							 | 
						||
| 
								 | 
							
								      size2 = static_cast<t_float>(members[idx2]);
							 | 
						||
| 
								 | 
							
								      members[idx2] += members[idx1];
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    // Remove the smaller index from the valid indices (active_nodes).
							 | 
						||
| 
								 | 
							
								    active_nodes.remove(idx1);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    switch (method) {
							 | 
						||
| 
								 | 
							
								    case METHOD_METR_SINGLE:
							 | 
						||
| 
								 | 
							
								      /*
							 | 
						||
| 
								 | 
							
								      Single linkage.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      Characteristic: new distances are never longer than the old distances.
							 | 
						||
| 
								 | 
							
								      */
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range [start, idx1).
							 | 
						||
| 
								 | 
							
								      for (i=active_nodes.start; i<idx1; i=active_nodes.succ[i])
							 | 
						||
| 
								 | 
							
								        f_single(&D_(i, idx2), D_(i, idx1) );
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx1, idx2).
							 | 
						||
| 
								 | 
							
								      for (; i<idx2; i=active_nodes.succ[i])
							 | 
						||
| 
								 | 
							
								        f_single(&D_(i, idx2), D_(idx1, i) );
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx2, N).
							 | 
						||
| 
								 | 
							
								      for (i=active_nodes.succ[idx2]; i<N; i=active_nodes.succ[i])
							 | 
						||
| 
								 | 
							
								        f_single(&D_(idx2, i), D_(idx1, i) );
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case METHOD_METR_COMPLETE:
							 | 
						||
| 
								 | 
							
								      /*
							 | 
						||
| 
								 | 
							
								      Complete linkage.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      Characteristic: new distances are never shorter than the old distances.
							 | 
						||
| 
								 | 
							
								      */
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range [start, idx1).
							 | 
						||
| 
								 | 
							
								      for (i=active_nodes.start; i<idx1; i=active_nodes.succ[i])
							 | 
						||
| 
								 | 
							
								        f_complete(&D_(i, idx2), D_(i, idx1) );
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx1, idx2).
							 | 
						||
| 
								 | 
							
								      for (; i<idx2; i=active_nodes.succ[i])
							 | 
						||
| 
								 | 
							
								        f_complete(&D_(i, idx2), D_(idx1, i) );
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx2, N).
							 | 
						||
| 
								 | 
							
								      for (i=active_nodes.succ[idx2]; i<N; i=active_nodes.succ[i])
							 | 
						||
| 
								 | 
							
								        f_complete(&D_(idx2, i), D_(idx1, i) );
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case METHOD_METR_AVERAGE: {
							 | 
						||
| 
								 | 
							
								      /*
							 | 
						||
| 
								 | 
							
								      Average linkage.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      Shorter and longer distances can occur.
							 | 
						||
| 
								 | 
							
								      */
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range [start, idx1).
							 | 
						||
| 
								 | 
							
								      t_float s = size1/(size1+size2);
							 | 
						||
| 
								 | 
							
								      t_float t = size2/(size1+size2);
							 | 
						||
| 
								 | 
							
								      for (i=active_nodes.start; i<idx1; i=active_nodes.succ[i])
							 | 
						||
| 
								 | 
							
								        f_average(&D_(i, idx2), D_(i, idx1), s, t );
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx1, idx2).
							 | 
						||
| 
								 | 
							
								      for (; i<idx2; i=active_nodes.succ[i])
							 | 
						||
| 
								 | 
							
								        f_average(&D_(i, idx2), D_(idx1, i), s, t );
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx2, N).
							 | 
						||
| 
								 | 
							
								      for (i=active_nodes.succ[idx2]; i<N; i=active_nodes.succ[i])
							 | 
						||
| 
								 | 
							
								        f_average(&D_(idx2, i), D_(idx1, i), s, t );
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case METHOD_METR_WEIGHTED:
							 | 
						||
| 
								 | 
							
								      /*
							 | 
						||
| 
								 | 
							
								      Weighted linkage.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      Shorter and longer distances can occur.
							 | 
						||
| 
								 | 
							
								      */
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range [start, idx1).
							 | 
						||
| 
								 | 
							
								      for (i=active_nodes.start; i<idx1; i=active_nodes.succ[i])
							 | 
						||
| 
								 | 
							
								        f_weighted(&D_(i, idx2), D_(i, idx1) );
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx1, idx2).
							 | 
						||
| 
								 | 
							
								      for (; i<idx2; i=active_nodes.succ[i])
							 | 
						||
| 
								 | 
							
								        f_weighted(&D_(i, idx2), D_(idx1, i) );
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx2, N).
							 | 
						||
| 
								 | 
							
								      for (i=active_nodes.succ[idx2]; i<N; i=active_nodes.succ[i])
							 | 
						||
| 
								 | 
							
								        f_weighted(&D_(idx2, i), D_(idx1, i) );
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case METHOD_METR_WARD:
							 | 
						||
| 
								 | 
							
								      /*
							 | 
						||
| 
								 | 
							
								      Ward linkage.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      Shorter and longer distances can occur, not smaller than min(d1,d2)
							 | 
						||
| 
								 | 
							
								      but maybe bigger than max(d1,d2).
							 | 
						||
| 
								 | 
							
								      */
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range [start, idx1).
							 | 
						||
| 
								 | 
							
								      //t_float v = static_cast<t_float>(members[i]);
							 | 
						||
| 
								 | 
							
								      for (i=active_nodes.start; i<idx1; i=active_nodes.succ[i])
							 | 
						||
| 
								 | 
							
								        f_ward(&D_(i, idx2), D_(i, idx1), min,
							 | 
						||
| 
								 | 
							
								               size1, size2, static_cast<t_float>(members[i]) );
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx1, idx2).
							 | 
						||
| 
								 | 
							
								      for (; i<idx2; i=active_nodes.succ[i])
							 | 
						||
| 
								 | 
							
								        f_ward(&D_(i, idx2), D_(idx1, i), min,
							 | 
						||
| 
								 | 
							
								               size1, size2, static_cast<t_float>(members[i]) );
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx2, N).
							 | 
						||
| 
								 | 
							
								      for (i=active_nodes.succ[idx2]; i<N; i=active_nodes.succ[i])
							 | 
						||
| 
								 | 
							
								        f_ward(&D_(idx2, i), D_(idx1, i), min,
							 | 
						||
| 
								 | 
							
								               size1, size2, static_cast<t_float>(members[i]) );
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    default:
							 | 
						||
| 
								 | 
							
								      throw std::runtime_error(std::string("Invalid method."));
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								  #ifdef FE_INVALID
							 | 
						||
| 
								 | 
							
								  if (fetestexcept(FE_INVALID)) throw fenv_error();
							 | 
						||
| 
								 | 
							
								  #endif
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class binary_min_heap {
							 | 
						||
| 
								 | 
							
								  /*
							 | 
						||
| 
								 | 
							
								  Class for a binary min-heap. The data resides in an array A. The elements of
							 | 
						||
| 
								 | 
							
								  A are not changed but two lists I and R of indices are generated which point
							 | 
						||
| 
								 | 
							
								  to elements of A and backwards.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  The heap tree structure is
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								     H[2*i+1]     H[2*i+2]
							 | 
						||
| 
								 | 
							
								         \            /
							 | 
						||
| 
								 | 
							
								          \          /
							 | 
						||
| 
								 | 
							
								           ≤        ≤
							 | 
						||
| 
								 | 
							
								            \      /
							 | 
						||
| 
								 | 
							
								             \    /
							 | 
						||
| 
								 | 
							
								              H[i]
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  where the children must be less or equal than their parent. Thus, H[0]
							 | 
						||
| 
								 | 
							
								  contains the minimum. The lists I and R are made such that H[i] = A[I[i]]
							 | 
						||
| 
								 | 
							
								  and R[I[i]] = i.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  This implementation is not designed to handle NaN values.
							 | 
						||
| 
								 | 
							
								  */
							 | 
						||
| 
								 | 
							
								private:
							 | 
						||
| 
								 | 
							
								  t_float * const A;
							 | 
						||
| 
								 | 
							
								  t_index size;
							 | 
						||
| 
								 | 
							
								  auto_array_ptr<t_index> I;
							 | 
						||
| 
								 | 
							
								  auto_array_ptr<t_index> R;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  // no default constructor
							 | 
						||
| 
								 | 
							
								  binary_min_heap();
							 | 
						||
| 
								 | 
							
								  // noncopyable
							 | 
						||
| 
								 | 
							
								  binary_min_heap(binary_min_heap const &);
							 | 
						||
| 
								 | 
							
								  binary_min_heap & operator=(binary_min_heap const &);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								public:
							 | 
						||
| 
								 | 
							
								  binary_min_heap(t_float * const A_, const t_index size_)
							 | 
						||
| 
								 | 
							
								    : A(A_), size(size_), I(size), R(size)
							 | 
						||
| 
								 | 
							
								  { // Allocate memory and initialize the lists I and R to the identity. This
							 | 
						||
| 
								 | 
							
								    // does not make it a heap. Call heapify afterwards!
							 | 
						||
| 
								 | 
							
								    for (t_index i=0; i<size; ++i)
							 | 
						||
| 
								 | 
							
								      R[i] = I[i] = i;
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  binary_min_heap(t_float * const A_, const t_index size1, const t_index size2,
							 | 
						||
| 
								 | 
							
								                  const t_index start)
							 | 
						||
| 
								 | 
							
								    : A(A_), size(size1), I(size1), R(size2)
							 | 
						||
| 
								 | 
							
								  { // Allocate memory and initialize the lists I and R to the identity. This
							 | 
						||
| 
								 | 
							
								    // does not make it a heap. Call heapify afterwards!
							 | 
						||
| 
								 | 
							
								    for (t_index i=0; i<size; ++i) {
							 | 
						||
| 
								 | 
							
								      R[i+start] = i;
							 | 
						||
| 
								 | 
							
								      I[i] = i + start;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  ~binary_min_heap() {}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  void heapify() {
							 | 
						||
| 
								 | 
							
								    // Arrange the indices I and R so that H[i] := A[I[i]] satisfies the heap
							 | 
						||
| 
								 | 
							
								    // condition H[i] < H[2*i+1] and H[i] < H[2*i+2] for each i.
							 | 
						||
| 
								 | 
							
								    //
							 | 
						||
| 
								 | 
							
								    // Complexity: Θ(size)
							 | 
						||
| 
								 | 
							
								    // Reference: Cormen, Leiserson, Rivest, Stein, Introduction to Algorithms,
							 | 
						||
| 
								 | 
							
								    // 3rd ed., 2009, Section 6.3 “Building a heap”
							 | 
						||
| 
								 | 
							
								    t_index idx;
							 | 
						||
| 
								 | 
							
								    for (idx=(size>>1); idx>0; ) {
							 | 
						||
| 
								 | 
							
								      --idx;
							 | 
						||
| 
								 | 
							
								      update_geq_(idx);
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  inline t_index argmin() const {
							 | 
						||
| 
								 | 
							
								    // Return the minimal element.
							 | 
						||
| 
								 | 
							
								    return I[0];
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  void heap_pop() {
							 | 
						||
| 
								 | 
							
								    // Remove the minimal element from the heap.
							 | 
						||
| 
								 | 
							
								    --size;
							 | 
						||
| 
								 | 
							
								    I[0] = I[size];
							 | 
						||
| 
								 | 
							
								    R[I[0]] = 0;
							 | 
						||
| 
								 | 
							
								    update_geq_(0);
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  void remove(t_index idx) {
							 | 
						||
| 
								 | 
							
								    // Remove an element from the heap.
							 | 
						||
| 
								 | 
							
								    --size;
							 | 
						||
| 
								 | 
							
								    R[I[size]] = R[idx];
							 | 
						||
| 
								 | 
							
								    I[R[idx]] = I[size];
							 | 
						||
| 
								 | 
							
								    if ( H(size)<=A[idx] ) {
							 | 
						||
| 
								 | 
							
								      update_leq_(R[idx]);
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    else {
							 | 
						||
| 
								 | 
							
								      update_geq_(R[idx]);
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  void replace ( const t_index idxold, const t_index idxnew,
							 | 
						||
| 
								 | 
							
								                 const t_float val) {
							 | 
						||
| 
								 | 
							
								    R[idxnew] = R[idxold];
							 | 
						||
| 
								 | 
							
								    I[R[idxnew]] = idxnew;
							 | 
						||
| 
								 | 
							
								    if (val<=A[idxold])
							 | 
						||
| 
								 | 
							
								      update_leq(idxnew, val);
							 | 
						||
| 
								 | 
							
								    else
							 | 
						||
| 
								 | 
							
								      update_geq(idxnew, val);
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  void update ( const t_index idx, const t_float val ) const {
							 | 
						||
| 
								 | 
							
								    // Update the element A[i] with val and re-arrange the indices to preserve
							 | 
						||
| 
								 | 
							
								    // the heap condition.
							 | 
						||
| 
								 | 
							
								    if (val<=A[idx])
							 | 
						||
| 
								 | 
							
								      update_leq(idx, val);
							 | 
						||
| 
								 | 
							
								    else
							 | 
						||
| 
								 | 
							
								      update_geq(idx, val);
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  void update_leq ( const t_index idx, const t_float val ) const {
							 | 
						||
| 
								 | 
							
								    // Use this when the new value is not more than the old value.
							 | 
						||
| 
								 | 
							
								    A[idx] = val;
							 | 
						||
| 
								 | 
							
								    update_leq_(R[idx]);
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  void update_geq ( const t_index idx, const t_float val ) const {
							 | 
						||
| 
								 | 
							
								    // Use this when the new value is not less than the old value.
							 | 
						||
| 
								 | 
							
								    A[idx] = val;
							 | 
						||
| 
								 | 
							
								    update_geq_(R[idx]);
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								private:
							 | 
						||
| 
								 | 
							
								  void update_leq_ (t_index i) const {
							 | 
						||
| 
								 | 
							
								    t_index j;
							 | 
						||
| 
								 | 
							
								    for ( ; (i>0) && ( H(i)<H(j=(i-1)>>1) ); i=j)
							 | 
						||
| 
								 | 
							
								      heap_swap(i,j);
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  void update_geq_ (t_index i) const {
							 | 
						||
| 
								 | 
							
								    t_index j;
							 | 
						||
| 
								 | 
							
								    for ( ; (j=2*i+1)<size; i=j) {
							 | 
						||
| 
								 | 
							
								      if ( H(j)>=H(i) ) {
							 | 
						||
| 
								 | 
							
								        ++j;
							 | 
						||
| 
								 | 
							
								        if ( j>=size || H(j)>=H(i) ) break;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      else if ( j+1<size && H(j+1)<H(j) ) ++j;
							 | 
						||
| 
								 | 
							
								      heap_swap(i, j);
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  void heap_swap(const t_index i, const t_index j) const {
							 | 
						||
| 
								 | 
							
								    // Swap two indices.
							 | 
						||
| 
								 | 
							
								    t_index tmp = I[i];
							 | 
						||
| 
								 | 
							
								    I[i] = I[j];
							 | 
						||
| 
								 | 
							
								    I[j] = tmp;
							 | 
						||
| 
								 | 
							
								    R[I[i]] = i;
							 | 
						||
| 
								 | 
							
								    R[I[j]] = j;
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  inline t_float H(const t_index i) const {
							 | 
						||
| 
								 | 
							
								    return A[I[i]];
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								template <method_codes method, typename t_members>
							 | 
						||
| 
								 | 
							
								static void generic_linkage(const t_index N, t_float * const D, t_members * const members, cluster_result & Z2) {
							 | 
						||
| 
								 | 
							
								  /*
							 | 
						||
| 
								 | 
							
								    N: integer, number of data points
							 | 
						||
| 
								 | 
							
								    D: condensed distance matrix N*(N-1)/2
							 | 
						||
| 
								 | 
							
								    Z2: output data structure
							 | 
						||
| 
								 | 
							
								  */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  const t_index N_1 = N-1;
							 | 
						||
| 
								 | 
							
								  t_index i, j; // loop variables
							 | 
						||
| 
								 | 
							
								  t_index idx1, idx2; // row and column indices
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  auto_array_ptr<t_index> n_nghbr(N_1); // array of nearest neighbors
							 | 
						||
| 
								 | 
							
								  auto_array_ptr<t_float> mindist(N_1); // distances to the nearest neighbors
							 | 
						||
| 
								 | 
							
								  auto_array_ptr<t_index> row_repr(N); // row_repr[i]: node number that the
							 | 
						||
| 
								 | 
							
								                                       // i-th row represents
							 | 
						||
| 
								 | 
							
								  doubly_linked_list active_nodes(N);
							 | 
						||
| 
								 | 
							
								  binary_min_heap nn_distances(&*mindist, N_1); // minimum heap structure for
							 | 
						||
| 
								 | 
							
								                        // the distance to the nearest neighbor of each point
							 | 
						||
| 
								 | 
							
								  t_index node1, node2; // node numbers in the output
							 | 
						||
| 
								 | 
							
								  t_float size1, size2; // and their cardinalities
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  t_float min; // minimum and row index for nearest-neighbor search
							 | 
						||
| 
								 | 
							
								  t_index idx;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  for (i=0; i<N; ++i)
							 | 
						||
| 
								 | 
							
								    // Build a list of row ↔ node label assignments.
							 | 
						||
| 
								 | 
							
								    // Initially i ↦ i
							 | 
						||
| 
								 | 
							
								    row_repr[i] = i;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  // Initialize the minimal distances:
							 | 
						||
| 
								 | 
							
								  // Find the nearest neighbor of each point.
							 | 
						||
| 
								 | 
							
								  // n_nghbr[i] = argmin_{j>i} D(i,j) for i in range(N-1)
							 | 
						||
| 
								 | 
							
								  t_float const * DD = D;
							 | 
						||
| 
								 | 
							
								  for (i=0; i<N_1; ++i) {
							 | 
						||
| 
								 | 
							
								    min = std::numeric_limits<t_float>::infinity();
							 | 
						||
| 
								 | 
							
								    for (idx=j=i+1; j<N; ++j, ++DD) {
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic push
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic ignored "-Wfloat-equal"
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								      if (*DD<min) {
							 | 
						||
| 
								 | 
							
								        min = *DD;
							 | 
						||
| 
								 | 
							
								        idx = j;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      else if (fc_isnan(*DD))
							 | 
						||
| 
								 | 
							
								        throw(nan_error());
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic pop
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								    mindist[i] = min;
							 | 
						||
| 
								 | 
							
								    n_nghbr[i] = idx;
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  // Put the minimal distances into a heap structure to make the repeated
							 | 
						||
| 
								 | 
							
								  // global minimum searches fast.
							 | 
						||
| 
								 | 
							
								  nn_distances.heapify();
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  #ifdef FE_INVALID
							 | 
						||
| 
								 | 
							
								  if (feclearexcept(FE_INVALID)) throw fenv_error();
							 | 
						||
| 
								 | 
							
								  #endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  // Main loop: We have N-1 merging steps.
							 | 
						||
| 
								 | 
							
								  for (i=0; i<N_1; ++i) {
							 | 
						||
| 
								 | 
							
								    /*
							 | 
						||
| 
								 | 
							
								      Here is a special feature that allows fast bookkeeping and updates of the
							 | 
						||
| 
								 | 
							
								      minimal distances.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      mindist[i] stores a lower bound on the minimum distance of the point i to
							 | 
						||
| 
								 | 
							
								      all points of higher index:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          mindist[i] ≥ min_{j>i} D(i,j)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      Normally, we have equality. However, this minimum may become invalid due
							 | 
						||
| 
								 | 
							
								      to the updates in the distance matrix. The rules are:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      1) If mindist[i] is equal to D(i, n_nghbr[i]), this is the correct
							 | 
						||
| 
								 | 
							
								         minimum and n_nghbr[i] is a nearest neighbor.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      2) If mindist[i] is smaller than D(i, n_nghbr[i]), this might not be the
							 | 
						||
| 
								 | 
							
								         correct minimum. The minimum needs to be recomputed.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      3) mindist[i] is never bigger than the true minimum. Hence, we never
							 | 
						||
| 
								 | 
							
								         miss the true minimum if we take the smallest mindist entry,
							 | 
						||
| 
								 | 
							
								         re-compute the value if necessary (thus maybe increasing it) and
							 | 
						||
| 
								 | 
							
								         looking for the now smallest mindist entry until a valid minimal
							 | 
						||
| 
								 | 
							
								         entry is found. This step is done in the lines below.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      The update process for D below takes care that these rules are
							 | 
						||
| 
								 | 
							
								      fulfilled. This makes sure that the minima in the rows D(i,i+1:)of D are
							 | 
						||
| 
								 | 
							
								      re-calculated when necessary but re-calculation is avoided whenever
							 | 
						||
| 
								 | 
							
								      possible.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      The re-calculation of the minima makes the worst-case runtime of this
							 | 
						||
| 
								 | 
							
								      algorithm cubic in N. We avoid this whenever possible, and in most cases
							 | 
						||
| 
								 | 
							
								      the runtime appears to be quadratic.
							 | 
						||
| 
								 | 
							
								    */
							 | 
						||
| 
								 | 
							
								    idx1 = nn_distances.argmin();
							 | 
						||
| 
								 | 
							
								    if (method != METHOD_METR_SINGLE) {
							 | 
						||
| 
								 | 
							
								      while ( mindist[idx1] < D_(idx1, n_nghbr[idx1]) ) {
							 | 
						||
| 
								 | 
							
								        // Recompute the minimum mindist[idx1] and n_nghbr[idx1].
							 | 
						||
| 
								 | 
							
								        n_nghbr[idx1] = j = active_nodes.succ[idx1]; // exists, maximally N-1
							 | 
						||
| 
								 | 
							
								        min = D_(idx1,j);
							 | 
						||
| 
								 | 
							
								        for (j=active_nodes.succ[j]; j<N; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								          if (D_(idx1,j)<min) {
							 | 
						||
| 
								 | 
							
								            min = D_(idx1,j);
							 | 
						||
| 
								 | 
							
								            n_nghbr[idx1] = j;
							 | 
						||
| 
								 | 
							
								          }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        /* Update the heap with the new true minimum and search for the
							 | 
						||
| 
								 | 
							
								           (possibly different) minimal entry. */
							 | 
						||
| 
								 | 
							
								        nn_distances.update_geq(idx1, min);
							 | 
						||
| 
								 | 
							
								        idx1 = nn_distances.argmin();
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    nn_distances.heap_pop(); // Remove the current minimum from the heap.
							 | 
						||
| 
								 | 
							
								    idx2 = n_nghbr[idx1];
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    // Write the newly found minimal pair of nodes to the output array.
							 | 
						||
| 
								 | 
							
								    node1 = row_repr[idx1];
							 | 
						||
| 
								 | 
							
								    node2 = row_repr[idx2];
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    if (method==METHOD_METR_AVERAGE ||
							 | 
						||
| 
								 | 
							
								        method==METHOD_METR_WARD ||
							 | 
						||
| 
								 | 
							
								        method==METHOD_METR_CENTROID) {
							 | 
						||
| 
								 | 
							
								      size1 = static_cast<t_float>(members[idx1]);
							 | 
						||
| 
								 | 
							
								      size2 = static_cast<t_float>(members[idx2]);
							 | 
						||
| 
								 | 
							
								      members[idx2] += members[idx1];
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    Z2.append(node1, node2, mindist[idx1]);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    // Remove idx1 from the list of active indices (active_nodes).
							 | 
						||
| 
								 | 
							
								    active_nodes.remove(idx1);
							 | 
						||
| 
								 | 
							
								    // Index idx2 now represents the new (merged) node with label N+i.
							 | 
						||
| 
								 | 
							
								    row_repr[idx2] = N+i;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    // Update the distance matrix
							 | 
						||
| 
								 | 
							
								    switch (method) {
							 | 
						||
| 
								 | 
							
								    case METHOD_METR_SINGLE:
							 | 
						||
| 
								 | 
							
								      /*
							 | 
						||
| 
								 | 
							
								        Single linkage.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        Characteristic: new distances are never longer than the old distances.
							 | 
						||
| 
								 | 
							
								      */
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range [start, idx1).
							 | 
						||
| 
								 | 
							
								      for (j=active_nodes.start; j<idx1; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								        f_single(&D_(j, idx2), D_(j, idx1));
							 | 
						||
| 
								 | 
							
								        if (n_nghbr[j] == idx1)
							 | 
						||
| 
								 | 
							
								          n_nghbr[j] = idx2;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx1, idx2).
							 | 
						||
| 
								 | 
							
								      for (; j<idx2; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								        f_single(&D_(j, idx2), D_(idx1, j));
							 | 
						||
| 
								 | 
							
								        // If the new value is below the old minimum in a row, update
							 | 
						||
| 
								 | 
							
								        // the mindist and n_nghbr arrays.
							 | 
						||
| 
								 | 
							
								        if (D_(j, idx2) < mindist[j]) {
							 | 
						||
| 
								 | 
							
								          nn_distances.update_leq(j, D_(j, idx2));
							 | 
						||
| 
								 | 
							
								          n_nghbr[j] = idx2;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx2, N).
							 | 
						||
| 
								 | 
							
								      // Recompute the minimum mindist[idx2] and n_nghbr[idx2].
							 | 
						||
| 
								 | 
							
								      if (idx2<N_1) {
							 | 
						||
| 
								 | 
							
								        min = mindist[idx2];
							 | 
						||
| 
								 | 
							
								        for (j=active_nodes.succ[idx2]; j<N; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								          f_single(&D_(idx2, j), D_(idx1, j) );
							 | 
						||
| 
								 | 
							
								          if (D_(idx2, j) < min) {
							 | 
						||
| 
								 | 
							
								            n_nghbr[idx2] = j;
							 | 
						||
| 
								 | 
							
								            min = D_(idx2, j);
							 | 
						||
| 
								 | 
							
								          }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        nn_distances.update_leq(idx2, min);
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case METHOD_METR_COMPLETE:
							 | 
						||
| 
								 | 
							
								      /*
							 | 
						||
| 
								 | 
							
								        Complete linkage.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        Characteristic: new distances are never shorter than the old distances.
							 | 
						||
| 
								 | 
							
								      */
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range [start, idx1).
							 | 
						||
| 
								 | 
							
								      for (j=active_nodes.start; j<idx1; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								        f_complete(&D_(j, idx2), D_(j, idx1) );
							 | 
						||
| 
								 | 
							
								        if (n_nghbr[j] == idx1)
							 | 
						||
| 
								 | 
							
								          n_nghbr[j] = idx2;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx1, idx2).
							 | 
						||
| 
								 | 
							
								      for (; j<idx2; j=active_nodes.succ[j])
							 | 
						||
| 
								 | 
							
								        f_complete(&D_(j, idx2), D_(idx1, j) );
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx2, N).
							 | 
						||
| 
								 | 
							
								      for (j=active_nodes.succ[idx2]; j<N; j=active_nodes.succ[j])
							 | 
						||
| 
								 | 
							
								        f_complete(&D_(idx2, j), D_(idx1, j) );
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case METHOD_METR_AVERAGE: {
							 | 
						||
| 
								 | 
							
								      /*
							 | 
						||
| 
								 | 
							
								        Average linkage.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        Shorter and longer distances can occur.
							 | 
						||
| 
								 | 
							
								      */
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range [start, idx1).
							 | 
						||
| 
								 | 
							
								      t_float s = size1/(size1+size2);
							 | 
						||
| 
								 | 
							
								      t_float t = size2/(size1+size2);
							 | 
						||
| 
								 | 
							
								      for (j=active_nodes.start; j<idx1; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								        f_average(&D_(j, idx2), D_(j, idx1), s, t);
							 | 
						||
| 
								 | 
							
								        if (n_nghbr[j] == idx1)
							 | 
						||
| 
								 | 
							
								          n_nghbr[j] = idx2;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx1, idx2).
							 | 
						||
| 
								 | 
							
								      for (; j<idx2; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								        f_average(&D_(j, idx2), D_(idx1, j), s, t);
							 | 
						||
| 
								 | 
							
								        if (D_(j, idx2) < mindist[j]) {
							 | 
						||
| 
								 | 
							
								          nn_distances.update_leq(j, D_(j, idx2));
							 | 
						||
| 
								 | 
							
								          n_nghbr[j] = idx2;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx2, N).
							 | 
						||
| 
								 | 
							
								      if (idx2<N_1) {
							 | 
						||
| 
								 | 
							
								        n_nghbr[idx2] = j = active_nodes.succ[idx2]; // exists, maximally N-1
							 | 
						||
| 
								 | 
							
								        f_average(&D_(idx2, j), D_(idx1, j), s, t);
							 | 
						||
| 
								 | 
							
								        min = D_(idx2,j);
							 | 
						||
| 
								 | 
							
								        for (j=active_nodes.succ[j]; j<N; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								          f_average(&D_(idx2, j), D_(idx1, j), s, t);
							 | 
						||
| 
								 | 
							
								          if (D_(idx2,j) < min) {
							 | 
						||
| 
								 | 
							
								            min = D_(idx2,j);
							 | 
						||
| 
								 | 
							
								            n_nghbr[idx2] = j;
							 | 
						||
| 
								 | 
							
								          }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        nn_distances.update(idx2, min);
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case METHOD_METR_WEIGHTED:
							 | 
						||
| 
								 | 
							
								      /*
							 | 
						||
| 
								 | 
							
								        Weighted linkage.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        Shorter and longer distances can occur.
							 | 
						||
| 
								 | 
							
								      */
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range [start, idx1).
							 | 
						||
| 
								 | 
							
								      for (j=active_nodes.start; j<idx1; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								        f_weighted(&D_(j, idx2), D_(j, idx1) );
							 | 
						||
| 
								 | 
							
								        if (n_nghbr[j] == idx1)
							 | 
						||
| 
								 | 
							
								          n_nghbr[j] = idx2;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx1, idx2).
							 | 
						||
| 
								 | 
							
								      for (; j<idx2; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								        f_weighted(&D_(j, idx2), D_(idx1, j) );
							 | 
						||
| 
								 | 
							
								        if (D_(j, idx2) < mindist[j]) {
							 | 
						||
| 
								 | 
							
								          nn_distances.update_leq(j, D_(j, idx2));
							 | 
						||
| 
								 | 
							
								          n_nghbr[j] = idx2;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx2, N).
							 | 
						||
| 
								 | 
							
								      if (idx2<N_1) {
							 | 
						||
| 
								 | 
							
								        n_nghbr[idx2] = j = active_nodes.succ[idx2]; // exists, maximally N-1
							 | 
						||
| 
								 | 
							
								        f_weighted(&D_(idx2, j), D_(idx1, j) );
							 | 
						||
| 
								 | 
							
								        min = D_(idx2,j);
							 | 
						||
| 
								 | 
							
								        for (j=active_nodes.succ[j]; j<N; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								          f_weighted(&D_(idx2, j), D_(idx1, j) );
							 | 
						||
| 
								 | 
							
								          if (D_(idx2,j) < min) {
							 | 
						||
| 
								 | 
							
								            min = D_(idx2,j);
							 | 
						||
| 
								 | 
							
								            n_nghbr[idx2] = j;
							 | 
						||
| 
								 | 
							
								          }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        nn_distances.update(idx2, min);
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case METHOD_METR_WARD:
							 | 
						||
| 
								 | 
							
								      /*
							 | 
						||
| 
								 | 
							
								        Ward linkage.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        Shorter and longer distances can occur, not smaller than min(d1,d2)
							 | 
						||
| 
								 | 
							
								        but maybe bigger than max(d1,d2).
							 | 
						||
| 
								 | 
							
								      */
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range [start, idx1).
							 | 
						||
| 
								 | 
							
								      for (j=active_nodes.start; j<idx1; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								        f_ward(&D_(j, idx2), D_(j, idx1), mindist[idx1],
							 | 
						||
| 
								 | 
							
								               size1, size2, static_cast<t_float>(members[j]) );
							 | 
						||
| 
								 | 
							
								        if (n_nghbr[j] == idx1)
							 | 
						||
| 
								 | 
							
								          n_nghbr[j] = idx2;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx1, idx2).
							 | 
						||
| 
								 | 
							
								      for (; j<idx2; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								        f_ward(&D_(j, idx2), D_(idx1, j), mindist[idx1], size1, size2,
							 | 
						||
| 
								 | 
							
								               static_cast<t_float>(members[j]) );
							 | 
						||
| 
								 | 
							
								        if (D_(j, idx2) < mindist[j]) {
							 | 
						||
| 
								 | 
							
								          nn_distances.update_leq(j, D_(j, idx2));
							 | 
						||
| 
								 | 
							
								          n_nghbr[j] = idx2;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx2, N).
							 | 
						||
| 
								 | 
							
								      if (idx2<N_1) {
							 | 
						||
| 
								 | 
							
								        n_nghbr[idx2] = j = active_nodes.succ[idx2]; // exists, maximally N-1
							 | 
						||
| 
								 | 
							
								        f_ward(&D_(idx2, j), D_(idx1, j), mindist[idx1],
							 | 
						||
| 
								 | 
							
								               size1, size2, static_cast<t_float>(members[j]) );
							 | 
						||
| 
								 | 
							
								        min = D_(idx2,j);
							 | 
						||
| 
								 | 
							
								        for (j=active_nodes.succ[j]; j<N; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								          f_ward(&D_(idx2, j), D_(idx1, j), mindist[idx1],
							 | 
						||
| 
								 | 
							
								                 size1, size2, static_cast<t_float>(members[j]) );
							 | 
						||
| 
								 | 
							
								          if (D_(idx2,j) < min) {
							 | 
						||
| 
								 | 
							
								            min = D_(idx2,j);
							 | 
						||
| 
								 | 
							
								            n_nghbr[idx2] = j;
							 | 
						||
| 
								 | 
							
								          }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        nn_distances.update(idx2, min);
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case METHOD_METR_CENTROID: {
							 | 
						||
| 
								 | 
							
								      /*
							 | 
						||
| 
								 | 
							
								        Centroid linkage.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        Shorter and longer distances can occur, not bigger than max(d1,d2)
							 | 
						||
| 
								 | 
							
								        but maybe smaller than min(d1,d2).
							 | 
						||
| 
								 | 
							
								      */
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range [start, idx1).
							 | 
						||
| 
								 | 
							
								      t_float s = size1/(size1+size2);
							 | 
						||
| 
								 | 
							
								      t_float t = size2/(size1+size2);
							 | 
						||
| 
								 | 
							
								      t_float stc = s*t*mindist[idx1];
							 | 
						||
| 
								 | 
							
								      for (j=active_nodes.start; j<idx1; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								        f_centroid(&D_(j, idx2), D_(j, idx1), stc, s, t);
							 | 
						||
| 
								 | 
							
								        if (D_(j, idx2) < mindist[j]) {
							 | 
						||
| 
								 | 
							
								          nn_distances.update_leq(j, D_(j, idx2));
							 | 
						||
| 
								 | 
							
								          n_nghbr[j] = idx2;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        else if (n_nghbr[j] == idx1)
							 | 
						||
| 
								 | 
							
								          n_nghbr[j] = idx2;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx1, idx2).
							 | 
						||
| 
								 | 
							
								      for (; j<idx2; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								        f_centroid(&D_(j, idx2), D_(idx1, j), stc, s, t);
							 | 
						||
| 
								 | 
							
								        if (D_(j, idx2) < mindist[j]) {
							 | 
						||
| 
								 | 
							
								          nn_distances.update_leq(j, D_(j, idx2));
							 | 
						||
| 
								 | 
							
								          n_nghbr[j] = idx2;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx2, N).
							 | 
						||
| 
								 | 
							
								      if (idx2<N_1) {
							 | 
						||
| 
								 | 
							
								        n_nghbr[idx2] = j = active_nodes.succ[idx2]; // exists, maximally N-1
							 | 
						||
| 
								 | 
							
								        f_centroid(&D_(idx2, j), D_(idx1, j), stc, s, t);
							 | 
						||
| 
								 | 
							
								        min = D_(idx2,j);
							 | 
						||
| 
								 | 
							
								        for (j=active_nodes.succ[j]; j<N; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								          f_centroid(&D_(idx2, j), D_(idx1, j), stc, s, t);
							 | 
						||
| 
								 | 
							
								          if (D_(idx2,j) < min) {
							 | 
						||
| 
								 | 
							
								            min = D_(idx2,j);
							 | 
						||
| 
								 | 
							
								            n_nghbr[idx2] = j;
							 | 
						||
| 
								 | 
							
								          }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        nn_distances.update(idx2, min);
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    case METHOD_METR_MEDIAN: {
							 | 
						||
| 
								 | 
							
								      /*
							 | 
						||
| 
								 | 
							
								        Median linkage.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        Shorter and longer distances can occur, not bigger than max(d1,d2)
							 | 
						||
| 
								 | 
							
								        but maybe smaller than min(d1,d2).
							 | 
						||
| 
								 | 
							
								      */
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range [start, idx1).
							 | 
						||
| 
								 | 
							
								      t_float c_4 = mindist[idx1]*.25;
							 | 
						||
| 
								 | 
							
								      for (j=active_nodes.start; j<idx1; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								        f_median(&D_(j, idx2), D_(j, idx1), c_4 );
							 | 
						||
| 
								 | 
							
								        if (D_(j, idx2) < mindist[j]) {
							 | 
						||
| 
								 | 
							
								          nn_distances.update_leq(j, D_(j, idx2));
							 | 
						||
| 
								 | 
							
								          n_nghbr[j] = idx2;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        else if (n_nghbr[j] == idx1)
							 | 
						||
| 
								 | 
							
								          n_nghbr[j] = idx2;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx1, idx2).
							 | 
						||
| 
								 | 
							
								      for (; j<idx2; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								        f_median(&D_(j, idx2), D_(idx1, j), c_4 );
							 | 
						||
| 
								 | 
							
								        if (D_(j, idx2) < mindist[j]) {
							 | 
						||
| 
								 | 
							
								          nn_distances.update_leq(j, D_(j, idx2));
							 | 
						||
| 
								 | 
							
								          n_nghbr[j] = idx2;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx2, N).
							 | 
						||
| 
								 | 
							
								      if (idx2<N_1) {
							 | 
						||
| 
								 | 
							
								        n_nghbr[idx2] = j = active_nodes.succ[idx2]; // exists, maximally N-1
							 | 
						||
| 
								 | 
							
								        f_median(&D_(idx2, j), D_(idx1, j), c_4 );
							 | 
						||
| 
								 | 
							
								        min = D_(idx2,j);
							 | 
						||
| 
								 | 
							
								        for (j=active_nodes.succ[j]; j<N; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								          f_median(&D_(idx2, j), D_(idx1, j), c_4 );
							 | 
						||
| 
								 | 
							
								          if (D_(idx2,j) < min) {
							 | 
						||
| 
								 | 
							
								            min = D_(idx2,j);
							 | 
						||
| 
								 | 
							
								            n_nghbr[idx2] = j;
							 | 
						||
| 
								 | 
							
								          }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        nn_distances.update(idx2, min);
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    default:
							 | 
						||
| 
								 | 
							
								      throw std::runtime_error(std::string("Invalid method."));
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								  #ifdef FE_INVALID
							 | 
						||
| 
								 | 
							
								  if (fetestexcept(FE_INVALID)) throw fenv_error();
							 | 
						||
| 
								 | 
							
								  #endif
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								  Clustering methods for vector data
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								template <typename t_dissimilarity>
							 | 
						||
| 
								 | 
							
								static void MST_linkage_core_vector(const t_index N,
							 | 
						||
| 
								 | 
							
								                                    t_dissimilarity & dist,
							 | 
						||
| 
								 | 
							
								                                    cluster_result & Z2) {
							 | 
						||
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								    N: integer, number of data points
							 | 
						||
| 
								 | 
							
								    dist: function pointer to the metric
							 | 
						||
| 
								 | 
							
								    Z2: output data structure
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    The basis of this algorithm is an algorithm by Rohlf:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    F. James Rohlf, Hierarchical clustering using the minimum spanning tree,
							 | 
						||
| 
								 | 
							
								    The Computer Journal, vol. 16, 1973, p. 93–95.
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								  t_index i;
							 | 
						||
| 
								 | 
							
								  t_index idx2;
							 | 
						||
| 
								 | 
							
								  doubly_linked_list active_nodes(N);
							 | 
						||
| 
								 | 
							
								  auto_array_ptr<t_float> d(N);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  t_index prev_node;
							 | 
						||
| 
								 | 
							
								  t_float min;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  // first iteration
							 | 
						||
| 
								 | 
							
								  idx2 = 1;
							 | 
						||
| 
								 | 
							
								  min = std::numeric_limits<t_float>::infinity();
							 | 
						||
| 
								 | 
							
								  for (i=1; i<N; ++i) {
							 | 
						||
| 
								 | 
							
								    d[i] = dist(0,i);
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic push
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic ignored "-Wfloat-equal"
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								    if (d[i] < min) {
							 | 
						||
| 
								 | 
							
								      min = d[i];
							 | 
						||
| 
								 | 
							
								      idx2 = i;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    else if (fc_isnan(d[i]))
							 | 
						||
| 
								 | 
							
								      throw (nan_error());
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic pop
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  Z2.append(0, idx2, min);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  for (t_index j=1; j<N-1; ++j) {
							 | 
						||
| 
								 | 
							
								    prev_node = idx2;
							 | 
						||
| 
								 | 
							
								    active_nodes.remove(prev_node);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    idx2 = active_nodes.succ[0];
							 | 
						||
| 
								 | 
							
								    min = d[idx2];
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    for (i=idx2; i<N; i=active_nodes.succ[i]) {
							 | 
						||
| 
								 | 
							
								      t_float tmp = dist(i, prev_node);
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic push
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic ignored "-Wfloat-equal"
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								      if (d[i] > tmp)
							 | 
						||
| 
								 | 
							
								        d[i] = tmp;
							 | 
						||
| 
								 | 
							
								      else if (fc_isnan(tmp))
							 | 
						||
| 
								 | 
							
								        throw (nan_error());
							 | 
						||
| 
								 | 
							
								#if HAVE_DIAGNOSTIC
							 | 
						||
| 
								 | 
							
								#pragma GCC diagnostic pop
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								      if (d[i] < min) {
							 | 
						||
| 
								 | 
							
								        min = d[i];
							 | 
						||
| 
								 | 
							
								        idx2 = i;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    Z2.append(prev_node, idx2, min);
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								template <method_codes_vector method, typename t_dissimilarity>
							 | 
						||
| 
								 | 
							
								static void generic_linkage_vector(const t_index N,
							 | 
						||
| 
								 | 
							
								                                   t_dissimilarity & dist,
							 | 
						||
| 
								 | 
							
								                                   cluster_result & Z2) {
							 | 
						||
| 
								 | 
							
								  /*
							 | 
						||
| 
								 | 
							
								    N: integer, number of data points
							 | 
						||
| 
								 | 
							
								    dist: function pointer to the metric
							 | 
						||
| 
								 | 
							
								    Z2: output data structure
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    This algorithm is valid for the distance update methods
							 | 
						||
| 
								 | 
							
								    "Ward", "centroid" and "median" only!
							 | 
						||
| 
								 | 
							
								  */
							 | 
						||
| 
								 | 
							
								  const t_index N_1 = N-1;
							 | 
						||
| 
								 | 
							
								  t_index i, j; // loop variables
							 | 
						||
| 
								 | 
							
								  t_index idx1, idx2; // row and column indices
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  auto_array_ptr<t_index> n_nghbr(N_1); // array of nearest neighbors
							 | 
						||
| 
								 | 
							
								  auto_array_ptr<t_float> mindist(N_1); // distances to the nearest neighbors
							 | 
						||
| 
								 | 
							
								  auto_array_ptr<t_index> row_repr(N); // row_repr[i]: node number that the
							 | 
						||
| 
								 | 
							
								                                       // i-th row represents
							 | 
						||
| 
								 | 
							
								  doubly_linked_list active_nodes(N);
							 | 
						||
| 
								 | 
							
								  binary_min_heap nn_distances(&*mindist, N_1); // minimum heap structure for
							 | 
						||
| 
								 | 
							
								                        // the distance to the nearest neighbor of each point
							 | 
						||
| 
								 | 
							
								  t_index node1, node2;     // node numbers in the output
							 | 
						||
| 
								 | 
							
								  t_float min; // minimum and row index for nearest-neighbor search
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  for (i=0; i<N; ++i)
							 | 
						||
| 
								 | 
							
								    // Build a list of row ↔ node label assignments.
							 | 
						||
| 
								 | 
							
								    // Initially i ↦ i
							 | 
						||
| 
								 | 
							
								    row_repr[i] = i;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  // Initialize the minimal distances:
							 | 
						||
| 
								 | 
							
								  // Find the nearest neighbor of each point.
							 | 
						||
| 
								 | 
							
								  // n_nghbr[i] = argmin_{j>i} D(i,j) for i in range(N-1)
							 | 
						||
| 
								 | 
							
								  for (i=0; i<N_1; ++i) {
							 | 
						||
| 
								 | 
							
								    min = std::numeric_limits<t_float>::infinity();
							 | 
						||
| 
								 | 
							
								    t_index idx;
							 | 
						||
| 
								 | 
							
								    for (idx=j=i+1; j<N; ++j) {
							 | 
						||
| 
								 | 
							
								      t_float tmp;
							 | 
						||
| 
								 | 
							
								      switch (method) {
							 | 
						||
| 
								 | 
							
								      case METHOD_VECTOR_WARD:
							 | 
						||
| 
								 | 
							
								        tmp = dist.ward_initial(i,j);
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								      default:
							 | 
						||
| 
								 | 
							
								        tmp = dist.template sqeuclidean<true>(i,j);
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      if (tmp<min) {
							 | 
						||
| 
								 | 
							
								        min = tmp;
							 | 
						||
| 
								 | 
							
								        idx = j;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    switch (method) {
							 | 
						||
| 
								 | 
							
								    case METHOD_VECTOR_WARD:
							 | 
						||
| 
								 | 
							
								      mindist[i] = t_dissimilarity::ward_initial_conversion(min);
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								    default:
							 | 
						||
| 
								 | 
							
								      mindist[i] = min;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    n_nghbr[i] = idx;
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  // Put the minimal distances into a heap structure to make the repeated
							 | 
						||
| 
								 | 
							
								  // global minimum searches fast.
							 | 
						||
| 
								 | 
							
								  nn_distances.heapify();
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  // Main loop: We have N-1 merging steps.
							 | 
						||
| 
								 | 
							
								  for (i=0; i<N_1; ++i) {
							 | 
						||
| 
								 | 
							
								    idx1 = nn_distances.argmin();
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    while ( active_nodes.is_inactive(n_nghbr[idx1]) ) {
							 | 
						||
| 
								 | 
							
								      // Recompute the minimum mindist[idx1] and n_nghbr[idx1].
							 | 
						||
| 
								 | 
							
								      n_nghbr[idx1] = j = active_nodes.succ[idx1]; // exists, maximally N-1
							 | 
						||
| 
								 | 
							
								      switch (method) {
							 | 
						||
| 
								 | 
							
								      case METHOD_VECTOR_WARD:
							 | 
						||
| 
								 | 
							
								        min = dist.ward(idx1,j);
							 | 
						||
| 
								 | 
							
								        for (j=active_nodes.succ[j]; j<N; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								          t_float const tmp = dist.ward(idx1,j);
							 | 
						||
| 
								 | 
							
								          if (tmp<min) {
							 | 
						||
| 
								 | 
							
								            min = tmp;
							 | 
						||
| 
								 | 
							
								            n_nghbr[idx1] = j;
							 | 
						||
| 
								 | 
							
								          }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								      default:
							 | 
						||
| 
								 | 
							
								        min = dist.template sqeuclidean<true>(idx1,j);
							 | 
						||
| 
								 | 
							
								        for (j=active_nodes.succ[j]; j<N; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								          t_float const tmp = dist.template sqeuclidean<true>(idx1,j);
							 | 
						||
| 
								 | 
							
								          if (tmp<min) {
							 | 
						||
| 
								 | 
							
								            min = tmp;
							 | 
						||
| 
								 | 
							
								            n_nghbr[idx1] = j;
							 | 
						||
| 
								 | 
							
								          }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      /* Update the heap with the new true minimum and search for the (possibly
							 | 
						||
| 
								 | 
							
								         different) minimal entry. */
							 | 
						||
| 
								 | 
							
								      nn_distances.update_geq(idx1, min);
							 | 
						||
| 
								 | 
							
								      idx1 = nn_distances.argmin();
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    nn_distances.heap_pop(); // Remove the current minimum from the heap.
							 | 
						||
| 
								 | 
							
								    idx2 = n_nghbr[idx1];
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    // Write the newly found minimal pair of nodes to the output array.
							 | 
						||
| 
								 | 
							
								    node1 = row_repr[idx1];
							 | 
						||
| 
								 | 
							
								    node2 = row_repr[idx2];
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    Z2.append(node1, node2, mindist[idx1]);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    switch (method) {
							 | 
						||
| 
								 | 
							
								    case METHOD_VECTOR_WARD:
							 | 
						||
| 
								 | 
							
								    case METHOD_VECTOR_CENTROID:
							 | 
						||
| 
								 | 
							
								      dist.merge_inplace(idx1, idx2);
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								    case METHOD_VECTOR_MEDIAN:
							 | 
						||
| 
								 | 
							
								      dist.merge_inplace_weighted(idx1, idx2);
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								    default:
							 | 
						||
| 
								 | 
							
								      throw std::runtime_error(std::string("Invalid method."));
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    // Index idx2 now represents the new (merged) node with label N+i.
							 | 
						||
| 
								 | 
							
								    row_repr[idx2] = N+i;
							 | 
						||
| 
								 | 
							
								    // Remove idx1 from the list of active indices (active_nodes).
							 | 
						||
| 
								 | 
							
								    active_nodes.remove(idx1);  // TBD later!!!
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    // Update the distance matrix
							 | 
						||
| 
								 | 
							
								    switch (method) {
							 | 
						||
| 
								 | 
							
								    case METHOD_VECTOR_WARD:
							 | 
						||
| 
								 | 
							
								      /*
							 | 
						||
| 
								 | 
							
								        Ward linkage.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        Shorter and longer distances can occur, not smaller than min(d1,d2)
							 | 
						||
| 
								 | 
							
								        but maybe bigger than max(d1,d2).
							 | 
						||
| 
								 | 
							
								      */
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range [start, idx1).
							 | 
						||
| 
								 | 
							
								      for (j=active_nodes.start; j<idx1; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								        if (n_nghbr[j] == idx2) {
							 | 
						||
| 
								 | 
							
								          n_nghbr[j] = idx1; // invalidate
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      // Update the distance matrix in the range (idx1, idx2).
							 | 
						||
| 
								 | 
							
								      for ( ; j<idx2; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								        t_float const tmp = dist.ward(j, idx2);
							 | 
						||
| 
								 | 
							
								        if (tmp < mindist[j]) {
							 | 
						||
| 
								 | 
							
								          nn_distances.update_leq(j, tmp);
							 | 
						||
| 
								 | 
							
								          n_nghbr[j] = idx2;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        else if (n_nghbr[j]==idx2) {
							 | 
						||
| 
								 | 
							
								          n_nghbr[j] = idx1; // invalidate
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      // Find the nearest neighbor for idx2.
							 | 
						||
| 
								 | 
							
								      if (idx2<N_1) {
							 | 
						||
| 
								 | 
							
								        n_nghbr[idx2] = j = active_nodes.succ[idx2]; // exists, maximally N-1
							 | 
						||
| 
								 | 
							
								        min = dist.ward(idx2,j);
							 | 
						||
| 
								 | 
							
								        for (j=active_nodes.succ[j]; j<N; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								          t_float const tmp = dist.ward(idx2,j);
							 | 
						||
| 
								 | 
							
								          if (tmp < min) {
							 | 
						||
| 
								 | 
							
								            min = tmp;
							 | 
						||
| 
								 | 
							
								            n_nghbr[idx2] = j;
							 | 
						||
| 
								 | 
							
								          }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        nn_distances.update(idx2, min);
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    default:
							 | 
						||
| 
								 | 
							
								      /*
							 | 
						||
| 
								 | 
							
								        Centroid and median linkage.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        Shorter and longer distances can occur, not bigger than max(d1,d2)
							 | 
						||
| 
								 | 
							
								        but maybe smaller than min(d1,d2).
							 | 
						||
| 
								 | 
							
								      */
							 | 
						||
| 
								 | 
							
								      for (j=active_nodes.start; j<idx2; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								        t_float const tmp = dist.template sqeuclidean<true>(j, idx2);
							 | 
						||
| 
								 | 
							
								        if (tmp < mindist[j]) {
							 | 
						||
| 
								 | 
							
								          nn_distances.update_leq(j, tmp);
							 | 
						||
| 
								 | 
							
								          n_nghbr[j] = idx2;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        else if (n_nghbr[j] == idx2)
							 | 
						||
| 
								 | 
							
								          n_nghbr[j] = idx1; // invalidate
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      // Find the nearest neighbor for idx2.
							 | 
						||
| 
								 | 
							
								      if (idx2<N_1) {
							 | 
						||
| 
								 | 
							
								        n_nghbr[idx2] = j = active_nodes.succ[idx2]; // exists, maximally N-1
							 | 
						||
| 
								 | 
							
								        min = dist.template sqeuclidean<true>(idx2,j);
							 | 
						||
| 
								 | 
							
								        for (j=active_nodes.succ[j]; j<N; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								          t_float const tmp = dist.template sqeuclidean<true>(idx2, j);
							 | 
						||
| 
								 | 
							
								          if (tmp < min) {
							 | 
						||
| 
								 | 
							
								            min = tmp;
							 | 
						||
| 
								 | 
							
								            n_nghbr[idx2] = j;
							 | 
						||
| 
								 | 
							
								          }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        nn_distances.update(idx2, min);
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								template <method_codes_vector method, typename t_dissimilarity>
							 | 
						||
| 
								 | 
							
								static void generic_linkage_vector_alternative(const t_index N,
							 | 
						||
| 
								 | 
							
								                                               t_dissimilarity & dist,
							 | 
						||
| 
								 | 
							
								                                               cluster_result & Z2) {
							 | 
						||
| 
								 | 
							
								  /*
							 | 
						||
| 
								 | 
							
								    N: integer, number of data points
							 | 
						||
| 
								 | 
							
								    dist: function pointer to the metric
							 | 
						||
| 
								 | 
							
								    Z2: output data structure
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    This algorithm is valid for the distance update methods
							 | 
						||
| 
								 | 
							
								    "Ward", "centroid" and "median" only!
							 | 
						||
| 
								 | 
							
								  */
							 | 
						||
| 
								 | 
							
								  const t_index N_1 = N-1;
							 | 
						||
| 
								 | 
							
								  t_index i, j=0; // loop variables
							 | 
						||
| 
								 | 
							
								  t_index idx1, idx2; // row and column indices
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  auto_array_ptr<t_index> n_nghbr(2*N-2); // array of nearest neighbors
							 | 
						||
| 
								 | 
							
								  auto_array_ptr<t_float> mindist(2*N-2); // distances to the nearest neighbors
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  doubly_linked_list active_nodes(N+N_1);
							 | 
						||
| 
								 | 
							
								  binary_min_heap nn_distances(&*mindist, N_1, 2*N-2, 1); // minimum heap
							 | 
						||
| 
								 | 
							
								      // structure for the distance to the nearest neighbor of each point
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  t_float min; // minimum for nearest-neighbor searches
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  // Initialize the minimal distances:
							 | 
						||
| 
								 | 
							
								  // Find the nearest neighbor of each point.
							 | 
						||
| 
								 | 
							
								  // n_nghbr[i] = argmin_{j>i} D(i,j) for i in range(N-1)
							 | 
						||
| 
								 | 
							
								  for (i=1; i<N; ++i) {
							 | 
						||
| 
								 | 
							
								    min = std::numeric_limits<t_float>::infinity();
							 | 
						||
| 
								 | 
							
								    t_index idx;
							 | 
						||
| 
								 | 
							
								    for (idx=j=0; j<i; ++j) {
							 | 
						||
| 
								 | 
							
								      t_float tmp;
							 | 
						||
| 
								 | 
							
								      switch (method) {
							 | 
						||
| 
								 | 
							
								      case METHOD_VECTOR_WARD:
							 | 
						||
| 
								 | 
							
								        tmp = dist.ward_initial(i,j);
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								      default:
							 | 
						||
| 
								 | 
							
								        tmp = dist.template sqeuclidean<true>(i,j);
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      if (tmp<min) {
							 | 
						||
| 
								 | 
							
								        min = tmp;
							 | 
						||
| 
								 | 
							
								        idx = j;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    switch (method) {
							 | 
						||
| 
								 | 
							
								    case METHOD_VECTOR_WARD:
							 | 
						||
| 
								 | 
							
								      mindist[i] = t_dissimilarity::ward_initial_conversion(min);
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								    default:
							 | 
						||
| 
								 | 
							
								      mindist[i] = min;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    n_nghbr[i] = idx;
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  // Put the minimal distances into a heap structure to make the repeated
							 | 
						||
| 
								 | 
							
								  // global minimum searches fast.
							 | 
						||
| 
								 | 
							
								  nn_distances.heapify();
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  // Main loop: We have N-1 merging steps.
							 | 
						||
| 
								 | 
							
								  for (i=N; i<N+N_1; ++i) {
							 | 
						||
| 
								 | 
							
								    /*
							 | 
						||
| 
								 | 
							
								      The bookkeeping is different from the "stored matrix approach" algorithm
							 | 
						||
| 
								 | 
							
								      generic_linkage.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      mindist[i] stores a lower bound on the minimum distance of the point i to
							 | 
						||
| 
								 | 
							
								      all points of *lower* index:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          mindist[i] ≥ min_{j<i} D(i,j)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      Moreover, new nodes do not re-use one of the old indices, but they are
							 | 
						||
| 
								 | 
							
								      given a new, unique index (SciPy convention: initial nodes are 0,…,N−1,
							 | 
						||
| 
								 | 
							
								      new nodes are N,…,2N−2).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      Invalid nearest neighbors are not recognized by the fact that the stored
							 | 
						||
| 
								 | 
							
								      distance is smaller than the actual distance, but the list active_nodes
							 | 
						||
| 
								 | 
							
								      maintains a flag whether a node is inactive. If n_nghbr[i] points to an
							 | 
						||
| 
								 | 
							
								      active node, the entries nn_distances[i] and n_nghbr[i] are valid,
							 | 
						||
| 
								 | 
							
								      otherwise they must be recomputed.
							 | 
						||
| 
								 | 
							
								    */
							 | 
						||
| 
								 | 
							
								    idx1 = nn_distances.argmin();
							 | 
						||
| 
								 | 
							
								    while ( active_nodes.is_inactive(n_nghbr[idx1]) ) {
							 | 
						||
| 
								 | 
							
								      // Recompute the minimum mindist[idx1] and n_nghbr[idx1].
							 | 
						||
| 
								 | 
							
								      n_nghbr[idx1] = j = active_nodes.start;
							 | 
						||
| 
								 | 
							
								      switch (method) {
							 | 
						||
| 
								 | 
							
								      case METHOD_VECTOR_WARD:
							 | 
						||
| 
								 | 
							
								        min = dist.ward_extended(idx1,j);
							 | 
						||
| 
								 | 
							
								        for (j=active_nodes.succ[j]; j<idx1; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								          t_float tmp = dist.ward_extended(idx1,j);
							 | 
						||
| 
								 | 
							
								          if (tmp<min) {
							 | 
						||
| 
								 | 
							
								            min = tmp;
							 | 
						||
| 
								 | 
							
								            n_nghbr[idx1] = j;
							 | 
						||
| 
								 | 
							
								          }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								      default:
							 | 
						||
| 
								 | 
							
								        min = dist.sqeuclidean_extended(idx1,j);
							 | 
						||
| 
								 | 
							
								        for (j=active_nodes.succ[j]; j<idx1; j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								          t_float const tmp = dist.sqeuclidean_extended(idx1,j);
							 | 
						||
| 
								 | 
							
								          if (tmp<min) {
							 | 
						||
| 
								 | 
							
								            min = tmp;
							 | 
						||
| 
								 | 
							
								            n_nghbr[idx1] = j;
							 | 
						||
| 
								 | 
							
								          }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      /* Update the heap with the new true minimum and search for the (possibly
							 | 
						||
| 
								 | 
							
								         different) minimal entry. */
							 | 
						||
| 
								 | 
							
								      nn_distances.update_geq(idx1, min);
							 | 
						||
| 
								 | 
							
								      idx1 = nn_distances.argmin();
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    idx2 = n_nghbr[idx1];
							 | 
						||
| 
								 | 
							
								    active_nodes.remove(idx1);
							 | 
						||
| 
								 | 
							
								    active_nodes.remove(idx2);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    Z2.append(idx1, idx2, mindist[idx1]);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    if (i<2*N_1) {
							 | 
						||
| 
								 | 
							
								      switch (method) {
							 | 
						||
| 
								 | 
							
								      case METHOD_VECTOR_WARD:
							 | 
						||
| 
								 | 
							
								      case METHOD_VECTOR_CENTROID:
							 | 
						||
| 
								 | 
							
								        dist.merge(idx1, idx2, i);
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case METHOD_VECTOR_MEDIAN:
							 | 
						||
| 
								 | 
							
								        dist.merge_weighted(idx1, idx2, i);
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      default:
							 | 
						||
| 
								 | 
							
								        throw std::runtime_error(std::string("Invalid method."));
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      n_nghbr[i] = active_nodes.start;
							 | 
						||
| 
								 | 
							
								      if (method==METHOD_VECTOR_WARD) {
							 | 
						||
| 
								 | 
							
								        /*
							 | 
						||
| 
								 | 
							
								          Ward linkage.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          Shorter and longer distances can occur, not smaller than min(d1,d2)
							 | 
						||
| 
								 | 
							
								          but maybe bigger than max(d1,d2).
							 | 
						||
| 
								 | 
							
								        */
							 | 
						||
| 
								 | 
							
								        min = dist.ward_extended(active_nodes.start, i);
							 | 
						||
| 
								 | 
							
								        for (j=active_nodes.succ[active_nodes.start]; j<i;
							 | 
						||
| 
								 | 
							
								             j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								          t_float tmp = dist.ward_extended(j, i);
							 | 
						||
| 
								 | 
							
								          if (tmp < min) {
							 | 
						||
| 
								 | 
							
								            min = tmp;
							 | 
						||
| 
								 | 
							
								            n_nghbr[i] = j;
							 | 
						||
| 
								 | 
							
								          }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      else {
							 | 
						||
| 
								 | 
							
								        /*
							 | 
						||
| 
								 | 
							
								          Centroid and median linkage.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          Shorter and longer distances can occur, not bigger than max(d1,d2)
							 | 
						||
| 
								 | 
							
								          but maybe smaller than min(d1,d2).
							 | 
						||
| 
								 | 
							
								        */
							 | 
						||
| 
								 | 
							
								        min = dist.sqeuclidean_extended(active_nodes.start, i);
							 | 
						||
| 
								 | 
							
								        for (j=active_nodes.succ[active_nodes.start]; j<i;
							 | 
						||
| 
								 | 
							
								             j=active_nodes.succ[j]) {
							 | 
						||
| 
								 | 
							
								          t_float tmp = dist.sqeuclidean_extended(j, i);
							 | 
						||
| 
								 | 
							
								          if (tmp < min) {
							 | 
						||
| 
								 | 
							
								            min = tmp;
							 | 
						||
| 
								 | 
							
								            n_nghbr[i] = j;
							 | 
						||
| 
								 | 
							
								          }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      if (idx2<active_nodes.start)  {
							 | 
						||
| 
								 | 
							
								        nn_distances.remove(active_nodes.start);
							 | 
						||
| 
								 | 
							
								      } else {
							 | 
						||
| 
								 | 
							
								        nn_distances.remove(idx2);
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								      nn_distances.replace(idx1, i, min);
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#if HAVE_VISIBILITY
							 | 
						||
| 
								 | 
							
								#pragma GCC visibility pop
							 | 
						||
| 
								 | 
							
								#endif
							 |