diff options
Diffstat (limited to 'src/python/gudhi/clustering')
-rw-r--r--  src/python/gudhi/clustering/__init__.py |   0
-rw-r--r--  src/python/gudhi/clustering/_tomato.cc  | 277
-rw-r--r--  src/python/gudhi/clustering/tomato.py   | 321
3 files changed, 598 insertions, 0 deletions
diff --git a/src/python/gudhi/clustering/__init__.py b/src/python/gudhi/clustering/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/python/gudhi/clustering/__init__.py diff --git a/src/python/gudhi/clustering/_tomato.cc b/src/python/gudhi/clustering/_tomato.cc new file mode 100644 index 00000000..a76a2c3a --- /dev/null +++ b/src/python/gudhi/clustering/_tomato.cc @@ -0,0 +1,277 @@ +/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. + * Author(s): Marc Glisse + * + * Copyright (C) 2020 Inria + * + * Modification(s): + * - YYYY/MM Author: Description of the modification + */ + +#include <boost/container/flat_map.hpp> +#include <boost/pending/disjoint_sets.hpp> +#include <boost/property_map/property_map.hpp> +#include <boost/property_map/transform_value_property_map.hpp> +#include <boost/property_map/vector_property_map.hpp> +#include <boost/property_map/function_property_map.hpp> +#include <boost/iterator/counting_iterator.hpp> +#include <boost/range/irange.hpp> +#include <boost/range/adaptor/transformed.hpp> +#include <vector> +#include <unordered_map> +#include <pybind11/pybind11.h> +#include <pybind11/numpy.h> +#include <iostream> + +namespace py = pybind11; + +template <class T, class = std::enable_if_t<std::is_integral<T>::value>> +int getint(int i) { + return i; +} +// Gcc-8 has a bug that breaks this version, fixed in gcc-9 +// template<class T, class=decltype(std::declval<T>().template cast<int>())> +// int getint(T i){return i.template cast<int>();} +template <class T> +auto getint(T i) -> decltype(i.template cast<int>()) { + return i.template cast<int>(); +} + +// Raw clusters are clusters obtained through single-linkage, no merging. 
+ +typedef int Point_index; +typedef int Cluster_index; +struct Merge { + Cluster_index first, second; + double persist; +}; + +template <class Neighbors, class Density, class Order, class ROrder> +auto tomato(Point_index num_points, Neighbors const& neighbors, Density const& density, Order const& order, + ROrder const& rorder) { + // point index --> index of raw cluster it belongs to + std::vector<Cluster_index> raw_cluster; + raw_cluster.reserve(num_points); + // cluster index --> index of top point in the cluster + Cluster_index n_raw_clusters = 0; // current number of raw clusters seen + // + std::vector<Merge> merges; + struct Data { + Cluster_index parent; + int rank; + Point_index max; + }; // information on a cluster + std::vector<Data> ds_base; + // boost::vector_property_map does resize(size+1) for every new element, don't use it + auto ds_data = + boost::make_function_property_map<std::size_t>([&ds_base](std::size_t n) -> Data& { return ds_base[n]; }); + auto ds_parent = + boost::make_transform_value_property_map([](auto& p) -> Cluster_index& { return p.parent; }, ds_data); + auto ds_rank = boost::make_transform_value_property_map([](auto& p) -> int& { return p.rank; }, ds_data); + boost::disjoint_sets<decltype(ds_rank), decltype(ds_parent)> ds( + ds_rank, ds_parent); // on the clusters, not directly the points + std::vector<std::array<double, 2>> persistence; // diagram (finite points) + boost::container::flat_map<Cluster_index, Cluster_index> + adj_clusters; // first: the merged cluster, second: the raw cluster + // we only care about the raw cluster, we could use a vector to store the second, store first into a set, and only + // insert in the vector if merged is absent from the set + + for (Point_index i = 0; i < num_points; ++i) { + // auto&& ngb = neighbors[order[i]]; + // TODO: have a specialization where we don't need python types and py::cast + // TODO: move py::cast and getint into Neighbors + py::object ngb = neighbors[py::cast(order[i])]; // 
auto&& also seems to work + adj_clusters.clear(); + Point_index j = i; // highest neighbor + // for(Point_index k : ngb) + for (auto k_py : ngb) { + Point_index k = rorder[getint(k_py)]; + if (k >= i || k < 0) // ??? + continue; + if (k < j) j = k; + Cluster_index rk = raw_cluster[k]; + adj_clusters.emplace(ds.find_set(rk), rk); + // does not insert if ck=ds.find_set(rk) already seen + // which rk we keep from those with the same ck is arbitrary + } + assert((Point_index)raw_cluster.size() == i); + if (i == j) { // local maximum -> new cluster + Cluster_index c = n_raw_clusters++; + ds_base.emplace_back(); // could be integrated in ds_data, but then we would check the size for every access + ds.make_set(c); + ds_base[c].max = i; // max + raw_cluster.push_back(c); + continue; + } else { // add i to the cluster of j + assert(j < i); + raw_cluster.push_back(raw_cluster[j]); + // FIXME: we are adding point i to the raw cluster of j, but that one might not be in adj_clusters, so we may + // merge clusters A and B through a point of C. It is strange, but I don't know if it can really cause problems. + // We could just not set j at all and use arbitrarily the first element of adj_clusters. 
+ } + // possibly merge clusters + // we could sort, in case there are several merges, but that doesn't seem so useful + Cluster_index rj = raw_cluster[j]; + Cluster_index cj = ds.find_set(rj); + Cluster_index orig_cj = cj; + for (auto ckk : adj_clusters) { + Cluster_index rk = ckk.second; + Cluster_index ck = ckk.first; + if (ck == orig_cj) continue; + assert(ck == ds.find_set(rk)); + Point_index j = ds_base[cj].max; + Point_index k = ds_base[ck].max; + Point_index young = std::max(j, k); + Point_index old = std::min(j, k); + auto d_young = density[order[young]]; + auto d_i = density[order[i]]; + assert(d_young >= d_i); + // Always merge (the non-hierarchical algorithm would only conditionally merge here + persistence.push_back({d_young, d_i}); + assert(ds.find_set(rj) != ds.find_set(rk)); + ds.link(cj, ck); + cj = ds.find_set(cj); + ds_base[cj].max = old; // just one parent, no need for find_set + // record the raw clusters, we don't know what will have already been merged. + merges.push_back({rj, rk, d_young - d_i}); + } + } + { + boost::counting_iterator<int> b(0), e(ds_base.size()); + ds.compress_sets(b, e); + // Now we stop using find_sets and look at the parent directly + // rank is reused to rename clusters contiguously 0, 1, etc + } + // Maximum for each connected component + std::vector<double> max_cc; + for (Cluster_index i = 0; i < n_raw_clusters; ++i) { + if (ds_base[i].parent == i) max_cc.push_back(density[order[ds_base[i].max]]); + } + assert((Cluster_index)(merges.size() + max_cc.size()) == n_raw_clusters); + + // TODO: create a "noise" cluster, merging all those not prominent enough? 
+ + // Replay the merges, in increasing order of prominence, to build the hierarchy + std::sort(merges.begin(), merges.end(), [](Merge const& a, Merge const& b) { return a.persist < b.persist; }); + std::vector<std::array<Cluster_index, 2>> children; + children.reserve(merges.size()); + { + struct Dat { + Cluster_index parent; + int rank; + Cluster_index name; + }; + std::vector<Dat> ds_bas(2 * n_raw_clusters - 1); + Cluster_index i; + auto ds_dat = + boost::make_function_property_map<std::size_t>([&ds_bas](std::size_t n) -> Dat& { return ds_bas[n]; }); + auto ds_par = boost::make_transform_value_property_map([](auto& p) -> Cluster_index& { return p.parent; }, ds_dat); + auto ds_ran = boost::make_transform_value_property_map([](auto& p) -> int& { return p.rank; }, ds_dat); + boost::disjoint_sets<decltype(ds_ran), decltype(ds_par)> ds(ds_ran, ds_par); + for (i = 0; i < n_raw_clusters; ++i) { + ds.make_set(i); + ds_bas[i].name = i; + } + for (Merge const& m : merges) { + Cluster_index j = ds.find_set(m.first); + Cluster_index k = ds.find_set(m.second); + assert(j != k); + children.push_back({ds_bas[j].name, ds_bas[k].name}); + ds.make_set(i); + ds.link(i, j); + ds.link(i, k); + ds_bas[ds.find_set(i)].name = i; + ++i; + } + } + + std::vector<Cluster_index> raw_cluster_ordered(num_points); + for (int i = 0; i < num_points; ++i) raw_cluster_ordered[i] = raw_cluster[rorder[i]]; + // return raw_cluster, children, persistence + // TODO avoid copies: https://github.com/pybind/pybind11/issues/1042 + return py::make_tuple(py::array(raw_cluster_ordered.size(), raw_cluster_ordered.data()), + py::array(children.size(), children.data()), py::array(persistence.size(), persistence.data()), + py::array(max_cc.size(), max_cc.data())); +} + +auto merge(py::array_t<Cluster_index, py::array::c_style> children, Cluster_index n_leaves, Cluster_index n_final) { + if (n_final > n_leaves) { + std::cerr << "The number of clusters required " << n_final << " is larger than the number of 
mini-clusters " << n_leaves << '\n'; + n_final = n_leaves; // or return something special and let Tomato use leaf_labels_? + } + py::buffer_info cbuf = children.request(); + if ((cbuf.ndim != 2 || cbuf.shape[1] != 2) && (cbuf.ndim != 1 || cbuf.shape[0] != 0)) + throw std::runtime_error("internal error: children have to be (n,2) or empty"); + const int n_merges = cbuf.shape[0]; + Cluster_index* d = (Cluster_index*)cbuf.ptr; + if (n_merges + n_final < n_leaves) { + std::cerr << "The number of clusters required " << n_final << " is smaller than the number of connected components " << n_leaves - n_merges << '\n'; + n_final = n_leaves - n_merges; + } + struct Dat { + Cluster_index parent; + int rank; + int name; + }; + std::vector<Dat> ds_bas(2 * n_leaves - 1); + auto ds_dat = boost::make_function_property_map<std::size_t>([&ds_bas](std::size_t n) -> Dat& { return ds_bas[n]; }); + auto ds_par = boost::make_transform_value_property_map([](auto& p) -> Cluster_index& { return p.parent; }, ds_dat); + auto ds_ran = boost::make_transform_value_property_map([](auto& p) -> int& { return p.rank; }, ds_dat); + boost::disjoint_sets<decltype(ds_ran), decltype(ds_par)> ds(ds_ran, ds_par); + Cluster_index i; + for (i = 0; i < n_leaves; ++i) { + ds.make_set(i); + ds_bas[i].name = -1; + } + for (Cluster_index m = 0; m < n_leaves - n_final; ++m) { + Cluster_index j = ds.find_set(d[2 * m]); + Cluster_index k = ds.find_set(d[2 * m + 1]); + assert(j != k); + ds.make_set(i); + ds.link(i, j); + ds.link(i, k); + ++i; + } + Cluster_index next_cluster_name = 0; + std::vector<Cluster_index> ret; + ret.reserve(n_leaves); + for (Cluster_index j = 0; j < n_leaves; ++j) { + Cluster_index k = ds.find_set(j); + if (ds_bas[k].name == -1) ds_bas[k].name = next_cluster_name++; + ret.push_back(ds_bas[k].name); + } + return py::array(ret.size(), ret.data()); +} + +// TODO: Do a special version when ngb is a numpy array, where we can cast to int[k][n] ? 
+// py::isinstance<py::array_t<std::int32_t>> (ou py::isinstance<py::array> et tester dtype) et flags&c_style +// ou overload (en virant forcecast?) +// aussi le faire au cas où on n'aurait pas un tableau, mais où chaque liste de voisins serait un tableau ? +auto hierarchy(py::handle ngb, py::array_t<double, py::array::c_style | py::array::forcecast> density) { + // used to be py::iterable ngb, but that's inconvenient if it doesn't come pre-sorted + // use py::handle and check if [] (aka __getitem__) works? But then we need to build an object to pass it to [] + // (I _think_ handle is ok and we don't need object here) + py::buffer_info wbuf = density.request(); + if (wbuf.ndim != 1) throw std::runtime_error("density must be 1D"); + const int n = wbuf.shape[0]; + double* d = (double*)wbuf.ptr; + // Vector { 0, 1, ..., n-1 } + std::vector<Point_index> order(boost::counting_iterator<Point_index>(0), boost::counting_iterator<Point_index>(n)); + // Permutation of the indices to get points in decreasing order of density + std::sort(std::begin(order), std::end(order), [=](Point_index i, Point_index j) { return d[i] > d[j]; }); + // Inverse permutation + std::vector<Point_index> rorder(n); + for (Point_index i : boost::irange(0, n)) rorder[order[i]] = i; + // Used as: + // order[i] is the index of the point with i-th largest density + // rorder[i] is the rank of the i-th point in order of decreasing density + // TODO: put a wrapper on ngb and d so we don't need to pass (r)order (there is still the issue of reordering the + // output) + return tomato(n, ngb, d, order, rorder); +} + +PYBIND11_MODULE(_tomato, m) { + m.doc() = "Internals of tomato clustering"; + m.def("hierarchy", &hierarchy, "does the clustering"); + m.def("merge", &merge, "merge clusters"); +} diff --git a/src/python/gudhi/clustering/tomato.py b/src/python/gudhi/clustering/tomato.py new file mode 100644 index 00000000..fbba3cc8 --- /dev/null +++ b/src/python/gudhi/clustering/tomato.py @@ -0,0 +1,321 @@ +# 
# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
# Author(s): Marc Glisse
#
# Copyright (C) 2020 Inria
#
# Modification(s):
#   - YYYY/MM Author: Description of the modification

import numpy
from ..point_cloud.knn import KNearestNeighbors
from ..point_cloud.dtm import DTMDensity
from ._tomato import *

# The fit/predict interface is not so well suited...


class Tomato:
    """
    This clustering algorithm needs a neighborhood graph on the points, and an estimation of the density at each point.
    A few possible graph constructions and density estimators are provided for convenience, but it is perfectly natural
    to provide your own.

    :Requires: `SciPy <installation.html#scipy>`_, `Scikit-learn <installation.html#scikit-learn>`_ or others
        (see :class:`~gudhi.point_cloud.knn.KNearestNeighbors`) in function of the options.

    Attributes
    ----------
    n_clusters_: int
        The number of clusters. Writing to it automatically adjusts `labels_`.
    merge_threshold_: float
        minimum prominence of a cluster so it doesn't get merged. Writing to it automatically adjusts `labels_`.
    n_leaves_: int
        number of leaves (unstable clusters) in the hierarchical tree
    leaf_labels_: ndarray of shape (n_samples,)
        cluster labels for each point, at the very bottom of the hierarchy
    labels_: ndarray of shape (n_samples,)
        cluster labels for each point, after merging
    diagram_: ndarray of shape (`n_leaves_`, 2)
        persistence diagram (only the finite points)
    max_weight_per_cc_: ndarray of shape (n_connected_components,)
        maximum of the density function on each connected component. This corresponds to the abscissa of infinite
        points in the diagram
    children_: ndarray of shape (`n_leaves_`-n_connected_components, 2)
        The children of each non-leaf node. Values less than `n_leaves_` correspond to leaves of the tree.
        A node i greater than or equal to `n_leaves_` is a non-leaf node and has children children_[i - `n_leaves_`].
        Alternatively at the i-th iteration, children[i][0] and children[i][1] are merged to form node `n_leaves_` + i
    weights_: ndarray of shape (n_samples,)
        weights of the points, as computed by the density estimator or provided by the user
    params_: dict
        Parameters like metric, etc
    """

    def __init__(
        self,
        graph_type="knn",
        density_type="logDTM",
        n_clusters=None,
        merge_threshold=None,
        # eliminate_threshold=None,
        #   eliminate_threshold (float): minimum max weight of a cluster so it doesn't get eliminated
        **params
    ):
        """
        Args:
            graph_type (str): 'manual', 'knn' or 'radius'. Default is 'knn'.
            density_type (str): 'manual', 'DTM', 'logDTM', 'KDE' or 'logKDE'. When you have many points,
                'KDE' and 'logKDE' tend to be slower. Default is 'logDTM'.
            metric (str|Callable): metric used when calculating the distance between instances in a feature array.
                Defaults to Minkowski of parameter p.
            kde_params (dict): if density_type is 'KDE' or 'logKDE', additional parameters passed directly to
                sklearn.neighbors.KernelDensity.
            k (int): number of neighbors for a knn graph (including the vertex itself). Defaults to 10.
            k_DTM (int): number of neighbors for the DTM density estimation (including the vertex itself).
                Defaults to k.
            r (float): size of a neighborhood if graph_type is 'radius'. Also used as default bandwidth in kde_params.
            eps (float): (1+eps) approximation factor when computing distances (ignored in many cases).
            n_clusters (int): number of clusters requested. Defaults to None, i.e. no merging occurs and we get
                the maximal number of clusters.
            merge_threshold (float): minimum prominence of a cluster so it doesn't get merged.
            symmetrize_graph (bool): whether we should add edges to make the neighborhood graph symmetric.
                This can be useful with k-NN for small k. Defaults to false.
            p (float): norm L^p on input points. Defaults to 2.
            q (float): order used to compute the distance to measure. Defaults to dim.
                Beware that when the dimension is large, this can easily cause overflows.
            dim (float): final exponent in DTM density estimation, representing the dimension. Defaults to the
                dimension, or 2 when the dimension cannot be read from the input (metric is "precomputed").
            n_jobs (int): Number of jobs to schedule for parallel processing on the CPU.
                If -1 is given all processors are used. Default: 1.
            params: extra parameters are passed to :class:`~gudhi.point_cloud.knn.KNearestNeighbors` and
                :class:`~gudhi.point_cloud.dtm.DTMDensity`.
        """
        # Should metric='precomputed' mean input_type='distance_matrix'?
        # Should we be able to pass metric='minkowski' (what None does currently)?
        self.graph_type_ = graph_type
        self.density_type_ = density_type
        self.params_ = params
        self.__n_clusters = n_clusters
        self.__merge_threshold = merge_threshold
        # self.eliminate_threshold_ = eliminate_threshold
        if n_clusters and merge_threshold:
            raise ValueError("Cannot specify both a merge threshold and a number of clusters")

    def fit(self, X, y=None, weights=None):
        """
        Args:
            X ((n,d)-array of float|(n,n)-array of float|Sequence[Iterable[int]]): coordinates of the points,
                or distance matrix (full, not just a triangle) if metric is "precomputed", or list of neighbors
                for each point (points are represented by their index, starting from 0) if graph_type is "manual".
                The number of points is currently limited to about 2 billion.
            weights (ndarray of shape (n_samples)): if density_type is 'manual', a density estimate at each point
            y: Not used, present here for API consistency with scikit-learn by convention.
        """
        # TODO: First detect if this is a new call with the same data (only threshold changed?)
        # TODO: less code duplication (subroutines?), less spaghetti, but don't compute neighbors twice if not
        # needed. Clear error message for missing or contradictory parameters.
        # `density_type` is the *effective* density mode for this call: explicitly
        # provided weights always take precedence over the configured estimator.
        if weights is not None:
            density_type = "manual"
        else:
            density_type = self.density_type_
            if density_type == "manual":
                raise ValueError("If density_type is 'manual', you must provide weights to fit()")

        if self.graph_type_ == "manual":
            self.neighbors_ = X
            # FIXME: uniformize "message 'option'" vs 'message "option"'
            assert density_type == "manual", 'If graph_type is "manual", density_type must be as well'
        else:
            metric = self.params_.get("metric", "minkowski")
            if metric != "precomputed":
                self.points_ = X

        # Slight complication to avoid computing knn twice.
        need_knn = 0
        need_knn_ngb = False
        need_knn_dist = False
        if self.graph_type_ == "knn":
            k_graph = self.params_.get("k", 10)
            # If X has fewer than k points...
            if k_graph > len(X):
                k_graph = len(X)
            need_knn = k_graph
            need_knn_ngb = True
        # NOTE: use the effective density_type, not self.density_type_, so that
        # user-supplied weights are not silently recomputed and overwritten.
        if density_type in ["DTM", "logDTM"]:
            k = self.params_.get("k", 10)
            k_DTM = self.params_.get("k_DTM", k)
            # If X has fewer than k points...
            if k_DTM > len(X):
                k_DTM = len(X)
            need_knn = max(need_knn, k_DTM)
            need_knn_dist = True
            # if we ask for more neighbors for the graph than the DTM, getting the distances is a slight waste,
            # but it looks negligible
        if need_knn > 0:
            knn_args = dict(self.params_)
            knn_args["k"] = need_knn
            knn = KNearestNeighbors(return_index=need_knn_ngb, return_distance=need_knn_dist, **knn_args).fit_transform(
                X
            )
            if need_knn_ngb:
                if need_knn_dist:
                    self.neighbors_ = knn[0][:, 0:k_graph]
                    knn_dist = knn[1]
                else:
                    self.neighbors_ = knn
            elif need_knn_dist:
                knn_dist = knn
        if density_type in ["DTM", "logDTM"]:
            dim = self.params_.get("dim")
            if dim is None:
                dim = len(X[0]) if metric != "precomputed" else 2
            q = self.params_.get("q", dim)
            weights = DTMDensity(k=k_DTM, metric="neighbors", dim=dim, q=q).fit_transform(knn_dist)
            if density_type == "logDTM":
                weights = numpy.log(weights)

        if self.graph_type_ == "radius":
            if metric in ["minkowski", "euclidean", "manhattan", "chebyshev"]:
                from scipy.spatial import cKDTree

                tree = cKDTree(X)
                # TODO: handle "l1" and "l2" aliases?
                p = self.params_.get("p")
                if metric == "euclidean":
                    assert p is None or p == 2, "p=" + str(p) + " is not consistent with metric='euclidean'"
                    p = 2
                elif metric == "manhattan":
                    assert p is None or p == 1, "p=" + str(p) + " is not consistent with metric='manhattan'"
                    p = 1
                elif metric == "chebyshev":
                    assert p is None or p == numpy.inf, "p=" + str(p) + " is not consistent with metric='chebyshev'"
                    p = numpy.inf
                elif p is None:
                    p = 2  # the default
                eps = self.params_.get("eps", 0)
                self.neighbors_ = tree.query_ball_tree(tree, r=self.params_["r"], p=p, eps=eps)

            # TODO: sklearn's NearestNeighbors.radius_neighbors can handle more metrics efficiently via its BallTree
            # (don't bother with the _graph variant, it just calls radius_neighbors).
            elif metric != "precomputed":
                from sklearn.metrics import pairwise_distances

                X = pairwise_distances(X, metric=metric, n_jobs=self.params_.get("n_jobs"))
                metric = "precomputed"

            if metric == "precomputed":
                # TODO: parallelize? May not be worth it.
                X = numpy.asarray(X)
                r = self.params_["r"]
                self.neighbors_ = [numpy.flatnonzero(row <= r) for row in X]

        if density_type in {"KDE", "logKDE"}:
            # Slow...
            assert (
                self.graph_type_ != "manual" and metric != "precomputed"
            ), "Scikit-learn's KernelDensity requires point coordinates"
            kde_params = dict(self.params_.get("kde_params", dict()))
            kde_params.setdefault("metric", metric)
            r = self.params_.get("r")
            if r is not None:
                kde_params.setdefault("bandwidth", r)
            # Should we default rtol to eps?
            from sklearn.neighbors import KernelDensity

            weights = KernelDensity(**kde_params).fit(self.points_).score_samples(self.points_)
            if density_type == "KDE":
                weights = numpy.exp(weights)

        # TODO: do it at the C++ level and/or in parallel if this is too slow?
        if self.params_.get("symmetrize_graph"):
            self.neighbors_ = [set(line) for line in self.neighbors_]
            for i, line in enumerate(self.neighbors_):
                line.discard(i)
                for j in line:
                    self.neighbors_[j].add(i)

        self.weights_ = weights
        # This is where the main computation happens
        self.leaf_labels_, self.children_, self.diagram_, self.max_weight_per_cc_ = hierarchy(self.neighbors_, weights)
        self.n_leaves_ = len(self.max_weight_per_cc_) + len(self.children_)
        assert self.leaf_labels_.max() + 1 == len(self.max_weight_per_cc_) + len(self.children_)
        # TODO: deduplicate this code with the setters below
        if self.__merge_threshold:
            assert not self.__n_clusters
            self.__n_clusters = numpy.count_nonzero(
                self.diagram_[:, 0] - self.diagram_[:, 1] > self.__merge_threshold
            ) + len(self.max_weight_per_cc_)
        if self.__n_clusters:
            # TODO: set corresponding merge_threshold?
            renaming = merge(self.children_, self.n_leaves_, self.__n_clusters)
            self.labels_ = renaming[self.leaf_labels_]
            # In case the user asked for something impossible.
            # TODO: check for impossible situations before calling merge.
            self.__n_clusters = self.labels_.max() + 1
        else:
            self.labels_ = self.leaf_labels_
            self.__n_clusters = self.n_leaves_
        return self

    def fit_predict(self, X, y=None, weights=None):
        """
        Equivalent to fit(), and returns the `labels_`.
        """
        return self.fit(X, y, weights).labels_

    # TODO: add argument k or threshold? Have a version where you can click and it shows the line and the
    # corresponding k?
    def plot_diagram(self):
        """
        Plot the persistence diagram of the hierarchy: one point per merge, plus the
        maxima of the connected components shown below the diagonal.
        """
        import matplotlib.pyplot as plt

        l = self.max_weight_per_cc_.min()
        r = self.max_weight_per_cc_.max()
        if self.diagram_.size > 0:
            plt.plot(self.diagram_[:, 0], self.diagram_[:, 1], "ro")
            l = min(l, self.diagram_[:, 1].min())
            r = max(r, self.diagram_[:, 0].max())
        # Degenerate range (single value): widen it so the diagonal is visible.
        if l == r:
            if l > 0:
                l, r = 0.9 * l, 1.1 * r
            elif l < 0:
                l, r = 1.1 * l, 0.9 * r
            else:
                l, r = -1.0, 1.0
        plt.plot([l, r], [l, r])
        # Use a bare "o" marker: the original "ro" format string hard-coded red and
        # conflicted with color="green" (matplotlib warns and one spec is ignored).
        plt.plot(
            self.max_weight_per_cc_, numpy.full(self.max_weight_per_cc_.shape, 1.1 * l - 0.1 * r), "o", color="green"
        )
        plt.show()

    # Use set_params instead?
+ @property + def n_clusters_(self): + return self.__n_clusters + + @n_clusters_.setter + def n_clusters_(self, n_clusters): + if n_clusters == self.__n_clusters: + return + self.__n_clusters = n_clusters + self.__merge_threshold = None + if hasattr(self, "leaf_labels_"): + renaming = merge(self.children_, self.n_leaves_, self.__n_clusters) + self.labels_ = renaming[self.leaf_labels_] + # In case the user asked for something impossible + self.__n_clusters = self.labels_.max() + 1 + + @property + def merge_threshold_(self): + return self.__merge_threshold + + @merge_threshold_.setter + def merge_threshold_(self, merge_threshold): + if merge_threshold == self.__merge_threshold: + return + if hasattr(self, "leaf_labels_"): + self.n_clusters_ = numpy.count_nonzero(self.diagram_[:, 0] - self.diagram_[:, 1] > merge_threshold) + len( + self.max_weight_per_cc_ + ) + else: + self.__n_clusters = None + self.__merge_threshold = merge_threshold |