From 87a142db9e133fbd8f08d9bcc70a51e2a907aa35 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 25 May 2020 18:53:08 +0200 Subject: Document attribute weights_ --- src/python/doc/clustering.rst | 2 +- src/python/gudhi/clustering/tomato.py | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/python/doc/clustering.rst b/src/python/doc/clustering.rst index dc9de968..1e933dc9 100644 --- a/src/python/doc/clustering.rst +++ b/src/python/doc/clustering.rst @@ -55,7 +55,7 @@ Makes the number of clusters clearer, and changes a bit the shape of the cluster .. image:: img/spiral-diag2.png -A quick look at the corresponding density estimate (`weights_` is not officially supported) +A quick look at the corresponding density estimate .. code-block:: diff --git a/src/python/gudhi/clustering/tomato.py b/src/python/gudhi/clustering/tomato.py index fcb4b234..c4da9deb 100644 --- a/src/python/gudhi/clustering/tomato.py +++ b/src/python/gudhi/clustering/tomato.py @@ -4,8 +4,7 @@ from ..point_cloud.dtm import DTMDensity from ._tomato import * # The fit/predict interface is not so well suited... -# TODO: option for a faster, weaker (probabilistic) knn - +# FIXME: choose if they are called weight, density, filtration, etc and be consistent. class Tomato: """ @@ -21,14 +20,16 @@ class Tomato: minimum prominence of a cluster so it doesn't get merged. Writing to it automatically adjusts labels_. n_leaves_: int number of leaves (unstable clusters) in the hierarchical tree - leaf_labels_: ndarray of shape (n_samples) + leaf_labels_: ndarray of shape (n_samples,) cluster labels for each point, at the very bottom of the hierarchy - labels_: ndarray of shape (n_samples) + labels_: ndarray of shape (n_samples,) cluster labels for each point, after merging diagram_: ndarray of shape (n_leaves_,2) persistence diagram (only the finite points) children_: ndarray of shape (n_leaves_-1,2) The children of each non-leaf node. Values less than n_leaves_ correspond to leaves of the tree. A node i greater than or equal to n_leaves_ is a non-leaf node and has children children_[i - n_leaves_]. Alternatively at the i-th iteration, children[i][0] and children[i][1] are merged to form node n_leaves_ + i + weights_: ndarray of shape (n_samples,) + weights of the points, as computed by the density estimator or provided by the user params_: dict Parameters like metric, etc """ @@ -180,12 +181,14 @@ class Tomato: self.neighbors_ = [numpy.flatnonzero(l <= r) for l in X] if self.density_type_ in {"KDE", "logKDE"}: + # Slow... assert self.graph_type_ != "manual" and metric != "precomputed", "Scikit-learn's KernelDensity requires point coordinates" kde_params = dict(self.params_.get("kde_params", dict())) kde_params.setdefault("metric", metric) r = self.params_.get("r") if r is not None: kde_params.setdefault("bandwidth", r) + # Should we default rtol to eps? from sklearn.neighbors import KernelDensity weights = KernelDensity(**kde_params).fit(self.points_).score_samples(self.points_) if self.density_type_ == "KDE": @@ -199,7 +202,7 @@ class Tomato: for j in line: self.neighbors_[j].add(i) - self.weights_ = weights # TODO remove + self.weights_ = weights self.leaf_labels_, self.children_, self.diagram_, self.max_density_per_cc_ = doit( list(self.neighbors_), weights ) -- cgit v1.2.3