From 87a142db9e133fbd8f08d9bcc70a51e2a907aa35 Mon Sep 17 00:00:00 2001
From: Marc Glisse <marc.glisse@inria.fr>
Date: Mon, 25 May 2020 18:53:08 +0200
Subject: Document attribute weights_

---
 src/python/doc/clustering.rst         |  2 +-
 src/python/gudhi/clustering/tomato.py | 13 ++++++++-----
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/python/doc/clustering.rst b/src/python/doc/clustering.rst
index dc9de968..1e933dc9 100644
--- a/src/python/doc/clustering.rst
+++ b/src/python/doc/clustering.rst
@@ -55,7 +55,7 @@ Makes the number of clusters clearer, and changes a bit the shape of the cluster
 
 .. image:: img/spiral-diag2.png
 
-A quick look at the corresponding density estimate (`weights_` is not officially supported)
+A quick look at the corresponding density estimate
 
 .. code-block::
 
diff --git a/src/python/gudhi/clustering/tomato.py b/src/python/gudhi/clustering/tomato.py
index fcb4b234..c4da9deb 100644
--- a/src/python/gudhi/clustering/tomato.py
+++ b/src/python/gudhi/clustering/tomato.py
@@ -4,8 +4,7 @@ from ..point_cloud.dtm import DTMDensity
 from ._tomato import *
 
 # The fit/predict interface is not so well suited...
-# TODO: option for a faster, weaker (probabilistic) knn
-
+# FIXME: decide whether these values are called weights, densities, filtration values, etc., and use that name consistently.
 
 class Tomato:
     """
@@ -21,14 +20,16 @@ class Tomato:
         minimum prominence of a cluster so it doesn't get merged. Writing to it automatically adjusts labels_.
     n_leaves_: int
         number of leaves (unstable clusters) in the hierarchical tree
-    leaf_labels_: ndarray of shape (n_samples)
+    leaf_labels_: ndarray of shape (n_samples,)
         cluster labels for each point, at the very bottom of the hierarchy
-    labels_: ndarray of shape (n_samples)
+    labels_: ndarray of shape (n_samples,)
         cluster labels for each point, after merging
     diagram_: ndarray of shape (n_leaves_,2)
         persistence diagram (only the finite points)
     children_: ndarray of shape (n_leaves_-1,2)
         The children of each non-leaf node. Values less than n_leaves_ correspond to leaves of the tree. A node i greater than or equal to n_leaves_ is a non-leaf node and has children children_[i - n_leaves_]. Alternatively at the i-th iteration, children[i][0] and children[i][1] are merged to form node n_leaves_ + i
+    weights_: ndarray of shape (n_samples,)
+        weights of the points, as computed by the density estimator or provided by the user
     params_: dict
         Parameters like metric, etc
     """
@@ -180,12 +181,14 @@ class Tomato:
                 self.neighbors_ = [numpy.flatnonzero(l <= r) for l in X]
 
         if self.density_type_ in {"KDE", "logKDE"}:
+            # Slow...
             assert self.graph_type_ != "manual" and metric != "precomputed", "Scikit-learn's KernelDensity requires point coordinates"
             kde_params = dict(self.params_.get("kde_params", dict()))
             kde_params.setdefault("metric", metric)
             r = self.params_.get("r")
             if r is not None:
                 kde_params.setdefault("bandwidth", r)
+            # Should we default rtol to eps?
             from sklearn.neighbors import KernelDensity
             weights = KernelDensity(**kde_params).fit(self.points_).score_samples(self.points_)
             if self.density_type_ == "KDE":
@@ -199,7 +202,7 @@ class Tomato:
                 for j in line:
                     self.neighbors_[j].add(i)
 
-        self.weights_ = weights  # TODO remove
+        self.weights_ = weights
         self.leaf_labels_, self.children_, self.diagram_, self.max_density_per_cc_ = doit(
             list(self.neighbors_), weights
         )
-- 
cgit v1.2.3