From 23f0949dd204a4f4b0fec5527b64b5d5eabbebf8 Mon Sep 17 00:00:00 2001
From: Marc Glisse <marc.glisse@inria.fr>
Date: Fri, 28 Feb 2020 23:45:24 +0100
Subject: metric==Callable

---
 src/python/gudhi/clustering/tomato.py | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

(limited to 'src')

diff --git a/src/python/gudhi/clustering/tomato.py b/src/python/gudhi/clustering/tomato.py
index 5257e487..467dd17e 100644
--- a/src/python/gudhi/clustering/tomato.py
+++ b/src/python/gudhi/clustering/tomato.py
@@ -34,11 +34,12 @@ class Tomato:
         self,
         input_type="points",
         metric=None,
-        graph_type=None,
-        density_type="manual",
+        graph_type="knn",
+        density_type="DTM",
         n_clusters=None,
         merge_threshold=None,
-        eliminate_threshold=None,
+#       eliminate_threshold=None,
+#           eliminate_threshold (float): minimum max weight of a cluster so it doesn't get eliminated
         **params
     ):
         """
@@ -46,7 +47,7 @@ class Tomato:
 
         Args:
             input_type (str): 'points', 'distance_matrix' or 'neighbors'.
-            metric (str or callable): FIXME ???
+            metric (None|Callable): If None, use the Minkowski distance of parameter p.
             graph_type (str): 'manual', 'knn' or 'radius'. Ignored if input_type is 'neighbors'.
             density_type (str): 'manual', 'DTM', 'logDTM' or 'kde'.
             kde_params (dict): if density_type is 'kde', additional parameters passed directly to sklearn.neighbors.KernelDensity.
@@ -57,7 +58,6 @@ class Tomato:
             gpu (bool): enable use of CUDA (through pykeops) to compute k nearest neighbors. This is useful when the dimension becomes large (10+) but the number of points remains low (less than a million).
             n_clusters (int): number of clusters requested. Defaults to ???
             merge_threshold (float): minimum prominence of a cluster so it doesn't get merged.
-            eliminate_threshold (float): minimum height of a cluster so it doesn't get eliminated
             symmetrize_graph (bool): whether we should add edges to make the neighborhood graph symmetric. This can be useful with k-NN for small k. Defaults to false.
             p (float): norm L^p on input points (numpy.inf is supported without gpu). Defaults to 2.
             p_DTM (float): order used to compute the distance to measure. Defaults to 2.
@@ -90,20 +90,26 @@ class Tomato:
             if density_type == "manual":
                 raise ValueError("If density_type is 'manual', you must provide weights to fit()")
 
-        if self.input_type_ == "distance_matrix" and self.graph_type_ == "radius":
+        input_type = self.input_type_
+        if input_type == "points" and self.metric_:
+            from sklearn.metrics import pairwise_distances
+            X = pairwise_distances(X,metric=self.metric_,n_jobs=self.params_.get("n_jobs"))
+            input_type="distance_matrix"
+
+        if input_type == "distance_matrix" and self.graph_type_ == "radius":
             X = numpy.array(X)
             r = self.params_["r"]
             self.neighbors_ = [numpy.nonzero(l <= r) for l in X]
 
-        if self.input_type_ == "distance_matrix" and self.graph_type_ == "knn":
+        if input_type == "distance_matrix" and self.graph_type_ == "knn":
             k = self.params_["k"]
             self.neighbors_ = numpy.argpartition(X, k - 1)[:, 0:k]
 
-        if self.input_type_ == "neighbors":
+        if input_type == "neighbors":
             self.neighbors_ = X
             assert density_type == "manual"
 
-        if self.input_type_ == "points" and self.graph_type_ == "knn" and self.density_type_ in {"DTM", "logDTM"}:
+        if input_type == "points" and self.graph_type_ == "knn" and self.density_type_ in {"DTM", "logDTM"}:
             self.points_ = X
             q = self.params_.get("p_DTM", 2)
             p = self.params_.get("p", 2)
@@ -174,7 +180,7 @@ class Tomato:
                 # We ignore exponents, which become constant factors with log
                 weights = -numpy.log(weights)
 
-        if self.input_type_ == "points" and self.graph_type_ == "knn" and self.density_type_ not in {"DTM", "logDTM"}:
+        if input_type == "points" and self.graph_type_ == "knn" and self.density_type_ not in {"DTM", "logDTM"}:
             self.points_ = X
             p = self.params_.get("p", 2)
             k = self.params_.get("k", 10)
@@ -203,7 +209,7 @@ class Tomato:
                 qargs = {k: v for k, v in self.params_.items() if k in {"eps", "n_jobs"}}
                 _, self.neighbors_ = kdtree.query(self.points_, k=k, p=p, **qargs)
 
-        if self.input_type_ == "points" and self.graph_type_ != "knn" and self.density_type_ in {"DTM", "logDTM"}:
+        if input_type == "points" and self.graph_type_ != "knn" and self.density_type_ in {"DTM", "logDTM"}:
             self.points_ = X
             q = self.params_.get("p_DTM", 2)
             p = self.params_.get("p", 2)
@@ -253,7 +259,7 @@ class Tomato:
             else:
                 weights = -numpy.log(weights)
 
-        if self.input_type_ == "distance_matrix" and self.density_type_ in {"DTM", "logDTM"}:
+        if input_type == "distance_matrix" and self.density_type_ in {"DTM", "logDTM"}:
             q = self.params_.get("p_DTM", 2)
             X = numpy.array(X)
             k = self.params_.get("k_DTM")
@@ -269,7 +275,7 @@ class Tomato:
 
         if self.density_type_ == "kde":
             # FIXME: replace most assert with raise ValueError("blabla")
-            assert self.input_type_ == "points"
+            assert input_type == "points"
             kde_params = self.params_.get("kde_params", dict())
             from sklearn.neighbors import KernelDensity
 
-- 
cgit v1.2.3