From 9c45abcdf165519c58d59556dea74fd9f27c8396 Mon Sep 17 00:00:00 2001
From: martinroyer <martinpierreroyer@gmail.com>
Date: Mon, 8 Jun 2020 15:56:34 +0200
Subject: ATOL introduction as finite vectorisation method

---
 src/python/gudhi/representations/vector_methods.py | 128 ++++++++++++++++++++-
 1 file changed, 125 insertions(+), 3 deletions(-)

(limited to 'src/python/gudhi/representations/vector_methods.py')

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 46fee086..df66ffc3 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -1,16 +1,17 @@
 # This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
 # See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
-# Author(s):       Mathieu Carrière
+# Author(s):       Mathieu Carrière, Martin Royer
 #
-# Copyright (C) 2018-2019 Inria
+# Copyright (C) 2018-2020 Inria
 #
 # Modification(s):
-#   - YYYY/MM Author: Description of the modification
+#   - 2020/06 Martin: ATOL integration
 
 import numpy as np
 from sklearn.base          import BaseEstimator, TransformerMixin
 from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler
 from sklearn.neighbors     import DistanceMetric
+from sklearn.metrics       import pairwise
 
 from .preprocessing import DiagramScaler, BirthPersistenceTransform
 
@@ -574,3 +575,124 @@ class ComplexPolynomial(BaseEstimator, TransformerMixin):
             numpy array with shape (**threshold**): output complex vector of coefficients.
         """
         return self.fit_transform([diag])[0,:]
+
+def _lapl_contrast(measure, centers, inertias, eps=1e-8):
+	"""contrast function for vectorising `measure` in ATOL"""
+    return np.exp(-np.sqrt(pairwise.pairwise_distances(measure, Y=centers) / (inertias + eps)))
+
+def _gaus_contrast(measure, centers, inertias, eps=1e-8):
+	"""contrast function for vectorising `measure` in ATOL"""
+    return np.exp(-pairwise.pairwise_distances(measure, Y=centers) / (inertias + eps))
+
+def _indicator_contrast(diags, centers, inertias, eps=1e-8):
+	"""contrast function for vectorising `measure` in ATOL"""
+    pair_dist = pairwise.pairwise_distances(diags, Y=centers)
+    flat_circ = (pair_dist < (inertias+eps)).astype(int)
+    robe_curve = np.positive((2-pair_dist/(inertias+eps))*((inertias+eps) < pair_dist).astype(int))
+    return flat_circ + robe_curve
+
+def _cloud_weighting(measure):
+	"""automatic uniform weighting with mass 1 for `measure` in ATOL"""
+    return np.ones(shape=measure.shape[0])
+
+def _iidproba_weighting(measure):
+	"""automatic uniform weighting with mass 1/N for `measure` in ATOL"""
+    return np.ones(shape=measure.shape[0]) / measure.shape[0]
+
+class Atol(BaseEstimator, TransformerMixin):
+    """
+    This class allows to vectorise measures (e.g. point clouds, persistence diagrams, etc) after a quantisation step.
+
+    ATOL paper: https://arxiv.org/abs/1909.13472
+    """
+    def __init__(self, quantiser, weighting_method="cloud", contrast="gaus"):
+        """
+        Constructor for the Atol measure vectorisation class.
+
+        Parameters:
+            quantiser (Object): Object with `fit` (sklearn API consistent) and `cluster_centers` and `n_clusters`
+                attributes (default: MiniBatchKMeans()). This object will be fitted by the function `fit`.
+            weighting_method (function): constant generic function for weighting the measure points
+			    choose from {"cloud", "iidproba"}
+                (default: constant function, i.e. the measure is seen as a point cloud by default).
+                This will have no impact if weights are provided along with measures all the way: `fit` and `transform`.
+            contrast (string): constant function for evaluating proximity of a measure with respect to centers
+			    choose from {"gaus", "lapl", "indi"}
+                (default: laplacian contrast function, see page 3 in the ATOL paper).
+        """
+        self.quantiser = quantiser
+        self.contrast = {
+            "gaus": _gaus_contrast,
+            "lapl": _lapl_contrast,
+            "indi": _indicator_contrast,
+        }.get(contrast, _gaus_contrast)
+        self.centers = np.ones(shape=(self.quantiser.n_clusters, 2))*np.inf
+        self.inertias = np.full(self.quantiser.n_clusters, np.nan)
+        self.weighting_method = {
+            "cloud"   : _cloud_weighting,
+            "iidproba": _iidproba_weighting,
+        }.get(weighting_method, _cloud_weighting)
+
+    def fit(self, X, y=None, sample_weight=None):
+        """
+        Calibration step: fit centers to the sample measures and derive inertias between centers.
+
+        Parameters:
+            X (list N x d numpy arrays): input measures in R^d from which to learn center locations and inertias
+                (measures can have different N).
+            y: Ignored, present for API consistency by convention.
+            sample_weight (list of numpy arrays): weights for each measure point in X, optional.
+                If None, the object's weighting_method will be used.
+
+        Returns:
+            self
+        """
+        if not hasattr(self.quantiser, 'fit'):
+            raise TypeError("quantiser %s has no `fit` attribute." % (self.quantiser))
+        if len(X) < self.quantiser.n_clusters:
+            # in case there are not enough observations for fitting the quantiser, we add random points in [0, 1]^2
+			# @Martin: perhaps this behaviour is to be externalised and a warning should be raised instead
+            random_points = np.random.rand(self.quantiser.n_clusters-len(X), X[0].shape[1])
+            X.append(random_points)
+        if sample_weight is None:
+            sample_weight = np.concatenate([self.weighting_method(measure) for measure in X])
+
+        measures_concat = np.concatenate(X)
+        self.quantiser.fit(X=measures_concat, sample_weight=sample_weight)
+        self.centers = self.quantiser.cluster_centers_
+        labels = np.argmin(pairwise.pairwise_distances(measures_concat, Y=self.centers), axis=1)
+        dist_centers = pairwise.pairwise_distances(self.centers)
+        np.fill_diagonal(dist_centers, np.inf)
+        self.inertias = np.min(dist_centers, axis=0)/2
+        return self
+
+    def __call__(self, measure, sample_weight=None):
+        """
+        Apply measure vectorisation on a single measure.
+
+        Parameters:
+            measure (n x d numpy array): input measure in R^d.
+
+        Returns:
+            numpy array in R^self.quantiser.n_clusters.
+        """
+        if sample_weight is None:
+            sample_weight = self.weighting_method(measure)
+        return np.sum(sample_weight * self.contrast(measure, self.centers, self.inertias.T).T, axis=1)
+
+    def transform(self, X, sample_weight=None):
+        """
+        Apply measure vectorisation on a list of measures.
+
+        Parameters:
+            X (list N x d numpy arrays): input measures in R^d to vectorise
+                (measures can have different N).
+            sample_weight (list of numpy arrays): weights for each measure point in X, optional.
+                If None, the object's weighting_method will be used.
+
+        Returns:
+            numpy array with shape (number of measures) x (self.quantiser.n_clusters).
+        """
+        if sample_weight is None:
+            sample_weight = [self.weighting_method(measure) for measure in X]
+        return np.stack([self(measure, sample_weight=weight) for measure, weight in zip(X, sample_weight)])
-- 
cgit v1.2.3


From 9b4de0e29a01552b4bb3f47fe0d3f01f5601c000 Mon Sep 17 00:00:00 2001
From: martinroyer <16647869+martinroyer@users.noreply.github.com>
Date: Tue, 9 Jun 2020 08:42:30 +0200
Subject: Apply suggestions from code review

---
 src/python/gudhi/representations/vector_methods.py | 45 ++++++++++++++++------
 1 file changed, 33 insertions(+), 12 deletions(-)

(limited to 'src/python/gudhi/representations/vector_methods.py')

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index df66ffc3..a09b9356 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -577,26 +577,26 @@ class ComplexPolynomial(BaseEstimator, TransformerMixin):
         return self.fit_transform([diag])[0,:]
 
 def _lapl_contrast(measure, centers, inertias, eps=1e-8):
-	"""contrast function for vectorising `measure` in ATOL"""
+    """contrast function for vectorising `measure` in ATOL"""
     return np.exp(-np.sqrt(pairwise.pairwise_distances(measure, Y=centers) / (inertias + eps)))
 
 def _gaus_contrast(measure, centers, inertias, eps=1e-8):
-	"""contrast function for vectorising `measure` in ATOL"""
+    """contrast function for vectorising `measure` in ATOL"""
     return np.exp(-pairwise.pairwise_distances(measure, Y=centers) / (inertias + eps))
 
 def _indicator_contrast(diags, centers, inertias, eps=1e-8):
-	"""contrast function for vectorising `measure` in ATOL"""
+    """contrast function for vectorising `measure` in ATOL"""
     pair_dist = pairwise.pairwise_distances(diags, Y=centers)
     flat_circ = (pair_dist < (inertias+eps)).astype(int)
     robe_curve = np.positive((2-pair_dist/(inertias+eps))*((inertias+eps) < pair_dist).astype(int))
     return flat_circ + robe_curve
 
 def _cloud_weighting(measure):
-	"""automatic uniform weighting with mass 1 for `measure` in ATOL"""
+    """automatic uniform weighting with mass 1 for `measure` in ATOL"""
     return np.ones(shape=measure.shape[0])
 
 def _iidproba_weighting(measure):
-	"""automatic uniform weighting with mass 1/N for `measure` in ATOL"""
+    """automatic uniform weighting with mass 1/N for `measure` in ATOL"""
     return np.ones(shape=measure.shape[0]) / measure.shape[0]
 
 class Atol(BaseEstimator, TransformerMixin):
@@ -611,20 +611,41 @@ class Atol(BaseEstimator, TransformerMixin):
 
         Parameters:
             quantiser (Object): Object with `fit` (sklearn API consistent) and `cluster_centers` and `n_clusters`
-                attributes (default: MiniBatchKMeans()). This object will be fitted by the function `fit`.
-            weighting_method (function): constant generic function for weighting the measure points
+                attributes. This object will be fitted by the function `fit`.
+            weighting_method (string): constant generic function for weighting the measure points
 			    choose from {"cloud", "iidproba"}
                 (default: constant function, i.e. the measure is seen as a point cloud by default).
                 This will have no impact if weights are provided along with measures all the way: `fit` and `transform`.
             contrast (string): constant function for evaluating proximity of a measure with respect to centers
-			    choose from {"gaus", "lapl", "indi"}
+			    choose from {"gaussian", "laplacian", "indicator"}
                 (default: laplacian contrast function, see page 3 in the ATOL paper).
-        """
+
+    Example
+    --------
+    >>> from sklearn.cluster import KMeans
+    >>> import numpy as np
+    >>> a = np.array([[1, 2, 4], [1, 4, 0], [1, 0, 4]])
+    >>> b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]])
+    >>> c = np.array([[3, 2, -1], [1, 2, -1]])
+    >>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2))
+    >>> atol_vectoriser.fit(X=[a, b, c])
+    >>> atol_vectoriser.centers
+    array([[ 2.6       ,  2.8       , -0.4       ],
+       [ 2.        ,  0.66666667,  3.33333333]])
+    >>> atol_vectoriser(a)
+    array([0.58394704, 1.0769395 ])
+    >>> atol_vectoriser(c)
+    array([1.02816136, 0.23559623])
+    >>> atol_vectoriser.transform(X=[a, b, c])
+    array([[0.58394704, 1.0769395 ],
+       [1.04696684, 0.56203292],
+       [1.02816136, 0.23559623]])
+    """
         self.quantiser = quantiser
         self.contrast = {
-            "gaus": _gaus_contrast,
-            "lapl": _lapl_contrast,
-            "indi": _indicator_contrast,
+            "gaussian": _gaus_contrast,
+            "laplacian": _lapl_contrast,
+            "indicator": _indicator_contrast,
         }.get(contrast, _gaus_contrast)
         self.centers = np.ones(shape=(self.quantiser.n_clusters, 2))*np.inf
         self.inertias = np.full(self.quantiser.n_clusters, np.nan)
-- 
cgit v1.2.3


From 285919ad4a19c6bf9ec11355cd32bc4b39014365 Mon Sep 17 00:00:00 2001
From: martinroyer <16647869+martinroyer@users.noreply.github.com>
Date: Tue, 9 Jun 2020 09:12:18 +0200
Subject: fix minimal example

---
 src/python/gudhi/representations/vector_methods.py | 31 +++++++++++-----------
 1 file changed, 16 insertions(+), 15 deletions(-)

(limited to 'src/python/gudhi/representations/vector_methods.py')

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index a09b9356..f77338b7 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -604,25 +604,11 @@ class Atol(BaseEstimator, TransformerMixin):
     This class allows to vectorise measures (e.g. point clouds, persistence diagrams, etc) after a quantisation step.
 
     ATOL paper: https://arxiv.org/abs/1909.13472
-    """
-    def __init__(self, quantiser, weighting_method="cloud", contrast="gaus"):
-        """
-        Constructor for the Atol measure vectorisation class.
-
-        Parameters:
-            quantiser (Object): Object with `fit` (sklearn API consistent) and `cluster_centers` and `n_clusters`
-                attributes. This object will be fitted by the function `fit`.
-            weighting_method (string): constant generic function for weighting the measure points
-			    choose from {"cloud", "iidproba"}
-                (default: constant function, i.e. the measure is seen as a point cloud by default).
-                This will have no impact if weights are provided along with measures all the way: `fit` and `transform`.
-            contrast (string): constant function for evaluating proximity of a measure with respect to centers
-			    choose from {"gaussian", "laplacian", "indicator"}
-                (default: laplacian contrast function, see page 3 in the ATOL paper).
 
     Example
     --------
     >>> from sklearn.cluster import KMeans
+    >>> from gudhi.representations.vector_methods import Atol
     >>> import numpy as np
     >>> a = np.array([[1, 2, 4], [1, 4, 0], [1, 0, 4]])
     >>> b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]])
@@ -641,6 +627,21 @@ class Atol(BaseEstimator, TransformerMixin):
        [1.04696684, 0.56203292],
        [1.02816136, 0.23559623]])
     """
+    def __init__(self, quantiser, weighting_method="cloud", contrast="gaus"):
+        """
+        Constructor for the Atol measure vectorisation class.
+
+        Parameters:
+            quantiser (Object): Object with `fit` (sklearn API consistent) and `cluster_centers` and `n_clusters`
+                attributes. This object will be fitted by the function `fit`.
+            weighting_method (string): constant generic function for weighting the measure points
+			    choose from {"cloud", "iidproba"}
+                (default: constant function, i.e. the measure is seen as a point cloud by default).
+                This will have no impact if weights are provided along with measures all the way: `fit` and `transform`.
+            contrast (string): constant function for evaluating proximity of a measure with respect to centers
+			    choose from {"gaussian", "laplacian", "indicator"}
+                (default: laplacian contrast function, see page 3 in the ATOL paper).
+	"""
         self.quantiser = quantiser
         self.contrast = {
             "gaussian": _gaus_contrast,
-- 
cgit v1.2.3


From 5d5a2b1a3b6a2d3d2dc061e6e9c940677e782c80 Mon Sep 17 00:00:00 2001
From: martinroyer <16647869+martinroyer@users.noreply.github.com>
Date: Tue, 9 Jun 2020 09:13:28 +0200
Subject: Update src/python/gudhi/representations/vector_methods.py

---
 src/python/gudhi/representations/vector_methods.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/python/gudhi/representations/vector_methods.py')

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index f77338b7..16e91812 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -641,7 +641,7 @@ class Atol(BaseEstimator, TransformerMixin):
             contrast (string): constant function for evaluating proximity of a measure with respect to centers
 			    choose from {"gaussian", "laplacian", "indicator"}
                 (default: laplacian contrast function, see page 3 in the ATOL paper).
-	"""
+        """
         self.quantiser = quantiser
         self.contrast = {
             "gaussian": _gaus_contrast,
-- 
cgit v1.2.3


From 588e7127d1616e40bf7e3de7e7797b54aee137da Mon Sep 17 00:00:00 2001
From: martinroyer <16647869+martinroyer@users.noreply.github.com>
Date: Tue, 9 Jun 2020 09:42:40 +0200
Subject: tweak test results from doc

---
 src/python/gudhi/representations/vector_methods.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'src/python/gudhi/representations/vector_methods.py')

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 16e91812..d3b85636 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -614,18 +614,17 @@ class Atol(BaseEstimator, TransformerMixin):
     >>> b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]])
     >>> c = np.array([[3, 2, -1], [1, 2, -1]])
     >>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2))
-    >>> atol_vectoriser.fit(X=[a, b, c])
-    >>> atol_vectoriser.centers
+    >>> atol_vectoriser.fit(X=[a, b, c]).centers
     array([[ 2.6       ,  2.8       , -0.4       ],
-       [ 2.        ,  0.66666667,  3.33333333]])
+           [ 2.        ,  0.66666667,  3.33333333]])
     >>> atol_vectoriser(a)
     array([0.58394704, 1.0769395 ])
     >>> atol_vectoriser(c)
     array([1.02816136, 0.23559623])
     >>> atol_vectoriser.transform(X=[a, b, c])
     array([[0.58394704, 1.0769395 ],
-       [1.04696684, 0.56203292],
-       [1.02816136, 0.23559623]])
+           [1.04696684, 0.56203292],
+           [1.02816136, 0.23559623]])
     """
     def __init__(self, quantiser, weighting_method="cloud", contrast="gaus"):
         """
-- 
cgit v1.2.3


From 7eed21c364e244df7fceae11ce9d1c319db8bec9 Mon Sep 17 00:00:00 2001
From: martinroyer <16647869+martinroyer@users.noreply.github.com>
Date: Tue, 9 Jun 2020 11:21:29 +0200
Subject: Update src/python/gudhi/representations/vector_methods.py

---
 src/python/gudhi/representations/vector_methods.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/python/gudhi/representations/vector_methods.py')

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index d3b85636..0a26a8e5 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -626,7 +626,7 @@ class Atol(BaseEstimator, TransformerMixin):
            [1.04696684, 0.56203292],
            [1.02816136, 0.23559623]])
     """
-    def __init__(self, quantiser, weighting_method="cloud", contrast="gaus"):
+    def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"):
         """
         Constructor for the Atol measure vectorisation class.
 
@@ -639,7 +639,7 @@ class Atol(BaseEstimator, TransformerMixin):
                 This will have no impact if weights are provided along with measures all the way: `fit` and `transform`.
             contrast (string): constant function for evaluating proximity of a measure with respect to centers
 			    choose from {"gaussian", "laplacian", "indicator"}
-                (default: laplacian contrast function, see page 3 in the ATOL paper).
+                (default: gaussian contrast function, see page 3 in the ATOL paper).
         """
         self.quantiser = quantiser
         self.contrast = {
-- 
cgit v1.2.3


From a22b48e00ca858c0e9c300cee87f265d70aeecc7 Mon Sep 17 00:00:00 2001
From: martinroyer <16647869+martinroyer@users.noreply.github.com>
Date: Tue, 9 Jun 2020 12:23:20 +0200
Subject: remove randomness in example

---
 src/python/gudhi/representations/vector_methods.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/python/gudhi/representations/vector_methods.py')

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 0a26a8e5..98cd6153 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -613,7 +613,7 @@ class Atol(BaseEstimator, TransformerMixin):
     >>> a = np.array([[1, 2, 4], [1, 4, 0], [1, 0, 4]])
     >>> b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]])
     >>> c = np.array([[3, 2, -1], [1, 2, -1]])
-    >>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2))
+    >>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006))
     >>> atol_vectoriser.fit(X=[a, b, c]).centers
     array([[ 2.6       ,  2.8       , -0.4       ],
            [ 2.        ,  0.66666667,  3.33333333]])
-- 
cgit v1.2.3


From 3d126356fd3fcaeb2bde8824b8c5894450fccdd9 Mon Sep 17 00:00:00 2001
From: martinroyer <16647869+martinroyer@users.noreply.github.com>
Date: Tue, 9 Jun 2020 12:43:28 +0200
Subject: awful test

---
 src/python/gudhi/representations/vector_methods.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'src/python/gudhi/representations/vector_methods.py')

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 98cd6153..77b2836f 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -615,16 +615,16 @@ class Atol(BaseEstimator, TransformerMixin):
     >>> c = np.array([[3, 2, -1], [1, 2, -1]])
     >>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006))
     >>> atol_vectoriser.fit(X=[a, b, c]).centers
-    array([[ 2.6       ,  2.8       , -0.4       ],
-           [ 2.        ,  0.66666667,  3.33333333]])
+    array([[ 2.        ,  0.66666667,  3.33333333],
+           [ 2.6       ,  2.8       , -0.4       ]])
     >>> atol_vectoriser(a)
-    array([0.58394704, 1.0769395 ])
+    array([1.0769395 , 0.58394704])
     >>> atol_vectoriser(c)
-    array([1.02816136, 0.23559623])
+    array([0.23559623, 1.02816136])
     >>> atol_vectoriser.transform(X=[a, b, c])
-    array([[0.58394704, 1.0769395 ],
-           [1.04696684, 0.56203292],
-           [1.02816136, 0.23559623]])
+    array([[1.0769395 , 0.58394704],
+           [0.56203292, 1.04696684],
+           [0.23559623, 1.02816136]])
     """
     def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"):
         """
-- 
cgit v1.2.3


From cdba6045ddf1dd41e8addb7351d1c87a5506ba0f Mon Sep 17 00:00:00 2001
From: martinroyer <16647869+martinroyer@users.noreply.github.com>
Date: Wed, 10 Jun 2020 10:20:13 +0200
Subject: Apply suggestions from code review

---
 src/python/gudhi/representations/vector_methods.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'src/python/gudhi/representations/vector_methods.py')

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 77b2836f..667f963b 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -578,17 +578,17 @@ class ComplexPolynomial(BaseEstimator, TransformerMixin):
 
 def _lapl_contrast(measure, centers, inertias, eps=1e-8):
     """contrast function for vectorising `measure` in ATOL"""
-    return np.exp(-np.sqrt(pairwise.pairwise_distances(measure, Y=centers) / (inertias + eps)))
+    return np.exp(-pairwise.pairwise_distances(measure, Y=centers) / (inertias + eps))
 
 def _gaus_contrast(measure, centers, inertias, eps=1e-8):
     """contrast function for vectorising `measure` in ATOL"""
-    return np.exp(-pairwise.pairwise_distances(measure, Y=centers) / (inertias + eps))
+    return np.exp(-pairwise.pairwise_distances(measure, Y=centers)**2 / (inertias**2 + eps))
 
 def _indicator_contrast(diags, centers, inertias, eps=1e-8):
     """contrast function for vectorising `measure` in ATOL"""
     pair_dist = pairwise.pairwise_distances(diags, Y=centers)
     flat_circ = (pair_dist < (inertias+eps)).astype(int)
-    robe_curve = np.positive((2-pair_dist/(inertias+eps))*((inertias+eps) < pair_dist).astype(int))
+    robe_curve = np.clip(2-pair_dist/(inertias+eps), 0, 1)
     return flat_circ + robe_curve
 
 def _cloud_weighting(measure):
@@ -638,7 +638,7 @@ class Atol(BaseEstimator, TransformerMixin):
                 (default: constant function, i.e. the measure is seen as a point cloud by default).
                 This will have no impact if weights are provided along with measures all the way: `fit` and `transform`.
             contrast (string): constant function for evaluating proximity of a measure with respect to centers
-			    choose from {"gaussian", "laplacian", "indicator"}
+                choose from {"gaussian", "laplacian", "indicator"}
                 (default: gaussian contrast function, see page 3 in the ATOL paper).
         """
         self.quantiser = quantiser
@@ -670,7 +670,7 @@ class Atol(BaseEstimator, TransformerMixin):
         """
         if not hasattr(self.quantiser, 'fit'):
             raise TypeError("quantiser %s has no `fit` attribute." % (self.quantiser))
-        if len(X) < self.quantiser.n_clusters:
+        if np.sum([measure.shape[0] for measure in X]) < self.quantiser.n_clusters:
             # in case there are not enough observations for fitting the quantiser, we add random points in [0, 1]^2
 			# @Martin: perhaps this behaviour is to be externalised and a warning should be raised instead
             random_points = np.random.rand(self.quantiser.n_clusters-len(X), X[0].shape[1])
@@ -681,7 +681,6 @@ class Atol(BaseEstimator, TransformerMixin):
         measures_concat = np.concatenate(X)
         self.quantiser.fit(X=measures_concat, sample_weight=sample_weight)
         self.centers = self.quantiser.cluster_centers_
-        labels = np.argmin(pairwise.pairwise_distances(measures_concat, Y=self.centers), axis=1)
         dist_centers = pairwise.pairwise_distances(self.centers)
         np.fill_diagonal(dist_centers, np.inf)
         self.inertias = np.min(dist_centers, axis=0)/2
-- 
cgit v1.2.3


From bef50e15e499e40d4dd4f5d991ec87eab4236108 Mon Sep 17 00:00:00 2001
From: martinroyer <16647869+martinroyer@users.noreply.github.com>
Date: Wed, 10 Jun 2020 10:32:48 +0200
Subject: remove epsilons

---
 src/python/gudhi/representations/vector_methods.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'src/python/gudhi/representations/vector_methods.py')

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 667f963b..ede1087f 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -576,19 +576,19 @@ class ComplexPolynomial(BaseEstimator, TransformerMixin):
         """
         return self.fit_transform([diag])[0,:]
 
-def _lapl_contrast(measure, centers, inertias, eps=1e-8):
+def _lapl_contrast(measure, centers, inertias):
     """contrast function for vectorising `measure` in ATOL"""
-    return np.exp(-pairwise.pairwise_distances(measure, Y=centers) / (inertias + eps))
+    return np.exp(-pairwise.pairwise_distances(measure, Y=centers) / inertias)
 
-def _gaus_contrast(measure, centers, inertias, eps=1e-8):
+def _gaus_contrast(measure, centers, inertias):
     """contrast function for vectorising `measure` in ATOL"""
-    return np.exp(-pairwise.pairwise_distances(measure, Y=centers)**2 / (inertias**2 + eps))
+    return np.exp(-pairwise.pairwise_distances(measure, Y=centers)**2 / inertias**2)
 
-def _indicator_contrast(diags, centers, inertias, eps=1e-8):
+def _indicator_contrast(diags, centers, inertias):
     """contrast function for vectorising `measure` in ATOL"""
     pair_dist = pairwise.pairwise_distances(diags, Y=centers)
-    flat_circ = (pair_dist < (inertias+eps)).astype(int)
-    robe_curve = np.clip(2-pair_dist/(inertias+eps), 0, 1)
+    flat_circ = (pair_dist < inertias).astype(int)
+    robe_curve = np.clip(2-pair_dist/inertias, 0, 1)
     return flat_circ + robe_curve
 
 def _cloud_weighting(measure):
-- 
cgit v1.2.3


From 76529cae58f8a2736a1730fd81a9e12c3f4c7e19 Mon Sep 17 00:00:00 2001
From: martinroyer <16647869+martinroyer@users.noreply.github.com>
Date: Thu, 11 Jun 2020 16:47:16 +0200
Subject: Apply suggestions from code review #456

(thank you Marc!)
---
 src/python/gudhi/representations/vector_methods.py | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

(limited to 'src/python/gudhi/representations/vector_methods.py')

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index ede1087f..49c05c51 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -586,10 +586,8 @@ def _gaus_contrast(measure, centers, inertias):
 
 def _indicator_contrast(diags, centers, inertias):
     """contrast function for vectorising `measure` in ATOL"""
-    pair_dist = pairwise.pairwise_distances(diags, Y=centers)
-    flat_circ = (pair_dist < inertias).astype(int)
-    robe_curve = np.clip(2-pair_dist/inertias, 0, 1)
-    return flat_circ + robe_curve
+    robe_curve = np.clip(2-pairwise.pairwise_distances(diags, Y=centers)/inertias, 0, 1)
+    return robe_curve
 
 def _cloud_weighting(measure):
     """automatic uniform weighting with mass 1 for `measure` in ATOL"""
@@ -603,7 +601,7 @@ class Atol(BaseEstimator, TransformerMixin):
     """
     This class allows to vectorise measures (e.g. point clouds, persistence diagrams, etc) after a quantisation step.
 
-    ATOL paper: https://arxiv.org/abs/1909.13472
+    ATOL paper: :cite:`royer2019atol`
 
     Example
     --------
@@ -632,9 +630,9 @@ class Atol(BaseEstimator, TransformerMixin):
 
         Parameters:
             quantiser (Object): Object with `fit` (sklearn API consistent) and `cluster_centers` and `n_clusters`
-                attributes. This object will be fitted by the function `fit`.
+                attributes, e.g. sklearn.cluster.KMeans. It will be fitted when the Atol object function `fit` is called.
             weighting_method (string): constant generic function for weighting the measure points
-			    choose from {"cloud", "iidproba"}
+                choose from {"cloud", "iidproba"}
                 (default: constant function, i.e. the measure is seen as a point cloud by default).
                 This will have no impact if weights are provided along with measures all the way: `fit` and `transform`.
             contrast (string): constant function for evaluating proximity of a measure with respect to centers
@@ -647,8 +645,6 @@ class Atol(BaseEstimator, TransformerMixin):
             "laplacian": _lapl_contrast,
             "indicator": _indicator_contrast,
         }.get(contrast, _gaus_contrast)
-        self.centers = np.ones(shape=(self.quantiser.n_clusters, 2))*np.inf
-        self.inertias = np.full(self.quantiser.n_clusters, np.nan)
         self.weighting_method = {
             "cloud"   : _cloud_weighting,
             "iidproba": _iidproba_weighting,
@@ -670,11 +666,6 @@ class Atol(BaseEstimator, TransformerMixin):
         """
         if not hasattr(self.quantiser, 'fit'):
             raise TypeError("quantiser %s has no `fit` attribute." % (self.quantiser))
-        if np.sum([measure.shape[0] for measure in X]) < self.quantiser.n_clusters:
-            # in case there are not enough observations for fitting the quantiser, we add random points in [0, 1]^2
-			# @Martin: perhaps this behaviour is to be externalised and a warning should be raised instead
-            random_points = np.random.rand(self.quantiser.n_clusters-len(X), X[0].shape[1])
-            X.append(random_points)
         if sample_weight is None:
             sample_weight = np.concatenate([self.weighting_method(measure) for measure in X])
 
-- 
cgit v1.2.3


From a90843c6bf5f7f05392c4262efb60e94ccfb0e48 Mon Sep 17 00:00:00 2001
From: martinroyer <16647869+martinroyer@users.noreply.github.com>
Date: Thu, 11 Jun 2020 17:03:40 +0200
Subject: test value tweak

---
 src/python/gudhi/representations/vector_methods.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'src/python/gudhi/representations/vector_methods.py')

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 49c05c51..5a45f179 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -616,13 +616,13 @@ class Atol(BaseEstimator, TransformerMixin):
     array([[ 2.        ,  0.66666667,  3.33333333],
            [ 2.6       ,  2.8       , -0.4       ]])
     >>> atol_vectoriser(a)
-    array([1.0769395 , 0.58394704])
+    array([1.18168665, 0.42375966])
     >>> atol_vectoriser(c)
-    array([0.23559623, 1.02816136])
+    array([0.02062512, 1.25157463])
     >>> atol_vectoriser.transform(X=[a, b, c])
-    array([[1.0769395 , 0.58394704],
-           [0.56203292, 1.04696684],
-           [0.23559623, 1.02816136]])
+    array([[1.18168665, 0.42375966],
+           [0.29861028, 1.06330156],
+           [0.02062512, 1.25157463]])
     """
     def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"):
         """
-- 
cgit v1.2.3


From ec1c3ad11aeb46a67926a615fd5c00fbc70b501e Mon Sep 17 00:00:00 2001
From: martinroyer <16647869+martinroyer@users.noreply.github.com>
Date: Thu, 11 Jun 2020 21:17:27 +0200
Subject: case n_centers = 1

---
 src/python/gudhi/representations/vector_methods.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'src/python/gudhi/representations/vector_methods.py')

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 5a45f179..a576267c 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -672,9 +672,14 @@ class Atol(BaseEstimator, TransformerMixin):
         measures_concat = np.concatenate(X)
         self.quantiser.fit(X=measures_concat, sample_weight=sample_weight)
         self.centers = self.quantiser.cluster_centers_
-        dist_centers = pairwise.pairwise_distances(self.centers)
-        np.fill_diagonal(dist_centers, np.inf)
-        self.inertias = np.min(dist_centers, axis=0)/2
+        if self.quantiser.n_clusters == 1:
+            dist_centers = pairwise.pairwise_distances(measures_concat)
+            np.fill_diagonal(dist_centers, 0)
+            self.inertias = np.max(dist_centers)/2
+        else:
+            dist_centers = pairwise.pairwise_distances(self.centers)
+            np.fill_diagonal(dist_centers, np.inf)
+            self.inertias = np.min(dist_centers, axis=0)/2
         return self
 
     def __call__(self, measure, sample_weight=None):
-- 
cgit v1.2.3


From 1abc47f5bf65ee3451a907ecfc9db84c0471ef93 Mon Sep 17 00:00:00 2001
From: martinroyer <16647869+martinroyer@users.noreply.github.com>
Date: Tue, 16 Jun 2020 22:36:31 +0200
Subject: Update src/python/gudhi/representations/vector_methods.py

---
 src/python/gudhi/representations/vector_methods.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/python/gudhi/representations/vector_methods.py')

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index a576267c..566c24a3 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -675,7 +675,7 @@ class Atol(BaseEstimator, TransformerMixin):
         if self.quantiser.n_clusters == 1:
             dist_centers = pairwise.pairwise_distances(measures_concat)
             np.fill_diagonal(dist_centers, 0)
-            self.inertias = np.max(dist_centers)/2
+            self.inertias = np.array([np.max(dist_centers)/2])
         else:
             dist_centers = pairwise.pairwise_distances(self.centers)
             np.fill_diagonal(dist_centers, np.inf)
-- 
cgit v1.2.3


From 4a558f9542283533d1218a35ce43751615ca2ffd Mon Sep 17 00:00:00 2001
From: martinroyer <16647869+martinroyer@users.noreply.github.com>
Date: Fri, 19 Jun 2020 14:29:55 +0200
Subject: fix for null inertias

---
 src/python/gudhi/representations/vector_methods.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/python/gudhi/representations/vector_methods.py')

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 566c24a3..aaf7ffeb 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -678,7 +678,7 @@ class Atol(BaseEstimator, TransformerMixin):
             self.inertias = np.array([np.max(dist_centers)/2])
         else:
             dist_centers = pairwise.pairwise_distances(self.centers)
-            np.fill_diagonal(dist_centers, np.inf)
+            dist_centers[dist_centers == 0] = np.inf
             self.inertias = np.min(dist_centers, axis=0)/2
         return self
 
-- 
cgit v1.2.3


From b7d9cc2b1e8f58f563d23c3588d785ced98222b3 Mon Sep 17 00:00:00 2001
From: martinroyer-buntu <martinpierreroyer@gmail.com>
Date: Fri, 3 Jul 2020 11:01:26 +0200
Subject: small optim

---
 src/python/gudhi/representations/vector_methods.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/python/gudhi/representations/vector_methods.py')

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index aaf7ffeb..5ca127f6 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -582,7 +582,7 @@ def _lapl_contrast(measure, centers, inertias):
 
 def _gaus_contrast(measure, centers, inertias):
     """contrast function for vectorising `measure` in ATOL"""
-    return np.exp(-pairwise.pairwise_distances(measure, Y=centers)**2 / inertias**2)
+    return np.exp(-pairwise.pairwise_distances(measure, Y=centers, squared=True) / inertias**2)
 
 def _indicator_contrast(diags, centers, inertias):
     """contrast function for vectorising `measure` in ATOL"""
-- 
cgit v1.2.3