path: root/src/python/gudhi
author     Marc Glisse <marc.glisse@inria.fr>    2020-04-29 23:00:17 +0200
committer  Marc Glisse <marc.glisse@inria.fr>    2020-04-29 23:00:17 +0200
commit     73a74011e4b5af0794f0463295beca924d32e0ee (patch)
tree       fb825dc77e9984594109f9ea48838d0544dfca1e /src/python/gudhi
parent     74155081bb8b3330c562d5c40d7f0a32fc188012 (diff)
parent     0bba67db83f33ff608366057d9c4f005fa6a514b (diff)
Merge remote-tracking branch 'origin/master' into dtmdensity
Diffstat (limited to 'src/python/gudhi')
-rw-r--r--  src/python/gudhi/cubical_complex.pyx          |  65
-rw-r--r--  src/python/gudhi/periodic_cubical_complex.pyx |  67
-rw-r--r--  src/python/gudhi/point_cloud/dtm.py           |  24
-rw-r--r--  src/python/gudhi/point_cloud/knn.py           | 151
-rw-r--r--  src/python/gudhi/simplex_tree.pxd             |  12
-rw-r--r--  src/python/gudhi/simplex_tree.pyx             | 250
-rw-r--r--  src/python/gudhi/wasserstein/__init__.py      |   1
-rw-r--r--  src/python/gudhi/wasserstein/barycenter.py    | 159
-rw-r--r--  src/python/gudhi/wasserstein/wasserstein.py (renamed from src/python/gudhi/wasserstein.py) |  42
9 files changed, 568 insertions, 203 deletions
diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx
index d5ad1266..007abcb6 100644
--- a/src/python/gudhi/cubical_complex.pyx
+++ b/src/python/gudhi/cubical_complex.pyx
@@ -35,7 +35,8 @@ cdef extern from "Cubical_complex_interface.h" namespace "Gudhi":
cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi":
cdef cppclass Cubical_complex_persistence_interface "Gudhi::Persistent_cohomology_interface<Gudhi::Cubical_complex::Cubical_complex_interface<>>":
Cubical_complex_persistence_interface(Bitmap_cubical_complex_base_interface * st, bool persistence_dim_max)
- vector[pair[int, pair[double, double]]] get_persistence(int homology_coeff_field, double min_persistence)
+ void compute_persistence(int homology_coeff_field, double min_persistence)
+ vector[pair[int, pair[double, double]]] get_persistence()
vector[int] betti_numbers()
vector[int] persistent_betti_numbers(double from_value, double to_value)
vector[pair[double,double]] intervals_in_dimension(int dimension)
@@ -129,8 +130,31 @@ cdef class CubicalComplex:
"""
return self.thisptr.dimension()
+ def compute_persistence(self, homology_coeff_field=11, min_persistence=0):
+ """This function computes the persistence of the complex, so it can be
+ accessed through :func:`persistent_betti_numbers`,
+ :func:`persistence_intervals_in_dimension`, etc. This function is
+ equivalent to :func:`persistence` when you do not want the list
+ :func:`persistence` returns.
+
+ :param homology_coeff_field: The homology coefficient field. Must be a
+ prime number
+ :type homology_coeff_field: int.
+ :param min_persistence: The minimum persistence value to take into
+ account (strictly greater than min_persistence). Default value is
+ 0.0.
+ Set min_persistence to -1.0 to see all values.
+ :type min_persistence: float.
+ :returns: Nothing.
+ """
+ if self.pcohptr != NULL:
+ del self.pcohptr
+ assert self.__is_defined()
+ self.pcohptr = new Cubical_complex_persistence_interface(self.thisptr, True)
+ self.pcohptr.compute_persistence(homology_coeff_field, min_persistence)
+
def persistence(self, homology_coeff_field=11, min_persistence=0):
- """This function returns the persistence of the complex.
+ """This function computes and returns the persistence of the complex.
:param homology_coeff_field: The homology coefficient field. Must be a
prime number
@@ -143,30 +167,22 @@ cdef class CubicalComplex:
:returns: list of pairs(dimension, pair(birth, death)) -- the
persistence of the complex.
"""
- if self.pcohptr != NULL:
- del self.pcohptr
- if self.thisptr != NULL:
- self.pcohptr = new Cubical_complex_persistence_interface(self.thisptr, True)
- cdef vector[pair[int, pair[double, double]]] persistence_result
- if self.pcohptr != NULL:
- persistence_result = self.pcohptr.get_persistence(homology_coeff_field, min_persistence)
- return persistence_result
+ self.compute_persistence(homology_coeff_field, min_persistence)
+ return self.pcohptr.get_persistence()
def betti_numbers(self):
"""This function returns the Betti numbers of the complex.
:returns: list of int -- The Betti numbers ([B0, B1, ..., Bn]).
- :note: betti_numbers function requires persistence function to be
+ :note: betti_numbers function requires :func:`compute_persistence` function to be
launched first.
:note: betti_numbers function always returns [1, 0, 0, ...] as infinity
filtration cubes are not removed from the complex.
"""
- cdef vector[int] bn_result
- if self.pcohptr != NULL:
- bn_result = self.pcohptr.betti_numbers()
- return bn_result
+ assert self.pcohptr != NULL, "compute_persistence() must be called before betti_numbers()"
+ return self.pcohptr.betti_numbers()
def persistent_betti_numbers(self, from_value, to_value):
"""This function returns the persistent Betti numbers of the complex.
@@ -181,13 +197,11 @@ cdef class CubicalComplex:
:returns: list of int -- The persistent Betti numbers ([B0, B1, ...,
Bn]).
- :note: persistent_betti_numbers function requires persistence
+ :note: persistent_betti_numbers function requires :func:`compute_persistence`
function to be launched first.
"""
- cdef vector[int] pbn_result
- if self.pcohptr != NULL:
- pbn_result = self.pcohptr.persistent_betti_numbers(<double>from_value, <double>to_value)
- return pbn_result
+ assert self.pcohptr != NULL, "compute_persistence() must be called before persistent_betti_numbers()"
+ return self.pcohptr.persistent_betti_numbers(<double>from_value, <double>to_value)
def persistence_intervals_in_dimension(self, dimension):
"""This function returns the persistence intervals of the complex in a
@@ -198,13 +212,8 @@ cdef class CubicalComplex:
:returns: The persistence intervals.
:rtype: numpy array of dimension 2
- :note: intervals_in_dim function requires persistence function to be
+ :note: intervals_in_dim function requires :func:`compute_persistence` function to be
launched first.
"""
- cdef vector[pair[double,double]] intervals_result
- if self.pcohptr != NULL:
- intervals_result = self.pcohptr.intervals_in_dimension(dimension)
- else:
- print("intervals_in_dim function requires persistence function"
- " to be launched first.", file=sys.stderr)
- return np.array(intervals_result)
+ assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()"
+ return np.array(self.pcohptr.intervals_in_dimension(dimension))
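For reference, a minimal usage sketch of the new split between compute_persistence() and the accessors it enables (illustrative values; assumes the gudhi Python module built from this branch):

    import gudhi

    # A 3x3 bitmap given by its top-dimensional cells (illustrative values).
    cc = gudhi.CubicalComplex(dimensions=[3, 3],
                              top_dimensional_cells=[1, 2, 3, 4, 5, 6, 7, 8, 9])
    # Compute persistence without materializing the (dim, (birth, death)) list...
    cc.compute_persistence(homology_coeff_field=2, min_persistence=0)
    # ...then query the accessors that previously required calling persistence().
    print(cc.betti_numbers())
    print(cc.persistence_intervals_in_dimension(0))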
diff --git a/src/python/gudhi/periodic_cubical_complex.pyx b/src/python/gudhi/periodic_cubical_complex.pyx
index fd08b976..246a3a02 100644
--- a/src/python/gudhi/periodic_cubical_complex.pyx
+++ b/src/python/gudhi/periodic_cubical_complex.pyx
@@ -32,7 +32,8 @@ cdef extern from "Cubical_complex_interface.h" namespace "Gudhi":
cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi":
cdef cppclass Periodic_cubical_complex_persistence_interface "Gudhi::Persistent_cohomology_interface<Gudhi::Cubical_complex::Cubical_complex_interface<Gudhi::cubical_complex::Bitmap_cubical_complex_periodic_boundary_conditions_base<double>>>":
Periodic_cubical_complex_persistence_interface(Periodic_cubical_complex_base_interface * st, bool persistence_dim_max)
- vector[pair[int, pair[double, double]]] get_persistence(int homology_coeff_field, double min_persistence)
+ void compute_persistence(int homology_coeff_field, double min_persistence)
+ vector[pair[int, pair[double, double]]] get_persistence()
vector[int] betti_numbers()
vector[int] persistent_betti_numbers(double from_value, double to_value)
vector[pair[double,double]] intervals_in_dimension(int dimension)
@@ -134,8 +135,31 @@ cdef class PeriodicCubicalComplex:
"""
return self.thisptr.dimension()
+ def compute_persistence(self, homology_coeff_field=11, min_persistence=0):
+ """This function computes the persistence of the complex, so it can be
+ accessed through :func:`persistent_betti_numbers`,
+ :func:`persistence_intervals_in_dimension`, etc. This function is
+ equivalent to :func:`persistence` when you do not want the list
+ :func:`persistence` returns.
+
+ :param homology_coeff_field: The homology coefficient field. Must be a
+ prime number
+ :type homology_coeff_field: int.
+ :param min_persistence: The minimum persistence value to take into
+ account (strictly greater than min_persistence). Default value is
+ 0.0.
+ Set min_persistence to -1.0 to see all values.
+ :type min_persistence: float.
+ :returns: Nothing.
+ """
+ if self.pcohptr != NULL:
+ del self.pcohptr
+ assert self.__is_defined()
+ self.pcohptr = new Periodic_cubical_complex_persistence_interface(self.thisptr, True)
+ self.pcohptr.compute_persistence(homology_coeff_field, min_persistence)
+
def persistence(self, homology_coeff_field=11, min_persistence=0):
- """This function returns the persistence of the complex.
+ """This function computes and returns the persistence of the complex.
:param homology_coeff_field: The homology coefficient field. Must be a
prime number
@@ -148,30 +172,22 @@ cdef class PeriodicCubicalComplex:
:returns: list of pairs(dimension, pair(birth, death)) -- the
persistence of the complex.
"""
- if self.pcohptr != NULL:
- del self.pcohptr
- if self.thisptr != NULL:
- self.pcohptr = new Periodic_cubical_complex_persistence_interface(self.thisptr, True)
- cdef vector[pair[int, pair[double, double]]] persistence_result
- if self.pcohptr != NULL:
- persistence_result = self.pcohptr.get_persistence(homology_coeff_field, min_persistence)
- return persistence_result
+ self.compute_persistence(homology_coeff_field, min_persistence)
+ return self.pcohptr.get_persistence()
def betti_numbers(self):
"""This function returns the Betti numbers of the complex.
:returns: list of int -- The Betti numbers ([B0, B1, ..., Bn]).
- :note: betti_numbers function requires persistence function to be
+ :note: betti_numbers function requires :func:`compute_persistence` function to be
launched first.
- :note: betti_numbers function always returns [1, 0, 0, ...] as infinity
+ :note: This function always returns the Betti numbers of a torus, as infinite
filtration cubes are not removed from the complex.
"""
- cdef vector[int] bn_result
- if self.pcohptr != NULL:
- bn_result = self.pcohptr.betti_numbers()
- return bn_result
+ assert self.pcohptr != NULL, "compute_persistence() must be called before betti_numbers()"
+ return self.pcohptr.betti_numbers()
def persistent_betti_numbers(self, from_value, to_value):
"""This function returns the persistent Betti numbers of the complex.
@@ -186,13 +202,11 @@ cdef class PeriodicCubicalComplex:
:returns: list of int -- The persistent Betti numbers ([B0, B1, ...,
Bn]).
- :note: persistent_betti_numbers function requires persistence
+ :note: persistent_betti_numbers function requires :func:`compute_persistence`
function to be launched first.
"""
- cdef vector[int] pbn_result
- if self.pcohptr != NULL:
- pbn_result = self.pcohptr.persistent_betti_numbers(<double>from_value, <double>to_value)
- return pbn_result
+ assert self.pcohptr != NULL, "compute_persistence() must be called before persistent_betti_numbers()"
+ return self.pcohptr.persistent_betti_numbers(<double>from_value, <double>to_value)
def persistence_intervals_in_dimension(self, dimension):
"""This function returns the persistence intervals of the complex in a
@@ -203,13 +217,8 @@ cdef class PeriodicCubicalComplex:
:returns: The persistence intervals.
:rtype: numpy array of dimension 2
- :note: intervals_in_dim function requires persistence function to be
+ :note: intervals_in_dim function requires :func:`compute_persistence` function to be
launched first.
"""
- cdef vector[pair[double,double]] intervals_result
- if self.pcohptr != NULL:
- intervals_result = self.pcohptr.intervals_in_dimension(dimension)
- else:
- print("intervals_in_dim function requires persistence function"
- " to be launched first.", file=sys.stderr)
- return np.array(intervals_result)
+ assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()"
+ return np.array(self.pcohptr.intervals_in_dimension(dimension))
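Same pattern for the periodic variant; a short sketch (illustrative values, assuming this branch is installed) where making both dimensions periodic should report the Betti numbers of the 2-torus, as the note above indicates:

    import gudhi

    pcc = gudhi.PeriodicCubicalComplex(dimensions=[3, 3],
                                       top_dimensional_cells=[1, 2, 3, 4, 5, 6, 7, 8, 9],
                                       periodic_dimensions=[True, True])
    pcc.compute_persistence(homology_coeff_field=2)
    print(pcc.betti_numbers())  # expected [1, 2, 1] for the 2-torus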
diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py
index e12eefa1..c5405526 100644
--- a/src/python/gudhi/point_cloud/dtm.py
+++ b/src/python/gudhi/point_cloud/dtm.py
@@ -7,11 +7,15 @@
# Modification(s):
# - YYYY/MM Author: Description of the modification
-from .knn import KNN
+from .knn import KNearestNeighbors
import numpy as np
+__author__ = "Marc Glisse"
+__copyright__ = "Copyright (C) 2020 Inria"
+__license__ = "MIT"
-class DTM:
+
+class DistanceToMeasure:
"""
Class to compute the distance to the empirical measure defined by a point set, as introduced in :cite:`dtm`.
"""
@@ -21,7 +25,9 @@ class DTM:
Args:
k (int): number of neighbors (possibly including the point itself).
q (float): order used to compute the distance to measure. Defaults to 2.
- kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNN`, except that metric="neighbors" means that :func:`transform` expects an array with the distances to the k nearest neighbors.
+ kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNearestNeighbors`, except that
+ metric="neighbors" means that :func:`transform` expects an array with the distances
+ to the k nearest neighbors.
"""
self.k = k
self.q = q
@@ -36,14 +42,22 @@ class DTM:
X (numpy.array): coordinates for mass points.
"""
if self.params.setdefault("metric", "euclidean") != "neighbors":
- self.knn = KNN(self.k, return_index=False, return_distance=True, sort_results=False, **self.params)
+ self.knn = KNearestNeighbors(
+ self.k, return_index=False, return_distance=True, sort_results=False, **self.params
+ )
self.knn.fit(X)
return self
def transform(self, X):
"""
Args:
- X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed", or distances to the k nearest neighbors if metric is "neighbors" (if the array has more than k columns, the remaining ones are ignored).
+ X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed",
+ or distances to the k nearest neighbors if metric is "neighbors" (if the array has more
+ than k columns, the remaining ones are ignored).
+
+ Returns:
+ numpy.array: a 1-d array with, for each point of X, its distance to the measure defined
+ by the argument of :func:`fit`.
"""
if self.params["metric"] == "neighbors":
distances = X[:, : self.k]
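A minimal sketch of the renamed DistanceToMeasure class (formerly DTM); the random points are purely illustrative:

    import numpy as np
    from gudhi.point_cloud.dtm import DistanceToMeasure

    pts = np.random.default_rng(0).random((100, 2))
    dtm = DistanceToMeasure(k=10, q=2)   # distance to measure using 10 neighbors
    dtm.fit(pts)                         # mass points
    vals = dtm.transform(pts)            # one DTM value per query point, shape (100,)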
diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py
index 8369f1f8..07553d6d 100644
--- a/src/python/gudhi/point_cloud/knn.py
+++ b/src/python/gudhi/point_cloud/knn.py
@@ -9,8 +9,14 @@
import numpy
+# TODO: https://github.com/facebookresearch/faiss
-class KNN:
+__author__ = "Marc Glisse"
+__copyright__ = "Copyright (C) 2020 Inria"
+__license__ = "MIT"
+
+
+class KNearestNeighbors:
"""
Class wrapping several implementations for computing the k nearest neighbors in a point set.
"""
@@ -36,6 +42,10 @@ class KNN:
sort_results (bool): if True, then distances and indices of each point are
sorted on return, so that the first column contains the closest points.
Otherwise, neighbors are returned in an arbitrary order. Defaults to True.
+ enable_autodiff (bool): if the input is a torch.tensor, jax.numpy.ndarray or tensorflow.Tensor, this
+ instructs the function to compute distances in a way that works with automatic differentiation.
+ This is experimental, not supported for all metrics, and requires the package EagerPy.
+ Defaults to False.
kwargs: additional parameters are forwarded to the backends.
"""
self.k = k
@@ -64,6 +74,8 @@ class KNN:
self.params["implementation"] = "ckdtree"
else:
self.params["implementation"] = "sklearn"
+ if not return_distance:
+ self.params["enable_autodiff"] = False
def fit_transform(self, X, y=None):
return self.fit(X).transform(X)
@@ -74,6 +86,14 @@ class KNN:
X (numpy.array): coordinates for reference points.
"""
self.ref_points = X
+ if self.params.get("enable_autodiff", False):
+ import eagerpy as ep
+
+ X = ep.astensor(X)
+ if self.params["implementation"] != "keops" or not isinstance(X, ep.PyTorchTensor):
+ # I don't know a clever way to reuse a GPU tensor from tensorflow in pytorch
+ # without copying to/from the CPU.
+ X = X.numpy()
if self.params["implementation"] == "ckdtree":
# sklearn could handle this, but it is much slower
from scipy.spatial import cKDTree
@@ -109,31 +129,122 @@ class KNN:
"""
Args:
X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed".
+
+ Returns:
+ numpy.array: if return_index, an array of shape (len(X), k) with the indices (in the argument
+ of :func:`fit`) of the k nearest neighbors to the points of X. If return_distance, an array of the
+ same shape with the distances to those neighbors. If both, a tuple with the two arrays, in this order.
"""
+ if self.params.get("enable_autodiff", False):
+ # pykeops does not support autodiff for kmin yet, but when it does in the future,
+ # we may want a special path.
+ import eagerpy as ep
+
+ save_return_index = self.return_index
+ self.return_index = True
+ self.return_distance = False
+ self.params["enable_autodiff"] = False
+ try:
+ newX = ep.astensor(X)
+ if self.params["implementation"] != "keops" or (
+ not isinstance(newX, ep.PyTorchTensor) and not isinstance(newX, ep.NumPyTensor)
+ ):
+ newX = newX.numpy()
+ else:
+ newX = newX.raw
+ neighbors = self.transform(newX)
+ finally:
+ self.return_index = save_return_index
+ self.return_distance = True
+ self.params["enable_autodiff"] = True
+ # We can implement more later as needed
+ assert self.metric == "minkowski"
+ p = self.params["p"]
+ Y = ep.astensor(self.ref_points)
+ neighbor_pts = Y[
+ neighbors,
+ ]
+ diff = neighbor_pts - X[:, None, :]
+ if isinstance(diff, ep.PyTorchTensor):
+ # https://github.com/jonasrauber/eagerpy/issues/6
+ distances = ep.astensor(diff.raw.norm(p, -1))
+ else:
+ distances = diff.norms.lp(p, -1)
+ if self.return_index:
+ return neighbors, distances.raw
+ else:
+ return distances.raw
+
metric = self.metric
k = self.k
if metric == "precomputed":
# scikit-learn could handle that, but they insist on calling fit() with an unused square array, which is too unnatural.
- X = numpy.array(X)
if self.return_index:
- neighbors = numpy.argpartition(X, k - 1)[:, 0:k]
- if self.params.get("sort_results", True):
- X = numpy.take_along_axis(X, neighbors, axis=-1)
- ngb_order = numpy.argsort(X, axis=-1)
- neighbors = numpy.take_along_axis(neighbors, ngb_order, axis=-1)
- else:
- ngb_order = neighbors
- if self.return_distance:
- distances = numpy.take_along_axis(X, ngb_order, axis=-1)
- return neighbors, distances
+ n_jobs = self.params.get("n_jobs", 1)
+ # Supposedly numpy can be compiled with OpenMP and handle this, but nobody does that?!
+ if n_jobs == 1:
+ neighbors = numpy.argpartition(X, k - 1)[:, 0:k]
+ if self.params.get("sort_results", True):
+ X = numpy.take_along_axis(X, neighbors, axis=-1)
+ ngb_order = numpy.argsort(X, axis=-1)
+ neighbors = numpy.take_along_axis(neighbors, ngb_order, axis=-1)
+ else:
+ ngb_order = neighbors
+ if self.return_distance:
+ distances = numpy.take_along_axis(X, ngb_order, axis=-1)
+ return neighbors, distances
+ else:
+ return neighbors
else:
- return neighbors
+ from joblib import Parallel, delayed, effective_n_jobs
+ from sklearn.utils import gen_even_slices
+
+ slices = gen_even_slices(len(X), effective_n_jobs(-1))
+ parallel = Parallel(backend="threading", n_jobs=-1)
+ if self.params.get("sort_results", True):
+
+ def func(M):
+ neighbors = numpy.argpartition(M, k - 1)[:, 0:k]
+ Y = numpy.take_along_axis(M, neighbors, axis=-1)
+ ngb_order = numpy.argsort(Y, axis=-1)
+ return numpy.take_along_axis(neighbors, ngb_order, axis=-1)
+
+ else:
+
+ def func(M):
+ return numpy.argpartition(M, k - 1)[:, 0:k]
+
+ neighbors = numpy.concatenate(parallel(delayed(func)(X[s]) for s in slices))
+ if self.return_distance:
+ distances = numpy.take_along_axis(X, neighbors, axis=-1)
+ return neighbors, distances
+ else:
+ return neighbors
if self.return_distance:
- distances = numpy.partition(X, k - 1)[:, 0:k]
- if self.params.get("sort_results"):
- # partition is not guaranteed to sort the lower half, although it often does
- distances.sort(axis=-1)
+ n_jobs = self.params.get("n_jobs", 1)
+ if n_jobs == 1:
+ distances = numpy.partition(X, k - 1)[:, 0:k]
+ if self.params.get("sort_results"):
+ # partition is not guaranteed to sort the lower half, although it often does
+ distances.sort(axis=-1)
+ else:
+ from joblib import Parallel, delayed, effective_n_jobs
+ from sklearn.utils import gen_even_slices
+
+ if self.params.get("sort_results"):
+
+ def func(M):
+ # Not partitioning in place, because we should not modify the user's array?
+ r = numpy.partition(M, k - 1)[:, 0:k]
+ r.sort(axis=-1)
+ return r
+
+ else:
+ func = lambda M: numpy.partition(M, k - 1)[:, 0:k]
+ slices = gen_even_slices(len(X), effective_n_jobs(-1))
+ parallel = Parallel(backend="threading", n_jobs=-1)
+ distances = numpy.concatenate(parallel(delayed(func)(X[s]) for s in slices))
return distances
return None
@@ -158,12 +269,11 @@ class KNN:
from pykeops.torch import LazyTensor
# 'float64' is slow except on super expensive GPUs. Allow it with some param?
- XX = torch.tensor(X, dtype=torch.float32)
+ XX = torch.as_tensor(X, dtype=torch.float32)
if X is self.ref_points:
YY = XX
else:
- YY = torch.tensor(self.ref_points, dtype=torch.float32)
-
+ YY = torch.as_tensor(self.ref_points, dtype=torch.float32)
p = self.params["p"]
if p == numpy.inf:
# Requires pykeops 1.4 or later
@@ -188,7 +298,6 @@ class KNN:
distances = distances ** (1.0 / p)
return distances
return None
- # FIXME: convert everything back to numpy arrays or not?
if self.params["implementation"] == "ckdtree":
qargs = {key: val for key, val in self.params.items() if key in {"p", "eps", "n_jobs"}}
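A minimal sketch of the renamed KNearestNeighbors class (formerly KNN), exercising whichever backend is available by default; the random data is illustrative only:

    import numpy as np
    from gudhi.point_cloud.knn import KNearestNeighbors

    pts = np.random.default_rng(0).random((100, 3))
    knn = KNearestNeighbors(k=4, return_index=True, return_distance=True)
    idx, dist = knn.fit_transform(pts)   # both arrays have shape (100, 4)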
diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd
index 82f155de..1d4ed926 100644
--- a/src/python/gudhi/simplex_tree.pxd
+++ b/src/python/gudhi/simplex_tree.pxd
@@ -48,8 +48,7 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi":
int dimension()
int upper_bound_dimension()
bool find_simplex(vector[int] simplex)
- bool insert_simplex_and_subfaces(vector[int] simplex,
- double filtration)
+ bool insert(vector[int] simplex, double filtration)
vector[pair[vector[int], double]] get_star(vector[int] simplex)
vector[pair[vector[int], double]] get_cofaces(vector[int] simplex,
int dimension)
@@ -57,6 +56,8 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi":
void remove_maximal_simplex(vector[int] simplex)
bool prune_above_filtration(double filtration)
bool make_filtration_non_decreasing()
+ void compute_extended_filtration()
+ vector[vector[pair[int, pair[double, double]]]] compute_extended_persistence_subdiagrams(vector[pair[int, pair[double, double]]] dgm, double min_persistence)
# Iterators over Simplex tree
pair[vector[int], double] get_simplex_and_filtration(Simplex_tree_simplex_handle f_simplex)
Simplex_tree_simplices_iterator get_simplices_iterator_begin()
@@ -69,9 +70,12 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi":
cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi":
cdef cppclass Simplex_tree_persistence_interface "Gudhi::Persistent_cohomology_interface<Gudhi::Simplex_tree<Gudhi::Simplex_tree_options_full_featured>>":
Simplex_tree_persistence_interface(Simplex_tree_interface_full_featured * st, bool persistence_dim_max)
- vector[pair[int, pair[double, double]]] get_persistence(int homology_coeff_field, double min_persistence)
+ void compute_persistence(int homology_coeff_field, double min_persistence)
+ vector[pair[int, pair[double, double]]] get_persistence()
vector[int] betti_numbers()
vector[int] persistent_betti_numbers(double from_value, double to_value)
vector[pair[double,double]] intervals_in_dimension(int dimension)
- void write_output_diagram(string diagram_file_name)
+ void write_output_diagram(string diagram_file_name) except +
vector[pair[vector[int], vector[int]]] persistence_pairs()
+ pair[vector[vector[int]], vector[vector[int]]] lower_star_generators()
+ pair[vector[vector[int]], vector[vector[int]]] flag_generators()
diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx
index c01cc905..55115cca 100644
--- a/src/python/gudhi/simplex_tree.pyx
+++ b/src/python/gudhi/simplex_tree.pyx
@@ -9,6 +9,7 @@
from cython.operator import dereference, preincrement
from libc.stdint cimport intptr_t
+import numpy
from numpy import array as np_array
cimport simplex_tree
@@ -90,7 +91,7 @@ cdef class SimplexTree:
(with more :meth:`assign_filtration` or
:meth:`make_filtration_non_decreasing` for instance) before calling
any function that relies on the filtration property, like
- :meth:`initialize_filtration`.
+ :meth:`persistence`.
"""
self.get_ptr().assign_simplex_filtration(simplex, filtration)
@@ -98,16 +99,7 @@ cdef class SimplexTree:
"""This function initializes and sorts the simplicial complex
filtration vector.
- .. note::
-
- This function must be launched before
- :func:`persistence()<gudhi.SimplexTree.persistence>`,
- :func:`betti_numbers()<gudhi.SimplexTree.betti_numbers>`,
- :func:`persistent_betti_numbers()<gudhi.SimplexTree.persistent_betti_numbers>`,
- or :func:`get_filtration()<gudhi.SimplexTree.get_filtration>`
- after :func:`inserting<gudhi.SimplexTree.insert>` or
- :func:`removing<gudhi.SimplexTree.remove_maximal_simplex>`
- simplices.
+ .. deprecated:: 3.2.0
"""
self.get_ptr().initialize_filtration()
@@ -139,9 +131,9 @@ cdef class SimplexTree:
This function is not constant time because it can recompute
dimension if required (can be triggered by
- :func:`remove_maximal_simplex()<gudhi.SimplexTree.remove_maximal_simplex>`
+ :func:`remove_maximal_simplex`
or
- :func:`prune_above_filtration()<gudhi.SimplexTree.prune_above_filtration>`
+ :func:`prune_above_filtration`
methods).
"""
return self.get_ptr().dimension()
@@ -166,9 +158,9 @@ cdef class SimplexTree:
This function must be used with caution because it disables
dimension recomputation when required
(this recomputation can be triggered by
- :func:`remove_maximal_simplex()<gudhi.SimplexTree.remove_maximal_simplex>`
+ :func:`remove_maximal_simplex`
or
- :func:`prune_above_filtration()<gudhi.SimplexTree.prune_above_filtration>`
+ :func:`prune_above_filtration`
).
"""
self.get_ptr().set_dimension(<int>dimension)
@@ -182,10 +174,7 @@ cdef class SimplexTree:
:returns: true if the simplex was found, false otherwise.
:rtype: bool
"""
- cdef vector[int] csimplex
- for i in simplex:
- csimplex.push_back(i)
- return self.get_ptr().find_simplex(csimplex)
+ return self.get_ptr().find_simplex(simplex)
def insert(self, simplex, filtration=0.0):
"""This function inserts the given N-simplex and its subfaces with the
@@ -202,11 +191,7 @@ cdef class SimplexTree:
otherwise (whatever its original filtration value).
:rtype: bool
"""
- cdef vector[int] csimplex
- for i in simplex:
- csimplex.push_back(i)
- return self.get_ptr().insert_simplex_and_subfaces(csimplex,
- <double>filtration)
+ return self.get_ptr().insert(simplex, <double>filtration)
def get_simplices(self):
"""This function returns a generator with simplices and their given
@@ -308,17 +293,12 @@ cdef class SimplexTree:
.. note::
- Be aware that removing is shifting data in a flat_map
- (:func:`initialize_filtration()<gudhi.SimplexTree.initialize_filtration>` to be done).
-
- .. note::
-
The dimension of the simplicial complex may be lower after calling
remove_maximal_simplex than it was before. However,
- :func:`upper_bound_dimension()<gudhi.SimplexTree.upper_bound_dimension>`
+ :func:`upper_bound_dimension`
method will return the old value, which
remains a valid upper bound. If you care, you can call
- :func:`dimension()<gudhi.SimplexTree.dimension>`
+ :func:`dimension`
to recompute the exact dimension.
"""
self.get_ptr().remove_maximal_simplex(simplex)
@@ -334,24 +314,14 @@ cdef class SimplexTree:
.. note::
- Some simplex tree functions require the filtration to be valid.
- prune_above_filtration function is not launching
- :func:`initialize_filtration()<gudhi.SimplexTree.initialize_filtration>`
- but returns the filtration modification
- information. If the complex has changed , please call
- :func:`initialize_filtration()<gudhi.SimplexTree.initialize_filtration>`
- to recompute it.
-
- .. note::
-
Note that the dimension of the simplicial complex may be lower
after calling
- :func:`prune_above_filtration()<gudhi.SimplexTree.prune_above_filtration>`
+ :func:`prune_above_filtration`
than it was before. However,
- :func:`upper_bound_dimension()<gudhi.SimplexTree.upper_bound_dimension>`
+ :func:`upper_bound_dimension`
will return the old value, which remains a
valid upper bound. If you care, you can call
- :func:`dimension()<gudhi.SimplexTree.dimension>`
+ :func:`dimension`
method to recompute the exact dimension.
"""
return self.get_ptr().prune_above_filtration(filtration)
@@ -382,22 +352,63 @@ cdef class SimplexTree:
:returns: True if any filtration value was modified,
False if the filtration was already non-decreasing.
:rtype: bool
+ """
+ return self.get_ptr().make_filtration_non_decreasing()
+
+ def extend_filtration(self):
+ """ Extend filtration for computing extended persistence. This function only uses the
+ filtration values at the 0-dimensional simplices, and computes the extended persistence
+ diagram induced by the lower-star filtration computed with these values.
+
+ .. note::
+ Note that after calling this function, the filtration
+ values are actually modified within the Simplex_tree.
+ The function :func:`extended_persistence`
+ retrieves the original values.
.. note::
- Some simplex tree functions require the filtration to be valid.
- make_filtration_non_decreasing function is not launching
- :func:`initialize_filtration()<gudhi.SimplexTree.initialize_filtration>`
- but returns the filtration modification
- information. If the complex has changed , please call
- :func:`initialize_filtration()<gudhi.SimplexTree.initialize_filtration>`
- to recompute it.
+ Note that this code creates an extra vertex internally, so you should make sure that
+ the Simplex_tree does not contain a vertex with the largest possible value (i.e., 4294967295).
"""
- return self.get_ptr().make_filtration_non_decreasing()
+ self.get_ptr().compute_extended_filtration()
+
+ def extended_persistence(self, homology_coeff_field=11, min_persistence=0):
+ """This function retrieves good values for extended persistence, and separate the diagrams
+ into the Ordinary, Relative, Extended+ and Extended- subdiagrams.
+
+ :param homology_coeff_field: The homology coefficient field. Must be a
+ prime number. Default value is 11.
+ :type homology_coeff_field: int.
+ :param min_persistence: The minimum persistence value (i.e., the absolute value of the difference between the persistence diagram point coordinates) to take into
+ account (strictly greater than min_persistence). Default value is
+ 0.0.
+ Set min_persistence to -1.0 to see all values.
+ :type min_persistence: float.
+ :returns: A list of four persistence diagrams in the format described in :func:`persistence`. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. See https://link.springer.com/article/10.1007/s10208-008-9027-z and/or section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes.
+
+ .. note::
+
+ This function should be called only if :func:`extend_filtration` has been called first!
+
+ .. note::
+
+ The coordinates of the persistence diagram points might be a little different than the
+ original filtration values due to the internal transformation (scaling to [-2,-1]) that is
+ performed on these values during the computation of extended persistence.
+ """
+ cdef vector[pair[int, pair[double, double]]] persistence_result
+ if self.pcohptr != NULL:
+ del self.pcohptr
+ self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), False)
+ self.pcohptr.compute_persistence(homology_coeff_field, -1.)
+ persistence_result = self.pcohptr.get_persistence()
+ return self.get_ptr().compute_extended_persistence_subdiagrams(persistence_result, min_persistence)
+
def persistence(self, homology_coeff_field=11, min_persistence=0, persistence_dim_max = False):
- """This function returns the persistence of the simplicial complex.
+ """This function computes and returns the persistence of the simplicial complex.
:param homology_coeff_field: The homology coefficient field. Must be a
prime number. Default value is 11.
@@ -405,7 +416,7 @@ cdef class SimplexTree:
:param min_persistence: The minimum persistence value to take into
account (strictly greater than min_persistence). Default value is
0.0.
- Sets min_persistence to -1.0 to see all values.
+ Set min_persistence to -1.0 to see all values.
:type min_persistence: float.
:param persistence_dim_max: If true, the persistent homology for the
maximal dimension in the complex is computed. If false, it is
@@ -414,13 +425,32 @@ cdef class SimplexTree:
:returns: The persistence of the simplicial complex.
:rtype: list of pairs(dimension, pair(birth, death))
"""
+ self.compute_persistence(homology_coeff_field, min_persistence, persistence_dim_max)
+ return self.pcohptr.get_persistence()
+
+ def compute_persistence(self, homology_coeff_field=11, min_persistence=0, persistence_dim_max = False):
+ """This function computes the persistence of the simplicial complex, so it can be accessed through
+ :func:`persistent_betti_numbers`, :func:`persistence_pairs`, etc. This function is equivalent to :func:`persistence`
+ when you do not want the list :func:`persistence` returns.
+
+ :param homology_coeff_field: The homology coefficient field. Must be a
+ prime number. Default value is 11.
+ :type homology_coeff_field: int.
+ :param min_persistence: The minimum persistence value to take into
+ account (strictly greater than min_persistence). Default value is
+ 0.0.
+ Set min_persistence to -1.0 to see all values.
+ :type min_persistence: float.
+ :param persistence_dim_max: If true, the persistent homology for the
+ maximal dimension in the complex is computed. If false, it is
+ ignored. Default is false.
+ :type persistence_dim_max: bool
+ :returns: Nothing.
+ """
if self.pcohptr != NULL:
del self.pcohptr
self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), persistence_dim_max)
- cdef vector[pair[int, pair[double, double]]] persistence_result
- if self.pcohptr != NULL:
- persistence_result = self.pcohptr.get_persistence(homology_coeff_field, min_persistence)
- return persistence_result
+ self.pcohptr.compute_persistence(homology_coeff_field, min_persistence)
def betti_numbers(self):
"""This function returns the Betti numbers of the simplicial complex.
@@ -429,16 +459,11 @@ cdef class SimplexTree:
:rtype: list of int
:note: betti_numbers function requires
- :func:`persistence()<gudhi.SimplexTree.persistence>`
+ :func:`compute_persistence`
function to be launched first.
"""
- cdef vector[int] bn_result
- if self.pcohptr != NULL:
- bn_result = self.pcohptr.betti_numbers()
- else:
- print("betti_numbers function requires persistence function"
- " to be launched first.")
- return bn_result
+ assert self.pcohptr != NULL, "compute_persistence() must be called before betti_numbers()"
+ return self.pcohptr.betti_numbers()
def persistent_betti_numbers(self, from_value, to_value):
"""This function returns the persistent Betti numbers of the
@@ -455,16 +480,11 @@ cdef class SimplexTree:
:rtype: list of int
:note: persistent_betti_numbers function requires
- :func:`persistence()<gudhi.SimplexTree.persistence>`
+ :func:`compute_persistence`
function to be launched first.
"""
- cdef vector[int] pbn_result
- if self.pcohptr != NULL:
- pbn_result = self.pcohptr.persistent_betti_numbers(<double>from_value, <double>to_value)
- else:
- print("persistent_betti_numbers function requires persistence function"
- " to be launched first.")
- return pbn_result
+ assert self.pcohptr != NULL, "compute_persistence() must be called before persistent_betti_numbers()"
+ return self.pcohptr.persistent_betti_numbers(<double>from_value, <double>to_value)
def persistence_intervals_in_dimension(self, dimension):
"""This function returns the persistence intervals of the simplicial
@@ -476,16 +496,11 @@ cdef class SimplexTree:
:rtype: numpy array of dimension 2
:note: intervals_in_dim function requires
- :func:`persistence()<gudhi.SimplexTree.persistence>`
+ :func:`compute_persistence`
function to be launched first.
"""
- cdef vector[pair[double,double]] intervals_result
- if self.pcohptr != NULL:
- intervals_result = self.pcohptr.intervals_in_dimension(dimension)
- else:
- print("intervals_in_dim function requires persistence function"
- " to be launched first.")
- return np_array(intervals_result)
+ assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()"
+ return np_array(self.pcohptr.intervals_in_dimension(dimension))
def persistence_pairs(self):
"""This function returns a list of persistence birth and death simplices pairs.
@@ -494,33 +509,68 @@ cdef class SimplexTree:
:rtype: list of pair of list of int
:note: persistence_pairs function requires
- :func:`persistence()<gudhi.SimplexTree.persistence>`
+ :func:`compute_persistence`
function to be launched first.
"""
- cdef vector[pair[vector[int],vector[int]]] persistence_pairs_result
- if self.pcohptr != NULL:
- persistence_pairs_result = self.pcohptr.persistence_pairs()
- else:
- print("persistence_pairs function requires persistence function"
- " to be launched first.")
- return persistence_pairs_result
+ assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_pairs()"
+ return self.pcohptr.persistence_pairs()
- def write_persistence_diagram(self, persistence_file=''):
+ def write_persistence_diagram(self, persistence_file):
"""This function writes the persistence intervals of the simplicial
complex in a user given file name.
- :param persistence_file: The specific dimension.
+ :param persistence_file: Name of the file.
:type persistence_file: string.
:note: intervals_in_dim function requires
- :func:`persistence()<gudhi.SimplexTree.persistence>`
+ :func:`compute_persistence`
function to be launched first.
"""
- if self.pcohptr != NULL:
- if persistence_file != '':
- self.pcohptr.write_output_diagram(persistence_file.encode('utf-8'))
- else:
- print("persistence_file must be specified")
+ assert self.pcohptr != NULL, "compute_persistence() must be called before write_persistence_diagram()"
+ self.pcohptr.write_output_diagram(persistence_file.encode('utf-8'))
+
+ def lower_star_persistence_generators(self):
+ """Assuming this is a lower-star filtration, this function returns the persistence pairs,
+ where each simplex is replaced with the vertex that gave it its filtration value.
+
+ :returns: First the regular persistence pairs, grouped by dimension, with one vertex per extremity,
+ and second the essential features, grouped by dimension, with one vertex each
+ :rtype: Tuple[List[numpy.array[int] of shape (n,2)], List[numpy.array[int] of shape (m,)]]
+
+ :note: lower_star_persistence_generators requires that `persistence()` be called first.
+ """
+ assert self.pcohptr != NULL, "lower_star_persistence_generators() requires that persistence() be called first."
+ gen = self.pcohptr.lower_star_generators()
+ normal = [np_array(d).reshape(-1,2) for d in gen.first]
+ infinite = [np_array(d) for d in gen.second]
+ return (normal, infinite)
+
+ def flag_persistence_generators(self):
+ """Assuming this is a flag complex, this function returns the persistence pairs,
+ where each simplex is replaced with the vertices of the edges that gave it its filtration value.
+
+ :returns: First the regular persistence pairs of dimension 0, with one vertex for birth and two for death;
+ then the other regular persistence pairs, grouped by dimension, with 2 vertices per extremity;
+ then the connected components, with one vertex each;
+ finally the other essential features, grouped by dimension, with 2 vertices for birth.
+ :rtype: Tuple[numpy.array[int] of shape (n,3), List[numpy.array[int] of shape (m,4)], numpy.array[int] of shape (l,), List[numpy.array[int] of shape (k,2)]]
+
+ :note: flag_persistence_generators requires that `persistence()` be called first.
+ """
+ assert self.pcohptr != NULL, "flag_persistence_generators() requires that persistence() be called first."
+ gen = self.pcohptr.flag_generators()
+ if len(gen.first) == 0:
+ normal0 = numpy.empty((0,3))
+ normals = []
+ else:
+ l = iter(gen.first)
+ normal0 = np_array(next(l)).reshape(-1,3)
+ normals = [np_array(d).reshape(-1,4) for d in l]
+ if len(gen.second) == 0:
+ infinite0 = numpy.empty(0)
+ infinites = []
else:
- print("intervals_in_dim function requires persistence function"
- " to be launched first.")
+ l = iter(gen.second)
+ infinite0 = np_array(next(l))
+ infinites = [np_array(d).reshape(-1,2) for d in l]
+ return (normal0, normals, infinite0, infinites)
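To illustrate the new SimplexTree entry points above (compute_persistence, extend_filtration/extended_persistence and the generator accessors), a hedged usage sketch on a toy lower-star filtration; values are illustrative and assume this branch is installed:

    import gudhi

    st = gudhi.SimplexTree()
    # Toy lower-star filtration on a filled triangle (illustrative values).
    st.insert([0], 0.0); st.insert([1], 1.0); st.insert([2], 2.0)
    st.insert([0, 1], 1.0); st.insert([1, 2], 2.0); st.insert([0, 2], 2.0)
    st.insert([0, 1, 2], 2.0)

    # Extended persistence: extend the filtration first, then split the diagram
    # into the Ordinary, Relative, Extended+ and Extended- subdiagrams.
    st.extend_filtration()
    ordinary, relative, ext_plus, ext_minus = st.extended_persistence()

    # Generator access requires (compute_)persistence() on an unextended tree.
    st2 = gudhi.SimplexTree()
    st2.insert([0], 0.0); st2.insert([1], 1.0); st2.insert([0, 1], 1.0)
    st2.compute_persistence(persistence_dim_max=True)
    pairs, essential = st2.lower_star_persistence_generators()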
diff --git a/src/python/gudhi/wasserstein/__init__.py b/src/python/gudhi/wasserstein/__init__.py
new file mode 100644
index 00000000..ed225ba4
--- /dev/null
+++ b/src/python/gudhi/wasserstein/__init__.py
@@ -0,0 +1 @@
+from .wasserstein import wasserstein_distance
diff --git a/src/python/gudhi/wasserstein/barycenter.py b/src/python/gudhi/wasserstein/barycenter.py
new file mode 100644
index 00000000..de7aea81
--- /dev/null
+++ b/src/python/gudhi/wasserstein/barycenter.py
@@ -0,0 +1,159 @@
+# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+# Author(s): Theo Lacombe
+#
+# Copyright (C) 2019 Inria
+#
+# Modification(s):
+# - YYYY/MM Author: Description of the modification
+
+
+import ot
+import numpy as np
+import scipy.spatial.distance as sc
+
+from gudhi.wasserstein import wasserstein_distance
+
+
+def _mean(x, m):
+ '''
+ :param x: a list of 2D-points, off diagonal, x_0... x_{k-1}
+ :param m: total number of points taken into account,
+ that is we have (m-k) copies of diagonal
+ :returns: the weighted mean of x with (m-k) copies of the diagonal
+ '''
+ k = len(x)
+ if k > 0:
+ w = np.mean(x, axis=0)
+ w_delta = (w[0] + w[1]) / 2 * np.ones(2)
+ return (k * w + (m-k) * w_delta) / m
+ else:
+ return np.array([0, 0])
+
+
+def lagrangian_barycenter(pdiagset, init=None, verbose=False):
+ '''
+ :param pdiagset: a list of ``numpy.array`` of shape `(n x 2)`
+ (`n` can vary), encoding a set of
+ persistence diagrams with only finite coordinates.
+ :param init: The initial value for barycenter estimate.
+ If ``None``, init is made on a random diagram from the dataset.
+ Otherwise, it can be an ``int``
+ (then initialization is made on ``pdiagset[init]``)
+ or a `(n x 2)` ``numpy.array`` encoding
+ a persistence diagram with `n` points.
+ :type init: ``int``, or (n x 2) ``np.array``
+ :param verbose: if ``True``, returns additional information about the
+ barycenter.
+ :type verbose: boolean
+ :returns: If not verbose (default), a ``numpy.array`` encoding
+ the barycenter estimate of pdiagset
+ (local minimum of the energy function).
+ If ``pdiagset`` is empty, returns ``None``.
+ If verbose, returns a couple ``(Y, log)``
+ where ``Y`` is the barycenter estimate,
+ and ``log`` is a ``dict`` that contains additional information:
+
+ - `"groupings"`, a list of list of pairs ``(i,j)``.
+ Namely, ``G[k] = [...(i, j)...]``, where ``(i,j)`` indicates
+ that ``pdiagset[k][i]`` is matched to ``Y[j]``
+ if ``i = -1`` or ``j = -1``, it means they
+ represent the diagonal.
+
+ - `"energy"`, ``float`` representing the Frechet energy value obtained.
+ It is the mean of squared distances of observations to the output.
+
+ - `"nb_iter"`, ``int`` number of iterations performed before convergence of the algorithm.
+ '''
+ X = pdiagset # to shorten notations, not a copy
+ m = len(X) # number of diagrams we are averaging
+ if m == 0:
+ print("Warning: computing barycenter of empty diag set. Returns None")
+ return None
+
+ # store the number of off-diagonal point for each of the X_i
+ nb_off_diag = np.array([len(X_i) for X_i in X])
+ # Initialisation of barycenter
+ if init is None:
+ i0 = np.random.randint(m) # Index of first state for the barycenter
+ Y = X[i0].copy()
+ else:
+ if type(init)==int:
+ Y = X[init].copy()
+ else:
+ Y = init.copy()
+
+ nb_iter = 0
+
+ converged = False # stopping criterion
+ while not converged:
+ nb_iter += 1
+ K = len(Y) # current nb of points in Y (some might be on diagonal)
+ G = np.full((K, m), -1, dtype=int) # will store for each j, the (index)
+ # point matched in each other diagram
+ #(might be the diagonal).
+ # that is G[j, i] = k <=> y_j is matched to
+ # x_k in the i-th diagram X[i]
+ updated_points = np.zeros((K, 2)) # will store the new positions of
+ # the points of Y.
+ # If points disappear, they are thrown
+ # on [0,0] by default.
+ new_created_points = [] # will store potential new points.
+
+ # Step 1 : compute optimal matching (Y, X_i) for each X_i
+ # and create new points in Y if needed
+ for i in range(m):
+ _, indices = wasserstein_distance(Y, X[i], matching=True, order=2., internal_p=2.)
+ for y_j, x_i_j in indices:
+ if y_j >= 0: # we matched an off diagonal point to x_i_j...
+ if x_i_j >= 0: # ...which is also an off-diagonal point.
+ G[y_j, i] = x_i_j
+ else: # ...which is a diagonal point
+ G[y_j, i] = -1 # -1 stands for the diagonal (mask)
+ else: # We matched a diagonal point to x_i_j...
+ if x_i_j >= 0: # which is an off-diag point!
+ # need to create new point in Y
+ new_y = _mean(np.array([X[i][x_i_j]]), m)
+ # Average this point with (m-1) copies of Delta
+ new_created_points.append(new_y)
+
+ # Step 2 : Update current point position thanks to groupings computed
+ to_delete = []
+ for j in range(K):
+ matched_points = [X[i][G[j, i]] for i in range(m) if G[j, i] > -1]
+ new_y_j = _mean(matched_points, m)
+ if not np.array_equal(new_y_j, np.array([0,0])):
+ updated_points[j] = new_y_j
+ else: # this point is no longer of any use.
+ to_delete.append(j)
+ # we remove the point to be deleted now.
+ updated_points = np.delete(updated_points, to_delete, axis=0)
+
+ # we cannot converge if there have been newly created points.
+ if new_created_points:
+ Y = np.concatenate((updated_points, new_created_points))
+ else:
+ # Step 3 : we check convergence
+ if np.array_equal(updated_points, Y):
+ converged = True
+ Y = updated_points
+
+
+ if verbose:
+ groupings = []
+ energy = 0
+ log = {}
+ n_y = len(Y)
+ for i in range(m):
+ cost, edges = wasserstein_distance(Y, X[i], matching=True, order=2., internal_p=2.)
+ groupings.append(edges)
+ energy += cost
+ log["groupings"] = groupings
+ energy = energy/m
+ log["energy"] = energy
+ log["nb_iter"] = nb_iter
+
+ return Y, log
+ else:
+ return Y
+
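A minimal sketch of lagrangian_barycenter() from the new gudhi.wasserstein.barycenter module; the three toy diagrams are illustrative, and POT (the `ot` package) must be installed:

    import numpy as np
    from gudhi.wasserstein.barycenter import lagrangian_barycenter

    dg1 = np.array([[0.2, 0.5], [0.2, 1.0]])
    dg2 = np.array([[0.3, 0.6]])
    dg3 = np.array([[0.25, 0.55], [0.1, 0.15]])

    # Barycenter estimate of the three diagrams; with verbose=True the call also
    # returns the groupings, the Frechet energy and the number of iterations.
    Y, log = lagrangian_barycenter([dg1, dg2, dg3], init=0, verbose=True)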
diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py
index 3dd993f9..efc851a0 100644
--- a/src/python/gudhi/wasserstein.py
+++ b/src/python/gudhi/wasserstein/wasserstein.py
@@ -9,11 +9,14 @@
import numpy as np
import scipy.spatial.distance as sc
+
try:
import ot
except ImportError:
print("POT (Python Optimal Transport) package is not installed. Try to run $ conda install -c conda-forge pot ; or $ pip install POT")
+
+# Currently unused, but Théo says it is likely to be used again.
def _proj_on_diag(X):
'''
:param X: (n x 2) array encoding the points of a persistent diagram.
@@ -23,28 +26,36 @@ def _proj_on_diag(X):
return np.array([Z , Z]).T
-def _build_dist_matrix(X, Y, order=2., internal_p=2.):
+def _dist_to_diag(X, internal_p):
+ '''
+ :param X: (n x 2) array encoding the points of a persistent diagram.
+ :param internal_p: Ground metric (i.e. norm L^p).
+ :returns: (n) array encoding the (respective orthogonal) distances of the points to the diagonal
+
+ .. note::
+ Assumes that the points are above the diagonal.
+ '''
+ return (X[:, 1] - X[:, 0]) * 2 ** (1.0 / internal_p - 1)
+
+
+def _build_dist_matrix(X, Y, order, internal_p):
'''
:param X: (n x 2) numpy.array encoding the (points of the) first diagram.
:param Y: (m x 2) numpy.array encoding the second diagram.
:param order: exponent for the Wasserstein metric.
:param internal_p: Ground metric (i.e. norm L^p).
- :returns: (n+1) x (m+1) np.array encoding the cost matrix C.
- For 0 <= i < n, 0 <= j < m, C[i,j] encodes the distance between X[i] and Y[j],
- while C[i, m] (resp. C[n, j]) encodes the distance (to the p) between X[i] (resp Y[j])
+ :returns: (n+1) x (m+1) np.array encoding the cost matrix C.
+ For 0 <= i < n, 0 <= j < m, C[i,j] encodes the distance between X[i] and Y[j],
+ while C[i, m] (resp. C[n, j]) encodes the distance (to the p) between X[i] (resp Y[j])
and its orthogonal projection onto the diagonal.
note also that C[n, m] = 0 (it costs nothing to move from the diagonal to the diagonal).
'''
- Xdiag = _proj_on_diag(X)
- Ydiag = _proj_on_diag(Y)
+ Cxd = _dist_to_diag(X, internal_p)**order
+ Cdy = _dist_to_diag(Y, internal_p)**order
if np.isinf(internal_p):
C = sc.cdist(X,Y, metric='chebyshev')**order
- Cxd = np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order
- Cdy = np.linalg.norm(Y - Ydiag, ord=internal_p, axis=1)**order
else:
C = sc.cdist(X,Y, metric='minkowski', p=internal_p)**order
- Cxd = np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order
- Cdy = np.linalg.norm(Y - Ydiag, ord=internal_p, axis=1)**order
Cf = np.hstack((C, Cxd[:,None]))
Cdy = np.append(Cdy, 0)
@@ -58,24 +69,23 @@ def _perstot(X, order, internal_p):
:param X: (n x 2) numpy.array (points of a given diagram).
:param order: exponent for Wasserstein. Default value is 2.
:param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm).
- :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram).
+ :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram).
'''
- Xdiag = _proj_on_diag(X)
- return (np.sum(np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order))**(1./order)
+ return np.linalg.norm(_dist_to_diag(X, internal_p), ord=order)
def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2.):
'''
- :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points
+ :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points
(i.e. with infinite coordinate).
:param Y: (m x 2) numpy.array encoding the second diagram.
:param matching: if True, computes and returns the optimal matching between X and Y, encoded as
a (n x 2) np.array [...[i,j]...], meaning the i-th point in X is matched to
the j-th point in Y, with the convention (-1) represents the diagonal.
:param order: exponent for Wasserstein; Default value is 2.
- :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2);
+ :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2);
Default value is 2 (Euclidean norm).
- :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with
+ :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with
respect to the internal_p-norm as ground metric.
If matching is set to True, also returns the optimal matching between X and Y.
'''