Merge remote-tracking branch 'origin/master' into dtm

author: Marc Glisse <marc.glisse@inria.fr> 2020-04-14 19:49:49 +0200
committer: Marc Glisse <marc.glisse@inria.fr> 2020-04-14 19:49:49 +0200
commit: 3f1e4bf5f139afe887ae501f20c5d3f40b5a6f79 (patch)
tree: 936aa887b1e7a0e23854262f17a5d21cf2f604c1 /src/python/gudhi
parent: 9518287cfa2a62948ede2e7d17d5c9f29092e0f4 (diff)
parent: 6d02ca0e077cc9750275abdfc024429cec0ba5a5 (diff)
5 files changed, 221 insertions, 7 deletions
diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd
index 82f155de..595f22bb 100644
--- a/src/python/gudhi/simplex_tree.pxd
+++ b/src/python/gudhi/simplex_tree.pxd
@@ -57,6 +57,8 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi":
         void remove_maximal_simplex(vector[int] simplex)
         bool prune_above_filtration(double filtration)
         bool make_filtration_non_decreasing()
+        void compute_extended_filtration()
+        vector[vector[pair[int, pair[double, double]]]] compute_extended_persistence_subdiagrams(vector[pair[int, pair[double, double]]] dgm, double min_persistence)
         # Iterators over Simplex tree
         pair[vector[int], double] get_simplex_and_filtration(Simplex_tree_simplex_handle f_simplex)
         Simplex_tree_simplices_iterator get_simplices_iterator_begin()
diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx
index c01cc905..cc3753e1 100644
--- a/src/python/gudhi/simplex_tree.pyx
+++ b/src/python/gudhi/simplex_tree.pyx
@@ -396,6 +396,57 @@ cdef class SimplexTree:
         """
         return self.get_ptr().make_filtration_non_decreasing()
 
+    def extend_filtration(self):
+        """ Extend filtration for computing extended persistence. This function only uses the 
+        filtration values at the 0-dimensional simplices, and computes the extended persistence 
+        diagram induced by the lower-star filtration computed with these values. 
+
+        .. note::
+
+            Note that after calling this function, the filtration 
+            values are actually modified within the Simplex_tree. 
+            The function :func:`extended_persistence()<gudhi.SimplexTree.extended_persistence>`
+            retrieves the original values.
+
+        .. note::
+
+            Note that this code creates an extra vertex internally, so you should make sure that
+            the Simplex_tree does not contain a vertex with the largest possible value (i.e., 4294967295). 
+        """
+        return self.get_ptr().compute_extended_filtration()
+
+    def extended_persistence(self, homology_coeff_field=11, min_persistence=0):
+        """This function retrieves good values for extended persistence, and separate the diagrams 
+        into the Ordinary, Relative, Extended+ and Extended- subdiagrams.
+
+        :param homology_coeff_field: The homology coefficient field. Must be a
+            prime number. Default value is 11.
+        :type homology_coeff_field: int.
+        :param min_persistence: The minimum persistence value (i.e., the absolute value of the difference between the persistence diagram point coordinates) to take into
+            account (strictly greater than min_persistence). Default value is
+            0.0.
+            Sets min_persistence to -1.0 to see all values.
+        :type min_persistence: float.
+        :returns: A list of four persistence diagrams in the format described in :func:`persistence()<gudhi.SimplexTree.persistence>`. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. See https://link.springer.com/article/10.1007/s10208-008-9027-z and/or section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes.
+
+        .. note::
+
+            This function should be called only if :func:`extend_filtration()<gudhi.SimplexTree.extend_filtration>` has been called first!
+
+        .. note::
+
+            The coordinates of the persistence diagram points might be a little different than the
+            original filtration values due to the internal transformation (scaling to [-2,-1]) that is 
+            performed on these values during the computation of extended persistence.
+        """
+        cdef vector[pair[int, pair[double, double]]] persistence_result
+        if self.pcohptr != NULL:
+            del self.pcohptr
+        self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), False)
+        persistence_result = self.pcohptr.get_persistence(homology_coeff_field, -1.)
+        return self.get_ptr().compute_extended_persistence_subdiagrams(persistence_result, min_persistence)
+
+
     def persistence(self, homology_coeff_field=11, min_persistence=0, persistence_dim_max = False):
         """This function returns the persistence of the simplicial complex.
 
diff --git a/src/python/gudhi/wasserstein/__init__.py b/src/python/gudhi/wasserstein/__init__.py
new file mode 100644
index 00000000..ed225ba4
--- /dev/null
+++ b/src/python/gudhi/wasserstein/__init__.py
@@ -0,0 +1 @@
+from .wasserstein import wasserstein_distance
diff --git a/src/python/gudhi/wasserstein/barycenter.py b/src/python/gudhi/wasserstein/barycenter.py
new file mode 100644
index 00000000..de7aea81
--- /dev/null
+++ b/src/python/gudhi/wasserstein/barycenter.py
@@ -0,0 +1,159 @@
+# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+# Author(s):       Theo Lacombe
+#
+# Copyright (C) 2019 Inria
+#
+# Modification(s):
+#   - YYYY/MM Author: Description of the modification
+
+
+import ot
+import numpy as np
+import scipy.spatial.distance as sc
+
+from gudhi.wasserstein import wasserstein_distance
+
+
+def _mean(x, m):
+    '''
+    :param x: a list of 2D-points, off diagonal, x_0... x_{k-1}
+    :param m: total amount of points taken into account,
+                that is we have (m-k) copies of diagonal
+    :returns: the weighted mean of x with (m-k) copies of the diagonal
+    '''
+    k = len(x)
+    if k > 0:
+        w = np.mean(x, axis=0)
+        w_delta = (w[0] + w[1]) / 2 * np.ones(2)
+        return (k * w + (m-k) * w_delta) / m
+    else:
+        return np.array([0, 0])
+
+
+def lagrangian_barycenter(pdiagset, init=None, verbose=False):
+    '''
+    :param pdiagset: a list of ``numpy.array`` of shape `(n x 2)`
+                    (`n` can variate), encoding a set of
+                    persistence diagrams with only finite coordinates.
+    :param init: The initial value for barycenter estimate.
+                    If ``None``, init is made on a random diagram from the dataset.
+                    Otherwise, it can be an ``int``
+                    (then initialization is made on ``pdiagset[init]``)
+                    or a `(n x 2)` ``numpy.array`` enconding
+                    a persistence diagram with `n` points.
+    :type init: ``int``, or (n x 2) ``np.array``
+    :param verbose: if ``True``, returns additional information about the
+                    barycenter.
+    :type verbose: boolean
+    :returns: If not verbose (default), a ``numpy.array`` encoding
+              the barycenter estimate of pdiagset
+              (local minimum of the energy function).
+              If ``pdiagset`` is empty, returns ``None``.
+              If verbose, returns a couple ``(Y, log)``
+              where ``Y`` is the barycenter estimate,
+              and ``log`` is a ``dict`` that contains additional informations:
+
+              - `"groupings"`, a list of list of pairs ``(i,j)``.
+              Namely, ``G[k] = [...(i, j)...]``, where ``(i,j)`` indicates
+              that ``pdiagset[k][i]`` is matched to ``Y[j]``
+              if ``i = -1`` or ``j = -1``, it means they
+              represent the diagonal.
+
+              - `"energy"`, ``float`` representing the Frechet energy value obtained.
+              It is the mean of squared distances of observations to the output.
+
+              - `"nb_iter"`, ``int`` number of iterations performed before convergence of the algorithm.
+    '''
+    X = pdiagset  # to shorten notations, not a copy
+    m = len(X)  # number of diagrams we are averaging
+    if m == 0:
+        print("Warning: computing barycenter of empty diag set. Returns None")
+        return None
+
+    # store the number of off-diagonal point for each of the X_i
+    nb_off_diag = np.array([len(X_i) for X_i in X])
+    # Initialisation of barycenter
+    if init is None:
+        i0 = np.random.randint(m)  # Index of first state for the barycenter
+        Y = X[i0].copy()
+    else:
+        if type(init)==int:
+            Y = X[init].copy()
+        else:
+            Y = init.copy()
+
+    nb_iter = 0
+
+    converged = False  # stoping criterion
+    while not converged:
+        nb_iter += 1
+        K = len(Y)  # current nb of points in Y (some might be on diagonal)
+        G = np.full((K, m), -1, dtype=int)  # will store for each j, the (index)
+                              # point matched in each other diagram
+                              #(might be the diagonal).
+                              # that is G[j, i] = k <=> y_j is matched to
+                              # x_k in the diagram i-th diagram X[i]
+        updated_points = np.zeros((K, 2))  # will store the new positions of
+                                           # the points of Y.
+                                           # If points disappear, there thrown
+                                           # on [0,0] by default.
+        new_created_points = []  # will store potential new points.
+
+        # Step 1 : compute optimal matching (Y, X_i) for each X_i
+        #          and create new points in Y if needed
+        for i in range(m):
+            _, indices = wasserstein_distance(Y, X[i], matching=True, order=2., internal_p=2.)
+            for y_j, x_i_j in indices:
+                if y_j >= 0:  # we matched an off diagonal point to x_i_j...
+                    if x_i_j >= 0:  # ...which is also an off-diagonal point.
+                        G[y_j, i] = x_i_j
+                    else:  # ...which is a diagonal point
+                        G[y_j, i] = -1  # -1 stands for the diagonal (mask)
+                else:  # We matched a diagonal point to x_i_j...
+                    if x_i_j >= 0:  # which is a off-diag point !
+                                                # need to create new point in Y
+                        new_y = _mean(np.array([X[i][x_i_j]]), m)
+                        # Average this point with (m-1) copies of Delta
+                        new_created_points.append(new_y)
+
+        # Step 2 : Update current point position thanks to groupings computed
+        to_delete = []
+        for j in range(K):
+            matched_points = [X[i][G[j, i]] for i in range(m) if G[j, i] > -1]
+            new_y_j = _mean(matched_points, m)
+            if not np.array_equal(new_y_j, np.array([0,0])):
+                updated_points[j] = new_y_j
+            else: # this points is no longer of any use.
+                to_delete.append(j)
+        # we remove the point to be deleted now.
+        updated_points = np.delete(updated_points, to_delete, axis=0)
+
+        # we cannot converge if there have been new created points.
+        if new_created_points:
+            Y = np.concatenate((updated_points, new_created_points))
+        else:
+            # Step 3 : we check convergence
+            if np.array_equal(updated_points, Y):
+                converged = True
+            Y = updated_points
+
+
+    if verbose:
+        groupings = []
+        energy = 0
+        log = {}
+        n_y = len(Y)
+        for i in range(m):
+            cost, edges = wasserstein_distance(Y, X[i], matching=True, order=2., internal_p=2.)
+            groupings.append(edges)
+            energy += cost
+            log["groupings"] = groupings
+        energy = energy/m
+        log["energy"] = energy
+        log["nb_iter"] = nb_iter
+
+        return Y, log
+    else:
+        return Y
+
diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py
index 3dd993f9..35315939 100644
--- a/src/python/gudhi/wasserstein.py
+++ b/src/python/gudhi/wasserstein/wasserstein.py
@@ -9,6 +9,7 @@
 
 import numpy as np
 import scipy.spatial.distance as sc
+
 try:
     import ot
 except ImportError:
@@ -29,9 +30,9 @@ def _build_dist_matrix(X, Y, order=2., internal_p=2.):
     :param Y: (m x 2) numpy.array encoding the second diagram.
     :param order: exponent for the Wasserstein metric.
     :param internal_p: Ground metric (i.e. norm L^p).
-    :returns: (n+1) x (m+1) np.array encoding the cost matrix C. 
-                For 0 <= i < n, 0 <= j < m, C[i,j] encodes the distance between X[i] and Y[j], 
-                while C[i, m] (resp. C[n, j]) encodes the distance (to the p) between X[i] (resp Y[j]) 
+    :returns: (n+1) x (m+1) np.array encoding the cost matrix C.
+                For 0 <= i < n, 0 <= j < m, C[i,j] encodes the distance between X[i] and Y[j],
+                while C[i, m] (resp. C[n, j]) encodes the distance (to the p) between X[i] (resp Y[j])
                 and its orthogonal projection onto the diagonal.
                 note also that C[n, m] = 0  (it costs nothing to move from the diagonal to the diagonal).
     '''
@@ -58,7 +59,7 @@ def _perstot(X, order, internal_p):
     :param X: (n x 2) numpy.array (points of a given diagram).
     :param order: exponent for Wasserstein. Default value is 2.
     :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm).
-    :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram).    
+    :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram).
     '''
     Xdiag = _proj_on_diag(X)
     return (np.sum(np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order))**(1./order)
@@ -66,16 +67,16 @@ def _perstot(X, order, internal_p):
 
 def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2.):
     '''
-    :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points 
+    :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points
                 (i.e. with infinite coordinate).
     :param Y: (m x 2) numpy.array encoding the second diagram.
     :param matching: if True, computes and returns the optimal matching between X and Y, encoded as
                      a (n x 2) np.array  [...[i,j]...], meaning the i-th point in X is matched to
                      the j-th point in Y, with the convention (-1) represents the diagonal.
     :param order: exponent for Wasserstein; Default value is 2.
-    :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); 
+    :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2);
                        Default value is 2 (Euclidean norm).
-    :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with 
+    :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with
               respect to the internal_p-norm as ground metric.
               If matching is set to True, also returns the optimal matching between X and Y.
     '''
author	Marc Glisse <marc.glisse@inria.fr>	2020-04-14 19:49:49 +0200
committer	Marc Glisse <marc.glisse@inria.fr>	2020-04-14 19:49:49 +0200
commit	3f1e4bf5f139afe887ae501f20c5d3f40b5a6f79 (patch)
tree	936aa887b1e7a0e23854262f17a5d21cf2f604c1 /src/python/gudhi
parent	9518287cfa2a62948ede2e7d17d5c9f29092e0f4 (diff)
parent	6d02ca0e077cc9750275abdfc024429cec0ba5a5 (diff)