From 5877b4d3b7aca645ba906dfe0be598b1881d8798 Mon Sep 17 00:00:00 2001
From: tlacombe <lacombe1993@gmail.com>
Date: Mon, 16 Dec 2019 17:53:59 +0100
Subject: update CMakeLists and create test_wasserstein_bary

---
 src/python/test/test_wasserstein_barycenter.py | 33 ++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100755 src/python/test/test_wasserstein_barycenter.py

(limited to 'src/python/test/test_wasserstein_barycenter.py')

diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py
new file mode 100755
index 00000000..6074f250
--- /dev/null
+++ b/src/python/test/test_wasserstein_barycenter.py
@@ -0,0 +1,33 @@
+from gudhi.barycenter import lagrangian_barycenter
+import numpy as np
+
+""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+    See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+    Author(s):       Theo Lacombe
+
+    Copyright (C) 2019 Inria
+
+    Modification(s):
+      - YYYY/MM Author: Description of the modification
+"""
+
+__author__ = "Theo Lacombe"
+__copyright__ = "Copyright (C) 2019 Inria"
+__license__ = "MIT"
+
+
+def test_lagrangian_barycenter():
+     
+    dg1 = np.array([[0.2, 0.5]])
+    dg2 = np.array([[0.2, 0.7]])
+    dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]])
+    dg4 = np.array([])
+    dg5 = np.array([])
+    dg6 = np.array([])
+    res = np.array([[0.27916667, 0.55416667], [0.7375,  0.7625], [0.2375, 0.2625]])
+
+    dg7 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]])
+
+    assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < 0.001 
+    assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), np.array([]))
+    assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < 0.001   
-- 
cgit v1.2.3


From b4fcc875393df12f42aea84b918b5b35f99f7283 Mon Sep 17 00:00:00 2001
From: tlacombe <lacombe1993@gmail.com>
Date: Mon, 16 Dec 2019 18:11:27 +0100
Subject: correction of typo in _user.rst and of empty array shape in
 test_wasserstein_barycenter

---
 src/python/doc/barycenter_user.rst             | 2 +-
 src/python/test/test_wasserstein_barycenter.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/python/test/test_wasserstein_barycenter.py')

diff --git a/src/python/doc/barycenter_user.rst b/src/python/doc/barycenter_user.rst
index 1c4cb812..5344583f 100644
--- a/src/python/doc/barycenter_user.rst
+++ b/src/python/doc/barycenter_user.rst
@@ -33,7 +33,7 @@ Note that persistence diagrams must be submitted as (n x 2) numpy arrays and mus
     dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]])
     dg4 = np.array([])
     
-    bary = gudhi.barycenter.lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3))
+    bary = gudhi.barycenter.lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3)
 
     message = "Wasserstein barycenter estimated:"    
     print(message)
diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py
index 6074f250..ae3f6579 100755
--- a/src/python/test/test_wasserstein_barycenter.py
+++ b/src/python/test/test_wasserstein_barycenter.py
@@ -29,5 +29,5 @@ def test_lagrangian_barycenter():
     dg7 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]])
 
     assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < 0.001 
-    assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), np.array([]))
+    assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), shape=(0,2), np.array([]))
     assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < 0.001   
-- 
cgit v1.2.3


From 20047b94e693f31fd88ca142ba7256767ac753eb Mon Sep 17 00:00:00 2001
From: tlacombe <lacombe1993@gmail.com>
Date: Mon, 16 Dec 2019 18:34:55 +0100
Subject: correction of typo in test_wasserstein_barycenter

---
 src/python/test/test_wasserstein_barycenter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/python/test/test_wasserstein_barycenter.py')

diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py
index ae3f6579..dc82a57c 100755
--- a/src/python/test/test_wasserstein_barycenter.py
+++ b/src/python/test/test_wasserstein_barycenter.py
@@ -29,5 +29,5 @@ def test_lagrangian_barycenter():
     dg7 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]])
 
     assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < 0.001 
-    assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), shape=(0,2), np.array([]))
+    assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), np.array([], shape=(0,2)))
     assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < 0.001   
-- 
cgit v1.2.3


From b23813b90aaf1b0ce2b21bdfb33d2a6ea5bfe4cc Mon Sep 17 00:00:00 2001
From: tlacombe <lacombe1993@gmail.com>
Date: Mon, 16 Dec 2019 19:32:26 +0100
Subject: correction test

---
 src/python/gudhi/barycenter.py                 | 6 ++++--
 src/python/test/test_wasserstein_barycenter.py | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'src/python/test/test_wasserstein_barycenter.py')

diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py
index 41418454..b76166c0 100644
--- a/src/python/gudhi/barycenter.py
+++ b/src/python/gudhi/barycenter.py
@@ -318,10 +318,12 @@ def _sanity_check(verbose):
     dg1 = np.array([[0.1, 0.12], [0.21, 0.7], [0.4, 0.5], [0.3, 0.4], [0.35, 0.7], [0.5, 0.55], [0.32, 0.42], [0.1, 0.4], [0.2, 0.4]])
     dg2 = np.array([[0.09, 0.11], [0.3, 0.43], [0.5, 0.61], [0.3, 0.7], [0.42, 0.5], [0.35, 0.41], [0.74, 0.9], [0.5, 0.95], [0.35, 0.45], [0.13, 0.48], [0.32, 0.45]])
     dg3 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]])
-    X = [dg3]
+    dg4 = np.array([])
+    X = [dg4]
     Y, a = lagrangian_barycenter(X, verbose=verbose)
-    _plot_barycenter(X, Y, a)
+    #_plot_barycenter(X, Y, a)
     print(Y)
+    print(np.array_equal(Y, np.empty(shape=(0,2) )))
  
     
     #dg1 = np.array([[0.2, 0.5]])
diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py
index dc82a57c..910d23ff 100755
--- a/src/python/test/test_wasserstein_barycenter.py
+++ b/src/python/test/test_wasserstein_barycenter.py
@@ -29,5 +29,5 @@ def test_lagrangian_barycenter():
     dg7 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]])
 
     assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < 0.001 
-    assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), np.array([], shape=(0,2)))
+    assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), np.empty(shape=(0,2)))
     assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < 0.001   
-- 
cgit v1.2.3


From dc4442bc402ac25290eb529b57407607434bb7ae Mon Sep 17 00:00:00 2001
From: tlacombe <lacombe1993@gmail.com>
Date: Fri, 14 Feb 2020 14:53:51 +0100
Subject: barycenter update, adding more tests and details about log
 (assigments, cost, nb iter)

---
 src/python/gudhi/barycenter.py                 | 125 +++++++++++--------------
 src/python/test/test_wasserstein_barycenter.py |  15 ++-
 2 files changed, 69 insertions(+), 71 deletions(-)

(limited to 'src/python/test/test_wasserstein_barycenter.py')

diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py
index 11098afe..4a00c457 100644
--- a/src/python/gudhi/barycenter.py
+++ b/src/python/gudhi/barycenter.py
@@ -2,6 +2,7 @@ import ot
 import numpy as np
 import scipy.spatial.distance as sc
 
+from wasserstein import _build_dist_matrix, _perstot
 
 # This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
 # See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
@@ -20,42 +21,19 @@ def _proj_on_diag(w):
     return np.array([(w[0] + w[1])/2 , (w[0] + w[1])/2])
 
 
-def _proj_on_diag_array(X):
-    '''
-    :param X: (n x 2) array encoding the points of a persistent diagram.
-    :returns: (n x 2) array encoding the (respective orthogonal) projections of the points onto the diagonal
-    '''
-    Z = (X[:,0] + X[:,1]) / 2.
-    return np.array([Z , Z]).T
-
-
-def _build_dist_matrix(X, Y, p=2., q=2.):
-    '''
-    :param X: (n x 2) numpy.array encoding the (points of the) first diagram.
-    :param Y: (m x 2) numpy.array encoding the second diagram.
-    :param q: Ground metric (i.e. norm l_q).
-    :param p: exponent for the Wasserstein metric.
-    :returns: (n+1) x (m+1) np.array encoding the cost matrix C. 
-                For 1 <= i <= n, 1 <= j <= m, C[i,j] encodes the distance between X[i] and Y[j], while C[i, m+1] (resp. C[n+1, j]) encodes the distance (to the p) between X[i] (resp Y[j]) and its orthogonal proj onto the diagonal.
-                note also that C[n+1, m+1] = 0  (it costs nothing to move from the diagonal to the diagonal).
-                Note that for lagrangian_barycenter, one must use p=q=2. 
-    '''
-    Xdiag = _proj_on_diag_array(X)
-    Ydiag = _proj_on_diag_array(Y)
-    if np.isinf(q):
-        C = sc.cdist(X, Y, metric='chebyshev')**p
-        Cxd = np.linalg.norm(X - Xdiag, ord=q, axis=1)**p
-        Cdy = np.linalg.norm(Y - Ydiag, ord=q, axis=1)**p
+def _mean(x, m):
+    """
+    :param x: a list of 2D-points, off diagonal, x_0... x_{k-1}
+    :param m: total amount of points taken into account, that is we have (m-k) copies of diagonal
+    :returns: the weighted mean of x with (m-k) copies of the diagonal
+    """
+    k = len(x)
+    if k > 0:
+        w = np.mean(x, axis=0)
+        w_delta = _proj_on_diag(w)
+        return (k * w + (m-k) * w_delta) / m
     else:
-        C = sc.cdist(X,Y, metric='minkowski', p=q)**p
-        Cxd = np.linalg.norm(X - Xdiag, ord=q, axis=1)**p
-        Cdy = np.linalg.norm(Y - Ydiag, ord=q, axis=1)**p
-    Cf = np.hstack((C, Cxd[:,None]))
-    Cdy = np.append(Cdy, 0)
-
-    Cf = np.vstack((Cf, Cdy[None,:]))
-
-    return Cf
+        return np.array([0, 0])
 
 
 def _optimal_matching(X, Y, withcost=False):
@@ -64,63 +42,63 @@ def _optimal_matching(X, Y, withcost=False):
     :param Y: numpy.array of size (m x 2)
     :param withcost: returns also the cost corresponding to this optimal matching
     :returns: numpy.array of shape (k x 2) encoding the list of edges in the optimal matching. 
-                That is, [(i, j) ...], where (i,j) indicates that X[i] is matched to Y[j]
-                if i > len(X) or j > len(Y), it means they represent the diagonal.
-              
+                That is, [[i, j] ...], where (i,j) indicates that X[i] is matched to Y[j]
+                if i >= len(X) or j >= len(Y), it means they represent the diagonal.
+                                            They will be encoded by -1 afterwards.
     """
 
     n = len(X)
     m = len(Y)
+    # Start by handling empty diagrams. Could it be shorten?
     if X.size == 0: # X is empty
         if Y.size == 0: # Y is empty
-            return np.array([[0,0]])  # the diagonal is matched to the diagonal and that's it...
-        else:
-            return np.column_stack([np.zeros(m+1, dtype=int), np.arange(m+1, dtype=int)])
+            res = np.array([[0,0]])  # the diagonal is matched to the diagonal and that's it...
+            if withcost:
+                return res, 0
+            else:
+                return res
+        else: # X is empty but not Y
+            res = np.array([[0, i] for i in range(m)]) 
+            cost = _perstot(Y, order=2, internal_p=2)**2
+            if withcost:
+                return res, cost
+            else:
+                return res
     elif Y.size == 0: # X is not empty but Y is empty
-        return np.column_stack([np.zeros(n+1, dtype=int), np.arange(n+1, dtype=int)])
-        
+        res = np.array([[i,0] for i in range(n)])        
+        cost = _perstot(X, order=2, internal_p=2)**2
+        if withcost:
+            return res, cost
+        else:
+            return res
+
     # we know X, Y are not empty diags now
-    M = _build_dist_matrix(X, Y)
+    M = _build_dist_matrix(X, Y, order=2, internal_p=2)
 
     a = np.full(n+1, 1. / (n + m) )  # weight vector of the input diagram. Uniform here.
     a[-1] = a[-1] * m                # normalized so that we have a probability measure, required by POT
     b = np.full(m+1, 1. / (n + m) )  # weight vector of the input diagram. Uniform here.
     b[-1] = b[-1] * n                # so that we have a probability measure, required by POT
     P = ot.emd(a=a, b=b, M=M)*(n+m)
-    # Note : it seems POT return a permutation matrix in this situation, ie a vertex of the constraint set (generically true).
+    # Note : it seems POT returns a permutation matrix in this situation, ie a vertex of the constraint set (generically true).
     if withcost:
-        cost = np.sqrt(np.sum(np.multiply(P, M)))
+        cost = np.sum(np.multiply(P, M))
     P[P < 0.5] = 0  # dirty trick to avoid some numerical issues... to be improved.
-    # return the list of (i,j) such that P[i,j] > 0, i.e. x_i is matched to y_j (should it be the diag). 
     res = np.nonzero(P)
 
+    # return the list of (i,j) such that P[i,j] > 0, i.e. x_i is matched to y_j (should it be the diag). 
     if withcost:
         return np.column_stack(res), cost
     
     return np.column_stack(res)
 
 
-def _mean(x, m):
-    """
-    :param x: a list of 2D-points, off diagonal, x_0... x_{k-1}
-    :param m: total amount of points taken into account, that is we have (m-k) copies of diagonal
-    :returns: the weighted mean of x with (m-k) copies of the diagonal
-    """
-    k = len(x)
-    if k > 0:
-        w = np.mean(x, axis=0)
-        w_delta = _proj_on_diag(w)
-        return (k * w + (m-k) * w_delta) / m
-    else:
-        return np.array([0, 0])
-
-
 def lagrangian_barycenter(pdiagset, init=None, verbose=False):
     """
         Compute the estimated barycenter computed with the algorithm provided
         by Turner et al (2014).
         It is a local minimum of the corresponding Frechet function.
-    :param pdiagset: a list of size N containing numpy.array of shape (n x 2) 
+    :param pdiagset: a list of size m containing numpy.array of shape (n x 2) 
                         (n can variate), encoding a set of 
                         persistence diagrams with only finite coordinates. 
     :param init: The initial value for barycenter estimate. 
@@ -134,10 +112,13 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
                     If verbose, returns a couple (Y, log)
                     where Y is the barycenter estimate,
                     and log is a dict that contains additional informations:
-                    - assigments, a list of list of pairs (i,j),
-                        That is, a[k] = [(i, j) ...], where (i,j) indicates that X[i] is matched to Y[j]
+                    - groupings, a list of list of pairs (i,j),
+                        That is, G[k] = [(i, j) ...], where (i,j) indicates that X[i] is matched to Y[j]
                         if i > len(X) or j > len(Y), it means they represent the diagonal.
-                    - energy, a float representing the Frechet mean value obtained.
+                    - energy, a float representing the Frechet energy value obtained,
+                                that is the mean of squared distances of observations to the output.
+                    - nb_iter, integer representing the number of iterations performed before convergence
+                                        of the algorithm.
     """
     X = pdiagset  # to shorten notations, not a copy
     m = len(X)  # number of diagrams we are averaging
@@ -157,8 +138,11 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
         else:
             Y = init.copy()
 
+    nb_iter = 0
+
     converged = False  # stoping criterion
     while not converged:
+        nb_iter += 1
         K = len(Y)  # current nb of points in Y (some might be on diagonal)
         G = np.zeros((K, m), dtype=int)-1  # will store for each j, the (index) point matched in each other diagram (might be the diagonal). 
                               # that is G[j, i] = k <=> y_j is matched to
@@ -185,7 +169,6 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
                         new_created_points.append(new_y)
 
         # Step 2 : Update current point position thanks to the groupings computed
-
         to_delete = []
         for j in range(K):
             matched_points = [X[i][G[j, i]] for i in range(m) if G[j, i] > -1]
@@ -214,12 +197,16 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
         n_y = len(Y)
         for i in range(m):
             edges, cost = _optimal_matching(Y, X[i], withcost=True)
-            print(edges)
-            groupings.append([x_i_j for (y_j, x_i_j) in enumerate(edges) if y_j < n_y])
+            n_x = len(X[i])
+            G = edges[np.where(edges[:,0]<n_y)]
+            idx = np.where(G[:,1] >= n_x)
+            G[idx,1] = -1  # -1 will encode the diagonal
+            groupings.append(G)
             energy += cost
             log["groupings"] = groupings
         energy = energy/m
         log["energy"] = energy
+        log["nb_iter"] = nb_iter
 
         return Y, log
     else:
diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py
index 910d23ff..07242582 100755
--- a/src/python/test/test_wasserstein_barycenter.py
+++ b/src/python/test/test_wasserstein_barycenter.py
@@ -27,7 +27,18 @@ def test_lagrangian_barycenter():
     res = np.array([[0.27916667, 0.55416667], [0.7375,  0.7625], [0.2375, 0.2625]])
 
     dg7 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]])
+    dg8 = np.array([[0., 4.]])
+   
+    # error crit.
+    eps = 0.000001
 
-    assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < 0.001 
+
+    assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < eps 
     assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), np.empty(shape=(0,2)))
-    assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < 0.001   
+    assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < eps
+    Y, log = lagrangian_barycenter(pdiagset=[dg4, dg8], verbose=True)
+    assert np.linalg.norm(Y - np.array([[1,3]])) < eps
+    assert np.abs(log["energy"] - 2) < eps
+    assert np.array_equal(log["groupings"][0] , np.array([[0, -1]]))
+    assert np.array_equal(log["groupings"][1] , np.array([[0, 0]]))
+    assert lagrangian_barycenter(pdiagset = []) is None
-- 
cgit v1.2.3


From dc5c7ac2167bfa467b52d0a36ecb9999fe03ba91 Mon Sep 17 00:00:00 2001
From: tlacombe <lacombe1993@gmail.com>
Date: Fri, 14 Feb 2020 14:58:53 +0100
Subject: added two more tests for barycenter

---
 src/python/test/test_wasserstein_barycenter.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/python/test/test_wasserstein_barycenter.py')

diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py
index 07242582..a58a4d62 100755
--- a/src/python/test/test_wasserstein_barycenter.py
+++ b/src/python/test/test_wasserstein_barycenter.py
@@ -41,4 +41,5 @@ def test_lagrangian_barycenter():
     assert np.abs(log["energy"] - 2) < eps
     assert np.array_equal(log["groupings"][0] , np.array([[0, -1]]))
     assert np.array_equal(log["groupings"][1] , np.array([[0, 0]]))
+    assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg8, dg4], init=np.array([[0.2, 0.6], [0.5, 0.7]]), verbose=False) - np.array([[1, 3]])) < eps
     assert lagrangian_barycenter(pdiagset = []) is None
-- 
cgit v1.2.3


From 4b546a43fe14178dcfb2b327e27a580fc9811499 Mon Sep 17 00:00:00 2001
From: tlacombe <lacombe1993@gmail.com>
Date: Mon, 16 Mar 2020 13:16:04 +0100
Subject: update doc (indentation, mention of -1 for the diag) and added a few
 more tests

---
 src/python/gudhi/barycenter.py                 | 30 +++++++++++++-------------
 src/python/test/test_wasserstein_barycenter.py | 15 +++++++------
 2 files changed, 23 insertions(+), 22 deletions(-)

(limited to 'src/python/test/test_wasserstein_barycenter.py')

diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py
index a41b5906..3af12c14 100644
--- a/src/python/gudhi/barycenter.py
+++ b/src/python/gudhi/barycenter.py
@@ -96,9 +96,8 @@ def _optimal_matching(X, Y, withcost=False):
 def lagrangian_barycenter(pdiagset, init=None, verbose=False):
     '''
     :param pdiagset: a list of size m containing numpy.array of shape (n x 2) 
-                        (n can variate), encoding a set of 
-                        persistence diagrams with only finite coordinates. 
-                        If empty, returns None.
+                    (n can variate), encoding a set of 
+                    persistence diagrams with only finite coordinates. 
     :param init: The initial value for barycenter estimate. 
                     If None, init is made on a random diagram from the dataset. 
                     Otherwise, it must be an int 
@@ -106,24 +105,25 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
                     or a (n x 2) numpy.array enconding 
                     a persistence diagram with n points.
     :param verbose: if True, returns additional information about the
-                        barycenter.
+                    barycenter.
     :returns: If not verbose (default), a numpy.array encoding
-                    the barycenter estimate 
+                    the barycenter estimate of pdiagset
                     (local minima of the energy function). 
+                    If pdiagset is empty, returns None.
                     If verbose, returns a couple (Y, log)
                     where Y is the barycenter estimate,
                     and log is a dict that contains additional informations:
                     - groupings, a list of list of pairs (i,j),
-                        That is, G[k] = [(i, j) ...], where (i,j) indicates 
-                        that X[i] is matched to Y[j]
-                        if i > len(X) or j > len(Y), it means they 
-                        represent the diagonal.
+                    That is, G[k] = [(i, j) ...], where (i,j) indicates 
+                    that X[i] is matched to Y[j]
+                    if i = -1 or j = -1, it means they 
+                    represent the diagonal.
                     - energy, a float representing the Frechet 
-                                energy value obtained,
-                                that is the mean of squared distances 
-                                of observations to the output.
+                    energy value obtained,
+                    that is the mean of squared distances 
+                    of observations to the output.
                     - nb_iter, integer representing the number of iterations 
-                               performed before convergence of the algorithm.
+                    performed before convergence of the algorithm.
     '''
     X = pdiagset  # to shorten notations, not a copy
     m = len(X)  # number of diagrams we are averaging
@@ -136,7 +136,7 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
     # Initialisation of barycenter
     if init is None:
         i0 = np.random.randint(m)  # Index of first state for the barycenter
-        Y = X[i0].copy() #copy() ensure that we do not modify X[i0]
+        Y = X[i0].copy() 
     else:
         if type(init)==int:
             Y = X[init].copy()
@@ -149,7 +149,7 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
     while not converged:
         nb_iter += 1
         K = len(Y)  # current nb of points in Y (some might be on diagonal)
-        G = np.zeros((K, m), dtype=int)-1  # will store for each j, the (index)
+        G = np.full((K, m), -1, dtype=int)  # will store for each j, the (index)
                               # point matched in each other diagram 
                               #(might be the diagonal). 
                               # that is G[j, i] = k <=> y_j is matched to
diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py
index a58a4d62..5167cb84 100755
--- a/src/python/test/test_wasserstein_barycenter.py
+++ b/src/python/test/test_wasserstein_barycenter.py
@@ -27,19 +27,20 @@ def test_lagrangian_barycenter():
     res = np.array([[0.27916667, 0.55416667], [0.7375,  0.7625], [0.2375, 0.2625]])
 
     dg7 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]])
-    dg8 = np.array([[0., 4.]])
+    dg8 = np.array([[0., 4.], [4, 8]])
    
     # error crit.
-    eps = 0.000001
+    eps = 1e-7
 
 
     assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < eps 
     assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), np.empty(shape=(0,2)))
     assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < eps
     Y, log = lagrangian_barycenter(pdiagset=[dg4, dg8], verbose=True)
-    assert np.linalg.norm(Y - np.array([[1,3]])) < eps
-    assert np.abs(log["energy"] - 2) < eps
-    assert np.array_equal(log["groupings"][0] , np.array([[0, -1]]))
-    assert np.array_equal(log["groupings"][1] , np.array([[0, 0]]))
-    assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg8, dg4], init=np.array([[0.2, 0.6], [0.5, 0.7]]), verbose=False) - np.array([[1, 3]])) < eps
+    assert np.linalg.norm(Y - np.array([[1,3], [5, 7]])) < eps
+    assert np.abs(log["energy"] - 4) < eps
+    assert np.array_equal(log["groupings"][0] , np.array([[0, -1], [1, -1]]))
+    assert np.array_equal(log["groupings"][1] , np.array([[0, 0], [1, 1]]))
+    assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg8, dg4], init=np.array([[0.2, 0.6], [0.5, 0.7]]), verbose=False) - np.array([[1, 3], [5, 7]])) < eps
     assert lagrangian_barycenter(pdiagset = []) is None
+
-- 
cgit v1.2.3


From cdc57712ca159f3044453cef41e31ebc03617a1b Mon Sep 17 00:00:00 2001
From: tlacombe <lacombe1993@gmail.com>
Date: Tue, 17 Mar 2020 10:55:14 +0100
Subject: removed _optimal_matching from barycenter as it is now handled by
 wasserstein_distance.

---
 src/python/gudhi/barycenter.py                 | 85 +++-----------------------
 src/python/test/test_wasserstein_barycenter.py |  2 +-
 2 files changed, 9 insertions(+), 78 deletions(-)

(limited to 'src/python/test/test_wasserstein_barycenter.py')

diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py
index 517cdb2f..0490fdd1 100644
--- a/src/python/gudhi/barycenter.py
+++ b/src/python/gudhi/barycenter.py
@@ -12,8 +12,7 @@ import ot
 import numpy as np
 import scipy.spatial.distance as sc
 
-from gudhi.wasserstein import _build_dist_matrix, _perstot
-
+from gudhi.wasserstein import wasserstein_distance, _perstot
 
 
 def _mean(x, m):
@@ -32,70 +31,6 @@ def _mean(x, m):
         return np.array([0, 0])
 
 
-def _optimal_matching(X, Y, withcost=False):
-    '''
-    :param X: numpy.array of size (n x 2)
-    :param Y: numpy.array of size (m x 2)
-    :param withcost: returns also the cost corresponding to the optimal matching
-    :returns: numpy.array of shape (k x 2) encoding the list of edges 
-                                            in the optimal matching. 
-                That is, [[i, j] ...], where (i,j) indicates 
-                that X[i] is matched to Y[j]
-                if i >= len(X) or j >= len(Y), it means they 
-                represent the diagonal.
-                They will be encoded by -1 afterwards.
-
-    NOTE : this code will be removed for final merge, 
-    and wasserstein.optimal_matching will be used instead.
-    '''
-
-    n = len(X)
-    m = len(Y)
-    # Start by handling empty diagrams. Could it be shorten?
-    if X.size == 0: # X is empty
-        if Y.size == 0: # Y is empty
-            res = np.array([[0,0]])  # the diagonal is matched to the diagonal
-            if withcost:
-                return res, 0
-            else:
-                return res
-        else: # X is empty but not Y
-            res = np.array([[0, i] for i in range(m)]) 
-            cost = _perstot(Y, order=2, internal_p=2)**2
-            if withcost:
-                return res, cost
-            else:
-                return res
-    elif Y.size == 0: # X is not empty but Y is empty
-        res = np.array([[i,0] for i in range(n)])        
-        cost = _perstot(X, order=2, internal_p=2)**2
-        if withcost:
-            return res, cost
-        else:
-            return res
-
-    # we know X, Y are not empty diags now
-    M = _build_dist_matrix(X, Y, order=2, internal_p=2)
-
-    a = np.ones(n+1)
-    a[-1] = m
-    b = np.ones(m+1)
-    b[-1] = n
-    P = ot.emd(a=a, b=b, M=M) 
-    # Note : it seems POT returns a permutation matrix in this situation, 
-    # ie a vertex of the constraint set (generically true).
-    if withcost:
-        cost = np.sum(np.multiply(P, M))
-    P[P < 0.5] = 0  # dirty trick to avoid some numerical issues... to improve.
-    res = np.argwhere(P)
-
-    # return the list of (i,j) such that P[i,j] > 0, 
-    #i.e. x_i is matched to y_j (should it be the diag). 
-    if withcost:
-        return res, cost
-    return res
-
-
 def lagrangian_barycenter(pdiagset, init=None, verbose=False):
     '''
     :param pdiagset: a list of size m containing numpy.array of shape (n x 2) 
@@ -166,16 +101,15 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
         # Step 1 : compute optimal matching (Y, X_i) for each X_i
         #          and create new points in Y if needed
         for i in range(m):
-            indices = _optimal_matching(Y, X[i])
+            _, indices = wasserstein_distance(Y, X[i], matching=True, order=2., internal_p=2.)
             for y_j, x_i_j in indices:
-                if y_j < K:  # we matched an off diagonal point to x_i_j...
-                # ...which is also an off-diagonal point.
-                    if x_i_j < nb_off_diag[i]:  
+                if y_j >= 0:  # we matched an off diagonal point to x_i_j...
+                    if x_i_j >= 0:  # ...which is also an off-diagonal point.
                         G[y_j, i] = x_i_j
                     else:  # ...which is a diagonal point
                         G[y_j, i] = -1  # -1 stands for the diagonal (mask)
                 else:  # We matched a diagonal point to x_i_j...
-                    if x_i_j < nb_off_diag[i]:  # which is a off-diag point ! 
+                    if x_i_j >= 0:  # which is a off-diag point ! 
                                                 # need to create new point in Y
                         new_y = _mean(np.array([X[i][x_i_j]]), m)
                         # Average this point with (m-1) copies of Delta
@@ -209,15 +143,12 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
         log = {}
         n_y = len(Y)
         for i in range(m):
-            edges, cost = _optimal_matching(Y, X[i], withcost=True)
-            n_x = len(X[i])
-            G = edges[np.where(edges[:,0]<n_y)]
-            idx = np.where(G[:,1] >= n_x)
-            G[idx,1] = -1  # -1 will encode the diagonal
-            groupings.append(G)
+            cost, edges = wasserstein_distance(Y, X[i], matching=True, order=2., internal_p=2.)
+            groupings.append(edges)
             energy += cost
             log["groupings"] = groupings
         energy = energy/m
+        print(energy)
         log["energy"] = energy
         log["nb_iter"] = nb_iter
 
diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py
index 5167cb84..4d18616b 100755
--- a/src/python/test/test_wasserstein_barycenter.py
+++ b/src/python/test/test_wasserstein_barycenter.py
@@ -38,7 +38,7 @@ def test_lagrangian_barycenter():
     assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < eps
     Y, log = lagrangian_barycenter(pdiagset=[dg4, dg8], verbose=True)
     assert np.linalg.norm(Y - np.array([[1,3], [5, 7]])) < eps
-    assert np.abs(log["energy"] - 4) < eps
+    assert np.abs(log["energy"] - 2) < eps
     assert np.array_equal(log["groupings"][0] , np.array([[0, -1], [1, -1]]))
     assert np.array_equal(log["groupings"][1] , np.array([[0, 0], [1, 1]]))
     assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg8, dg4], init=np.array([[0.2, 0.6], [0.5, 0.7]]), verbose=False) - np.array([[1, 3], [5, 7]])) < eps
-- 
cgit v1.2.3


From 7721ac6181fc394ae0136ee176d63210e727f06f Mon Sep 17 00:00:00 2001
From: tlacombe <lacombe1993@gmail.com>
Date: Tue, 31 Mar 2020 11:40:46 +0200
Subject: modified import in test to get consistent with
 gudhi.wasserstein.barycenter

---
 src/python/test/test_wasserstein_barycenter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/python/test/test_wasserstein_barycenter.py')

diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py
index 4d18616b..f686aef5 100755
--- a/src/python/test/test_wasserstein_barycenter.py
+++ b/src/python/test/test_wasserstein_barycenter.py
@@ -1,4 +1,4 @@
-from gudhi.barycenter import lagrangian_barycenter
+from gudhi.wasserstein.barycenter import lagrangian_barycenter
 import numpy as np
 
 """ This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
-- 
cgit v1.2.3


From dd96965e521313b6210391f511c82cced9b2a950 Mon Sep 17 00:00:00 2001
From: Marc Glisse <marc.glisse@inria.fr>
Date: Mon, 6 Apr 2020 19:37:58 +0200
Subject: Remove trailing whitespace

---
 src/python/doc/wasserstein_distance_user.rst   | 72 +++++++++++++-------------
 src/python/gudhi/wasserstein/barycenter.py     | 42 +++++++--------
 src/python/gudhi/wasserstein/wasserstein.py    | 14 ++---
 src/python/test/test_wasserstein_barycenter.py |  6 +--
 src/python/test/test_wasserstein_distance.py   |  2 +-
 5 files changed, 68 insertions(+), 68 deletions(-)

(limited to 'src/python/test/test_wasserstein_barycenter.py')

diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst
index b821b6fa..c24da74d 100644
--- a/src/python/doc/wasserstein_distance_user.rst
+++ b/src/python/doc/wasserstein_distance_user.rst
@@ -10,10 +10,10 @@ Definition
 .. include:: wasserstein_distance_sum.inc
 
 The q-Wasserstein distance is defined as the minimal value achieved
-by a perfect matching between the points of the two diagrams (+ all  
-diagonal points), where the value of a matching is defined as the    
+by a perfect matching between the points of the two diagrams (+ all
+diagonal points), where the value of a matching is defined as the
 q-th root of the sum of all edge lengths to the power q. Edge lengths
-are measured in norm p, for :math:`1 \leq p \leq \infty`.            
+are measured in norm p, for :math:`1 \leq p \leq \infty`.
 
 Distance Functions
 ------------------
@@ -54,9 +54,9 @@ The output is:
 
     Wasserstein distance value = 1.45
 
-We can also have access to the optimal matching by letting `matching=True`. 
+We can also have access to the optimal matching by letting `matching=True`.
 It is encoded as a list of indices (i,j), meaning that the i-th point in X
-is mapped to the j-th point in Y. 
+is mapped to the j-th point in Y.
 An index of -1 represents the diagonal.
 
 .. testcode::
@@ -84,7 +84,7 @@ An index of -1 represents the diagonal.
 The output is:
 
 .. testoutput::
-    
+
     Wasserstein distance value = 2.15
     point 0 in dgm1 is matched to point 0 in dgm2
     point 1 in dgm1 is matched to point 2 in dgm2
@@ -94,32 +94,32 @@ The output is:
 Barycenters
 -----------
 
-A Frechet mean (or barycenter) is a generalization of the arithmetic 
-mean in a non linear space such as the one of persistence diagrams.  
-Given a set of persistence diagrams :math:`\mu_1 \dots \mu_n`, it is 
-defined as a minimizer of the variance functional, that is of        
-:math:`\mu \mapsto \sum_{i=1}^n d_2(\mu,\mu_i)^2`.                   
-where :math:`d_2` denotes the Wasserstein-2 distance between         
-persistence diagrams.                                                
-It is known to exist and is generically unique. However, an exact    
-computation is in general untractable. Current implementation        
-available is based on (Turner et al., 2014), 
+A Frechet mean (or barycenter) is a generalization of the arithmetic
+mean in a non linear space such as the one of persistence diagrams.
+Given a set of persistence diagrams :math:`\mu_1 \dots \mu_n`, it is
+defined as a minimizer of the variance functional, that is of
+:math:`\mu \mapsto \sum_{i=1}^n d_2(\mu,\mu_i)^2`.
+where :math:`d_2` denotes the Wasserstein-2 distance between
+persistence diagrams.
+It is known to exist and is generically unique. However, an exact
+computation is in general untractable. Current implementation
+available is based on (Turner et al., 2014),
 :cite:`turner2014frechet`
-and uses an EM-scheme to 
-provide a local minimum of the variance functional (somewhat similar 
-to the Lloyd algorithm to estimate a solution to the k-means         
+and uses an EM-scheme to
+provide a local minimum of the variance functional (somewhat similar
+to the Lloyd algorithm to estimate a solution to the k-means
 problem). The local minimum returned depends on the initialization of
-the barycenter.                                                      
-The combinatorial structure of the algorithm limits its              
-performances on large scale problems (thousands of diagrams and of points 
-per diagram).                                                        
+the barycenter.
+The combinatorial structure of the algorithm limits its
+performances on large scale problems (thousands of diagrams and of points
+per diagram).
+
+.. figure::
+     ./img/barycenter.png
+     :figclass: align-center
 
-.. figure::                                                    
-     ./img/barycenter.png                                      
-     :figclass: align-center                                   
-                                                               
-     Illustration of Frechet mean between persistence          
-     diagrams.                                                 
+     Illustration of Frechet mean between persistence
+     diagrams.
 
 
 .. autofunction:: gudhi.wasserstein.barycenter.lagrangian_barycenter
@@ -127,16 +127,16 @@ per diagram).
 Basic example
 *************
 
-This example estimates the Frechet mean (aka Wasserstein barycenter) between 
+This example estimates the Frechet mean (aka Wasserstein barycenter) between
 four persistence diagrams.
 It is initialized on the 4th diagram.
-As the algorithm is not convex, its output depends on the initialization and 
+As the algorithm is not convex, its output depends on the initialization and
 is only a local minimum of the objective function.
-Initialization can be either given as an integer (in which case the i-th 
-diagram of the list is used as initial estimate) or as a diagram. 
-If None, it will randomly select one of the diagrams of the list 
+Initialization can be either given as an integer (in which case the i-th
+diagram of the list is used as initial estimate) or as a diagram.
+If None, it will randomly select one of the diagrams of the list
 as initial estimate.
-Note that persistence diagrams must be submitted as 
+Note that persistence diagrams must be submitted as
 (n x 2) numpy arrays and must not contain inf values.
 
 
@@ -152,7 +152,7 @@ Note that persistence diagrams must be submitted as
     pdiagset = [dg1, dg2, dg3, dg4]
     bary = lagrangian_barycenter(pdiagset=pdiagset,init=3)
 
-    message = "Wasserstein barycenter estimated:"    
+    message = "Wasserstein barycenter estimated:"
     print(message)
     print(bary)
 
diff --git a/src/python/gudhi/wasserstein/barycenter.py b/src/python/gudhi/wasserstein/barycenter.py
index 99f29a1e..de7aea81 100644
--- a/src/python/gudhi/wasserstein/barycenter.py
+++ b/src/python/gudhi/wasserstein/barycenter.py
@@ -18,7 +18,7 @@ from gudhi.wasserstein import wasserstein_distance
 def _mean(x, m):
     '''
     :param x: a list of 2D-points, off diagonal, x_0... x_{k-1}
-    :param m: total amount of points taken into account, 
+    :param m: total amount of points taken into account,
                 that is we have (m-k) copies of diagonal
     :returns: the weighted mean of x with (m-k) copies of the diagonal
     '''
@@ -33,14 +33,14 @@ def _mean(x, m):
 
 def lagrangian_barycenter(pdiagset, init=None, verbose=False):
     '''
-    :param pdiagset: a list of ``numpy.array`` of shape `(n x 2)` 
-                    (`n` can variate), encoding a set of 
-                    persistence diagrams with only finite coordinates. 
-    :param init: The initial value for barycenter estimate. 
-                    If ``None``, init is made on a random diagram from the dataset. 
-                    Otherwise, it can be an ``int`` 
+    :param pdiagset: a list of ``numpy.array`` of shape `(n x 2)`
+                    (`n` can variate), encoding a set of
+                    persistence diagrams with only finite coordinates.
+    :param init: The initial value for barycenter estimate.
+                    If ``None``, init is made on a random diagram from the dataset.
+                    Otherwise, it can be an ``int``
                     (then initialization is made on ``pdiagset[init]``)
-                    or a `(n x 2)` ``numpy.array`` enconding 
+                    or a `(n x 2)` ``numpy.array`` enconding
                     a persistence diagram with `n` points.
     :type init: ``int``, or (n x 2) ``np.array``
     :param verbose: if ``True``, returns additional information about the
@@ -48,16 +48,16 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
     :type verbose: boolean
     :returns: If not verbose (default), a ``numpy.array`` encoding
               the barycenter estimate of pdiagset
-              (local minimum of the energy function). 
+              (local minimum of the energy function).
               If ``pdiagset`` is empty, returns ``None``.
               If verbose, returns a couple ``(Y, log)``
               where ``Y`` is the barycenter estimate,
               and ``log`` is a ``dict`` that contains additional informations:
 
               - `"groupings"`, a list of list of pairs ``(i,j)``.
-              Namely, ``G[k] = [...(i, j)...]``, where ``(i,j)`` indicates 
+              Namely, ``G[k] = [...(i, j)...]``, where ``(i,j)`` indicates
               that ``pdiagset[k][i]`` is matched to ``Y[j]``
-              if ``i = -1`` or ``j = -1``, it means they 
+              if ``i = -1`` or ``j = -1``, it means they
               represent the diagonal.
 
               - `"energy"`, ``float`` representing the Frechet energy value obtained.
@@ -70,13 +70,13 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
     if m == 0:
         print("Warning: computing barycenter of empty diag set. Returns None")
         return None
-    
+
     # store the number of off-diagonal point for each of the X_i
-    nb_off_diag = np.array([len(X_i) for X_i in X])  
+    nb_off_diag = np.array([len(X_i) for X_i in X])
     # Initialisation of barycenter
     if init is None:
         i0 = np.random.randint(m)  # Index of first state for the barycenter
-        Y = X[i0].copy() 
+        Y = X[i0].copy()
     else:
         if type(init)==int:
             Y = X[init].copy()
@@ -90,8 +90,8 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
         nb_iter += 1
         K = len(Y)  # current nb of points in Y (some might be on diagonal)
         G = np.full((K, m), -1, dtype=int)  # will store for each j, the (index)
-                              # point matched in each other diagram 
-                              #(might be the diagonal). 
+                              # point matched in each other diagram
+                              #(might be the diagonal).
                               # that is G[j, i] = k <=> y_j is matched to
                               # x_k in the diagram i-th diagram X[i]
         updated_points = np.zeros((K, 2))  # will store the new positions of
@@ -111,7 +111,7 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
                     else:  # ...which is a diagonal point
                         G[y_j, i] = -1  # -1 stands for the diagonal (mask)
                 else:  # We matched a diagonal point to x_i_j...
-                    if x_i_j >= 0:  # which is a off-diag point ! 
+                    if x_i_j >= 0:  # which is a off-diag point !
                                                 # need to create new point in Y
                         new_y = _mean(np.array([X[i][x_i_j]]), m)
                         # Average this point with (m-1) copies of Delta
@@ -123,19 +123,19 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
             matched_points = [X[i][G[j, i]] for i in range(m) if G[j, i] > -1]
             new_y_j = _mean(matched_points, m)
             if not np.array_equal(new_y_j, np.array([0,0])):
-                updated_points[j] = new_y_j 
+                updated_points[j] = new_y_j
             else: # this points is no longer of any use.
                 to_delete.append(j)
         # we remove the point to be deleted now.
-        updated_points = np.delete(updated_points, to_delete, axis=0)  
+        updated_points = np.delete(updated_points, to_delete, axis=0)
 
         # we cannot converge if there have been new created points.
-        if new_created_points: 
+        if new_created_points:
             Y = np.concatenate((updated_points, new_created_points))
         else:
             # Step 3 : we check convergence
             if np.array_equal(updated_points, Y):
-                converged = True 
+                converged = True
             Y = updated_points
 
 
diff --git a/src/python/gudhi/wasserstein/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py
index e1233eec..35315939 100644
--- a/src/python/gudhi/wasserstein/wasserstein.py
+++ b/src/python/gudhi/wasserstein/wasserstein.py
@@ -30,9 +30,9 @@ def _build_dist_matrix(X, Y, order=2., internal_p=2.):
     :param Y: (m x 2) numpy.array encoding the second diagram.
     :param order: exponent for the Wasserstein metric.
     :param internal_p: Ground metric (i.e. norm L^p).
-    :returns: (n+1) x (m+1) np.array encoding the cost matrix C. 
-                For 0 <= i < n, 0 <= j < m, C[i,j] encodes the distance between X[i] and Y[j], 
-                while C[i, m] (resp. C[n, j]) encodes the distance (to the p) between X[i] (resp Y[j]) 
+    :returns: (n+1) x (m+1) np.array encoding the cost matrix C.
+                For 0 <= i < n, 0 <= j < m, C[i,j] encodes the distance between X[i] and Y[j],
+                while C[i, m] (resp. C[n, j]) encodes the distance (to the p) between X[i] (resp Y[j])
                 and its orthogonal projection onto the diagonal.
                 note also that C[n, m] = 0  (it costs nothing to move from the diagonal to the diagonal).
     '''
@@ -59,7 +59,7 @@ def _perstot(X, order, internal_p):
     :param X: (n x 2) numpy.array (points of a given diagram).
     :param order: exponent for Wasserstein. Default value is 2.
     :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm).
-    :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram).    
+    :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram).
     '''
     Xdiag = _proj_on_diag(X)
     return (np.sum(np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order))**(1./order)
@@ -67,16 +67,16 @@ def _perstot(X, order, internal_p):
 
 def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2.):
     '''
-    :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points 
+    :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points
                 (i.e. with infinite coordinate).
     :param Y: (m x 2) numpy.array encoding the second diagram.
     :param matching: if True, computes and returns the optimal matching between X and Y, encoded as
                      a (n x 2) np.array  [...[i,j]...], meaning the i-th point in X is matched to
                      the j-th point in Y, with the convention (-1) represents the diagonal.
     :param order: exponent for Wasserstein; Default value is 2.
-    :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); 
+    :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2);
                        Default value is 2 (Euclidean norm).
-    :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with 
+    :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with
               respect to the internal_p-norm as ground metric.
               If matching is set to True, also returns the optimal matching between X and Y.
     '''
diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py
index f686aef5..f68c748e 100755
--- a/src/python/test/test_wasserstein_barycenter.py
+++ b/src/python/test/test_wasserstein_barycenter.py
@@ -17,7 +17,7 @@ __license__ = "MIT"
 
 
 def test_lagrangian_barycenter():
-     
+
     dg1 = np.array([[0.2, 0.5]])
     dg2 = np.array([[0.2, 0.7]])
     dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]])
@@ -28,12 +28,12 @@ def test_lagrangian_barycenter():
 
     dg7 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]])
     dg8 = np.array([[0., 4.], [4, 8]])
-   
+
     # error crit.
     eps = 1e-7
 
 
-    assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < eps 
+    assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < eps
     assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), np.empty(shape=(0,2)))
     assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < eps
     Y, log = lagrangian_barycenter(pdiagset=[dg4, dg8], verbose=True)
diff --git a/src/python/test/test_wasserstein_distance.py b/src/python/test/test_wasserstein_distance.py
index 0d70e11a..7e0d0f5f 100755
--- a/src/python/test/test_wasserstein_distance.py
+++ b/src/python/test/test_wasserstein_distance.py
@@ -70,7 +70,7 @@ def _basic_wasserstein(wasserstein_distance, delta, test_infinity=True, test_mat
         assert np.array_equal(match , [[0, -1], [1, -1]])
         match = wasserstein_distance(diag1, diag2, matching=True, internal_p=2., order=2.)[1]
         assert np.array_equal(match, [[0, 0], [1, 1], [2, -1]])
-        
+
 
 
 def hera_wrap(delta):
-- 
cgit v1.2.3