From cdc57712ca159f3044453cef41e31ebc03617a1b Mon Sep 17 00:00:00 2001
From: tlacombe <lacombe1993@gmail.com>
Date: Tue, 17 Mar 2020 10:55:14 +0100
Subject: removed _optimal_matching from barycenter as it is now handled by
 wasserstein_distance.

---
 src/python/gudhi/barycenter.py                 | 85 +++-----------------------
 src/python/test/test_wasserstein_barycenter.py |  2 +-
 2 files changed, 9 insertions(+), 78 deletions(-)

(limited to 'src/python')

diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py
index 517cdb2f..0490fdd1 100644
--- a/src/python/gudhi/barycenter.py
+++ b/src/python/gudhi/barycenter.py
@@ -12,8 +12,7 @@ import ot
 import numpy as np
 import scipy.spatial.distance as sc
 
-from gudhi.wasserstein import _build_dist_matrix, _perstot
-
+from gudhi.wasserstein import wasserstein_distance, _perstot
 
 
 def _mean(x, m):
@@ -32,70 +31,6 @@ def _mean(x, m):
         return np.array([0, 0])
 
 
-def _optimal_matching(X, Y, withcost=False):
-    '''
-    :param X: numpy.array of size (n x 2)
-    :param Y: numpy.array of size (m x 2)
-    :param withcost: returns also the cost corresponding to the optimal matching
-    :returns: numpy.array of shape (k x 2) encoding the list of edges 
-                                            in the optimal matching. 
-                That is, [[i, j] ...], where (i,j) indicates 
-                that X[i] is matched to Y[j]
-                if i >= len(X) or j >= len(Y), it means they 
-                represent the diagonal.
-                They will be encoded by -1 afterwards.
-
-    NOTE : this code will be removed for final merge, 
-    and wasserstein.optimal_matching will be used instead.
-    '''
-
-    n = len(X)
-    m = len(Y)
-    # Start by handling empty diagrams. Could it be shorten?
-    if X.size == 0: # X is empty
-        if Y.size == 0: # Y is empty
-            res = np.array([[0,0]])  # the diagonal is matched to the diagonal
-            if withcost:
-                return res, 0
-            else:
-                return res
-        else: # X is empty but not Y
-            res = np.array([[0, i] for i in range(m)]) 
-            cost = _perstot(Y, order=2, internal_p=2)**2
-            if withcost:
-                return res, cost
-            else:
-                return res
-    elif Y.size == 0: # X is not empty but Y is empty
-        res = np.array([[i,0] for i in range(n)])        
-        cost = _perstot(X, order=2, internal_p=2)**2
-        if withcost:
-            return res, cost
-        else:
-            return res
-
-    # we know X, Y are not empty diags now
-    M = _build_dist_matrix(X, Y, order=2, internal_p=2)
-
-    a = np.ones(n+1)
-    a[-1] = m
-    b = np.ones(m+1)
-    b[-1] = n
-    P = ot.emd(a=a, b=b, M=M) 
-    # Note : it seems POT returns a permutation matrix in this situation, 
-    # ie a vertex of the constraint set (generically true).
-    if withcost:
-        cost = np.sum(np.multiply(P, M))
-    P[P < 0.5] = 0  # dirty trick to avoid some numerical issues... to improve.
-    res = np.argwhere(P)
-
-    # return the list of (i,j) such that P[i,j] > 0, 
-    #i.e. x_i is matched to y_j (should it be the diag). 
-    if withcost:
-        return res, cost
-    return res
-
-
 def lagrangian_barycenter(pdiagset, init=None, verbose=False):
     '''
     :param pdiagset: a list of size m containing numpy.array of shape (n x 2) 
@@ -166,16 +101,15 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
         # Step 1 : compute optimal matching (Y, X_i) for each X_i
         #          and create new points in Y if needed
         for i in range(m):
-            indices = _optimal_matching(Y, X[i])
+            _, indices = wasserstein_distance(Y, X[i], matching=True, order=2., internal_p=2.)
             for y_j, x_i_j in indices:
-                if y_j < K:  # we matched an off diagonal point to x_i_j...
-                # ...which is also an off-diagonal point.
-                    if x_i_j < nb_off_diag[i]:  
+                if y_j >= 0:  # we matched an off diagonal point to x_i_j...
+                    if x_i_j >= 0:  # ...which is also an off-diagonal point.
                         G[y_j, i] = x_i_j
                     else:  # ...which is a diagonal point
                         G[y_j, i] = -1  # -1 stands for the diagonal (mask)
                 else:  # We matched a diagonal point to x_i_j...
-                    if x_i_j < nb_off_diag[i]:  # which is a off-diag point ! 
+                    if x_i_j >= 0:  # which is a off-diag point ! 
                                                 # need to create new point in Y
                         new_y = _mean(np.array([X[i][x_i_j]]), m)
                         # Average this point with (m-1) copies of Delta
@@ -209,15 +143,12 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
         log = {}
         n_y = len(Y)
         for i in range(m):
-            edges, cost = _optimal_matching(Y, X[i], withcost=True)
-            n_x = len(X[i])
-            G = edges[np.where(edges[:,0]<n_y)]
-            idx = np.where(G[:,1] >= n_x)
-            G[idx,1] = -1  # -1 will encode the diagonal
-            groupings.append(G)
+            cost, edges = wasserstein_distance(Y, X[i], matching=True, order=2., internal_p=2.)
+            groupings.append(edges)
             energy += cost
             log["groupings"] = groupings
         energy = energy/m
+        print(energy)
         log["energy"] = energy
         log["nb_iter"] = nb_iter
 
diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py
index 5167cb84..4d18616b 100755
--- a/src/python/test/test_wasserstein_barycenter.py
+++ b/src/python/test/test_wasserstein_barycenter.py
@@ -38,7 +38,7 @@ def test_lagrangian_barycenter():
     assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < eps
     Y, log = lagrangian_barycenter(pdiagset=[dg4, dg8], verbose=True)
     assert np.linalg.norm(Y - np.array([[1,3], [5, 7]])) < eps
-    assert np.abs(log["energy"] - 4) < eps
+    assert np.abs(log["energy"] - 2) < eps
     assert np.array_equal(log["groupings"][0] , np.array([[0, -1], [1, -1]]))
     assert np.array_equal(log["groupings"][1] , np.array([[0, 0], [1, 1]]))
     assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg8, dg4], init=np.array([[0.2, 0.6], [0.5, 0.7]]), verbose=False) - np.array([[1, 3], [5, 7]])) < eps
-- 
cgit v1.2.3