updated wasserstein.py ; added _ in front of private functions, added q=np.inf, added emptydiagram management.

author: tlacombe <lacombe1993@gmail.com> 2019-09-23 18:11:34 +0200
committer: tlacombe <lacombe1993@gmail.com> 2019-09-23 18:11:34 +0200
commit: 3c98951fd157fe750f7df5b29258a19d4d314c1e (patch)
tree: 47e9df5e7263acc3ab872d9090b74ed9935855ac /src
parent: 36dfb09493f56f666367df39e5d1a170e49a1a23 (diff)
1 files changed, 36 insertions, 15 deletions
diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein.py
index cc527ed8..db42cc08 100644
--- a/src/python/gudhi/wasserstein.py
+++ b/src/python/gudhi/wasserstein.py
@@ -9,13 +9,13 @@ except ImportError:
     See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
     Author(s):       Theo Lacombe
 
-    Copyright (C) 2016 Inria
+    Copyright (C) 2019 Inria
 
     Modification(s):
       - YYYY/MM Author: Description of the modification
 """
 
-def proj_on_diag(X):
+def _proj_on_diag(X):
     '''
     param X: (n x 2) array encoding the points of a persistent diagram.
     return: (n x 2) arary encoding the (respective orthogonal) projections of the points onto the diagonal
@@ -24,7 +24,7 @@ def proj_on_diag(X):
     return np.array([Z , Z]).T
 
 
-def build_dist_matrix(X, Y, p=2., q=2.):
+def _build_dist_matrix(X, Y, p=2., q=2.):
     '''
     param X: (n x 2) np.array encoding the (points of the) first diagram.
     param Y: (m x 2) np.array encoding the second diagram.
@@ -34,10 +34,10 @@ def build_dist_matrix(X, Y, p=2., q=2.):
                 For 1 <= i <= n, 1 <= j <= m, C[i,j] encodes the distance between X[i] and Y[j], while C[i, m+1] (resp. C[n+1, j]) encodes the distance (to the p) between X[i] (resp Y[j]) and its orthogonal proj onto the diagonal.
                 note also that C[n+1, m+1] = 0  (it costs nothing to move from the diagonal to the diagonal).
     '''
-    Xdiag = proj_on_diag(X)
-    Ydiag = proj_on_diag(Y)
-    if np.isinf(p):
-        C = sc.cdist(X,Y, metric='chebyshev', p=q)**p
+    Xdiag = _proj_on_diag(X)
+    Ydiag = _proj_on_diag(Y)
+    if np.isinf(q):
+        C = sc.cdist(X,Y, metric='chebyshev')**p
         Cxd = np.linalg.norm(X - Xdiag, ord=q, axis=1)**p
         Cdy = np.linalg.norm(Y - Ydiag, ord=q, axis=1)**p
     else:
@@ -52,24 +52,45 @@ def build_dist_matrix(X, Y, p=2., q=2.):
     return Cf
 
 
+def _perstot(X, p, q):
+    '''
+    param X: (n x 2) numpy array (points of a given diagram)
+    param q: Ground metric on the (upper-half) plane (i.e. norm l_q in R^2); Default value is 2 (euclidean norm).
+    param p: exponent for Wasserstein; Default value is 2.
+    return: float, the total persistence of the diagram (that is, its distance to the empty diagram).    
+    '''
+    Xdiag = _proj_on_diag(X)
+    return (np.sum(np.linalg.norm(X - Xdiag, ord=q, axis=1)**p))**(1/p)
+
+
 def wasserstein_distance(X, Y, p=2., q=2.):
     '''
     param X, Y: (n x 2) and (m x 2) numpy array (points of persistence diagrams)
-    param q: Ground metric (i.e. norm l_q); Default value is 2 (euclidean norm).
+    param q: Ground metric on the (upper-half) plane (i.e. norm l_q in R^2); Default value is 2 (euclidean norm).
     param p: exponent for Wasserstein; Default value is 2.
     return: float, the p-Wasserstein distance (1 <= p < infty) with respect to the q-norm as ground metric.
     '''
-    M = build_dist_matrix(X, Y, p=p, q=q)
     n = len(X)
     m = len(Y)
-    a = 1.0 / (n + m) * np.ones(n)  # weight vector of the input diagram. Uniform here.
-    hat_a = np.append(a, m/(n+m))  # so that we have a probability measure, required by POT
-    b = 1.0 / (n + m) * np.ones(m)  # weight vector of the input diagram. Uniform here.
-    hat_b = np.append(b, n/(m+n))  # so that we have a probability measure, required by POT
+
+    # handle empty diagrams
+    if X.size == 0:
+        if Y.size == 0:
+            return 0.
+        else:
+            return _perstot(Y, p, q)
+    elif Y.size == 0:
+        return _perstot(X, p, q)
+
+    M = _build_dist_matrix(X, Y, p=p, q=q)
+    a = np.full(n+1, 1. / (n + m) )  # weight vector of the input diagram. Uniform here.
+    a[-1] = a[-1] * m                # normalized so that we have a probability measure, required by POT
+    b = np.full(m+1, 1. / (n + m) )  # weight vector of the input diagram. Uniform here.
+    b[-1] = b[-1] * n                # so that we have a probability measure, required by POT
 
     # Comptuation of the otcost using the ot.emd2 library.
     # Note: it is the squared Wasserstein distance.
-    ot_cost = (n+m) * ot.emd2(hat_a, hat_b, M)
+    ot_cost = (n+m) * ot.emd2(a, b, M)
 
-    return np.power(ot_cost, 1./p)
+    return ot_cost ** (1./p)
author	tlacombe <lacombe1993@gmail.com>	2019-09-23 18:11:34 +0200
committer	tlacombe <lacombe1993@gmail.com>	2019-09-23 18:11:34 +0200
commit	3c98951fd157fe750f7df5b29258a19d4d314c1e (patch)
tree	47e9df5e7263acc3ab872d9090b74ed9935855ac /src
parent	36dfb09493f56f666367df39e5d1a170e49a1a23 (diff)