summaryrefslogtreecommitdiff
path: root/src/python
diff options
context:
space:
mode:
authortlacombe <lacombe1993@gmail.com>2020-02-14 18:45:34 +0100
committertlacombe <lacombe1993@gmail.com>2020-02-14 18:45:34 +0100
commitf8fe3fdb01f6161b57da732a1c3f0c14a8b359a6 (patch)
treeb1666d18be60489ca6225823b4782b054a03eb11 /src/python
parent3eaba12b66518717e90ffb1e410b7f8d769719cf (diff)
moved import after docstring + reduce lines < 80 char
Diffstat (limited to 'src/python')
-rw-r--r--src/python/gudhi/barycenter.py99
1 file changed, 59 insertions, 40 deletions
diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py
index a2af7a58..4a877b4a 100644
--- a/src/python/gudhi/barycenter.py
+++ b/src/python/gudhi/barycenter.py
@@ -1,9 +1,3 @@
-import ot
-import numpy as np
-import scipy.spatial.distance as sc
-
-from gudhi.wasserstein import _build_dist_matrix, _perstot
-
# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
# Author(s): Theo Lacombe
@@ -14,6 +8,13 @@ from gudhi.wasserstein import _build_dist_matrix, _perstot
# - YYYY/MM Author: Description of the modification
+import ot
+import numpy as np
+import scipy.spatial.distance as sc
+
+from gudhi.wasserstein import _build_dist_matrix, _perstot
+
+
def _proj_on_diag(w):
'''
Util function to project a point on the diag.
@@ -24,7 +25,8 @@ def _proj_on_diag(w):
def _mean(x, m):
"""
:param x: a list of 2D-points, off diagonal, x_0... x_{k-1}
- :param m: total amount of points taken into account, that is we have (m-k) copies of diagonal
+ :param m: total amount of points taken into account,
+ that is we have (m-k) copies of diagonal
:returns: the weighted mean of x with (m-k) copies of the diagonal
"""
k = len(x)
@@ -40,11 +42,14 @@ def _optimal_matching(X, Y, withcost=False):
"""
:param X: numpy.array of size (n x 2)
:param Y: numpy.array of size (m x 2)
- :param withcost: returns also the cost corresponding to this optimal matching
- :returns: numpy.array of shape (k x 2) encoding the list of edges in the optimal matching.
- That is, [[i, j] ...], where (i,j) indicates that X[i] is matched to Y[j]
- if i >= len(X) or j >= len(Y), it means they represent the diagonal.
- They will be encoded by -1 afterwards.
+ :param withcost: returns also the cost corresponding to the optimal matching
+ :returns: numpy.array of shape (k x 2) encoding the list of edges
+ in the optimal matching.
+ That is, [[i, j] ...], where (i,j) indicates
+ that X[i] is matched to Y[j]
+ if i >= len(X) or j >= len(Y), it means they
+ represent the diagonal.
+ They will be encoded by -1 afterwards.
"""
n = len(X)
@@ -52,7 +57,7 @@ def _optimal_matching(X, Y, withcost=False):
# Start by handling empty diagrams. Could it be shorten?
if X.size == 0: # X is empty
if Y.size == 0: # Y is empty
- res = np.array([[0,0]]) # the diagonal is matched to the diagonal and that's it...
+ res = np.array([[0,0]]) # the diagonal is matched to the diagonal
if withcost:
return res, 0
else:
@@ -75,18 +80,20 @@ def _optimal_matching(X, Y, withcost=False):
# we know X, Y are not empty diags now
M = _build_dist_matrix(X, Y, order=2, internal_p=2)
- a = np.full(n+1, 1. / (n + m) ) # weight vector of the input diagram. Uniform here.
- a[-1] = a[-1] * m # normalized so that we have a probability measure, required by POT
- b = np.full(m+1, 1. / (n + m) ) # weight vector of the input diagram. Uniform here.
- b[-1] = b[-1] * n # so that we have a probability measure, required by POT
+ a = np.full(n+1, 1. / (n + m) )
+ a[-1] = a[-1] * m
+ b = np.full(m+1, 1. / (n + m) )
+ b[-1] = b[-1] * n
P = ot.emd(a=a, b=b, M=M)*(n+m)
- # Note : it seems POT returns a permutation matrix in this situation, ie a vertex of the constraint set (generically true).
+ # Note : it seems POT returns a permutation matrix in this situation,
+ # ie a vertex of the constraint set (generically true).
if withcost:
cost = np.sum(np.multiply(P, M))
- P[P < 0.5] = 0 # dirty trick to avoid some numerical issues... to be improved.
+ P[P < 0.5] = 0 # dirty trick to avoid some numerical issues... to improve.
res = np.nonzero(P)
- # return the list of (i,j) such that P[i,j] > 0, i.e. x_i is matched to y_j (should it be the diag).
+ # return the list of (i,j) such that P[i,j] > 0,
+ #i.e. x_i is matched to y_j (should it be the diag).
if withcost:
return np.column_stack(res), cost
@@ -103,31 +110,38 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
persistence diagrams with only finite coordinates.
:param init: The initial value for barycenter estimate.
If None, init is made on a random diagram from the dataset.
- Otherwise, it must be an int (then we init with diagset[init])
- or a (n x 2) numpy.array enconding a persistence diagram with n points.
+ Otherwise, it must be an int
+ (then we init with diagset[init])
+ or a (n x 2) numpy.array enconding
+ a persistence diagram with n points.
:param verbose: if True, returns additional information about the
barycenter.
:returns: If not verbose (default), a numpy.array encoding
- the barycenter estimate (local minima of the energy function).
+ the barycenter estimate
+ (local minima of the energy function).
If verbose, returns a couple (Y, log)
where Y is the barycenter estimate,
and log is a dict that contains additional informations:
- groupings, a list of list of pairs (i,j),
- That is, G[k] = [(i, j) ...], where (i,j) indicates that X[i] is matched to Y[j]
- if i > len(X) or j > len(Y), it means they represent the diagonal.
- - energy, a float representing the Frechet energy value obtained,
- that is the mean of squared distances of observations to the output.
- - nb_iter, integer representing the number of iterations performed before convergence
- of the algorithm.
+ That is, G[k] = [(i, j) ...], where (i,j) indicates
+ that X[i] is matched to Y[j]
+ if i > len(X) or j > len(Y), it means they
+ represent the diagonal.
+ - energy, a float representing the Frechet
+ energy value obtained,
+ that is the mean of squared distances
+ of observations to the output.
+ - nb_iter, integer representing the number of iterations
+ performed before convergence of the algorithm.
"""
X = pdiagset # to shorten notations, not a copy
m = len(X) # number of diagrams we are averaging
if m == 0:
print("Warning: computing barycenter of empty diag set. Returns None")
return None
-
- nb_off_diag = np.array([len(X_i) for X_i in X]) # store the number of off-diagonal point for each of the X_i
-
+
+ # store the number of off-diagonal point for each of the X_i
+ nb_off_diag = np.array([len(X_i) for X_i in X])
# Initialisation of barycenter
if init is None:
i0 = np.random.randint(m) # Index of first state for the barycenter
@@ -144,7 +158,9 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
while not converged:
nb_iter += 1
K = len(Y) # current nb of points in Y (some might be on diagonal)
- G = np.zeros((K, m), dtype=int)-1 # will store for each j, the (index) point matched in each other diagram (might be the diagonal).
+ G = np.zeros((K, m), dtype=int)-1 # will store for each j, the (index)
+ # point matched in each other diagram
+ #(might be the diagonal).
# that is G[j, i] = k <=> y_j is matched to
# x_k in the diagram i-th diagram X[i]
updated_points = np.zeros((K, 2)) # will store the new positions of
@@ -159,16 +175,19 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
indices = _optimal_matching(Y, X[i])
for y_j, x_i_j in indices:
if y_j < K: # we matched an off diagonal point to x_i_j...
- if x_i_j < nb_off_diag[i]: # ...which is also an off-diagonal point
+ # ...which is also an off-diagonal point.
+ if x_i_j < nb_off_diag[i]:
G[y_j, i] = x_i_j
else: # ...which is a diagonal point
G[y_j, i] = -1 # -1 stands for the diagonal (mask)
else: # We matched a diagonal point to x_i_j...
- if x_i_j < nb_off_diag[i]: # which is a off-diag point ! so we need to create a new point in Y
- new_y = _mean(np.array([X[i][x_i_j]]), m) # Average this point with (m-1) copies of Delta
+ if x_i_j < nb_off_diag[i]: # which is a off-diag point !
+ # need to create new point in Y
+ new_y = _mean(np.array([X[i][x_i_j]]), m)
+ # Average this point with (m-1) copies of Delta
new_created_points.append(new_y)
- # Step 2 : Update current point position thanks to the groupings computed
+ # Step 2 : Update current point position thanks to groupings computed
to_delete = []
for j in range(K):
matched_points = [X[i][G[j, i]] for i in range(m) if G[j, i] > -1]
@@ -178,10 +197,10 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
else: # this points is no longer of any use.
to_delete.append(j)
# we remove the point to be deleted now.
- updated_points = np.delete(updated_points, to_delete, axis=0) # cannot be done in-place.
-
+ updated_points = np.delete(updated_points, to_delete, axis=0)
- if new_created_points: # we cannot converge if there have been new created points.
+ # we cannot converge if there have been new created points.
+ if new_created_points:
Y = np.concatenate((updated_points, new_created_points))
else:
# Step 3 : we check convergence