summaryrefslogtreecommitdiff
path: root/src/python
diff options
context:
space:
mode:
authortlacombe <lacombe1993@gmail.com>2020-02-14 18:45:34 +0100
committertlacombe <lacombe1993@gmail.com>2020-02-14 18:45:34 +0100
commitf8fe3fdb01f6161b57da732a1c3f0c14a8b359a6 (patch)
treeb1666d18be60489ca6225823b4782b054a03eb11 /src/python
parent3eaba12b66518717e90ffb1e410b7f8d769719cf (diff)
moved import after docstring + reduce lines < 80 char
Diffstat (limited to 'src/python')
-rw-r--r--src/python/gudhi/barycenter.py99
1 file changed, 59 insertions, 40 deletions
diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py
index a2af7a58..4a877b4a 100644
--- a/src/python/gudhi/barycenter.py
+++ b/src/python/gudhi/barycenter.py
@@ -1,9 +1,3 @@
-import ot
-import numpy as np
-import scipy.spatial.distance as sc
-
-from gudhi.wasserstein import _build_dist_matrix, _perstot
-
# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
# Author(s): Theo Lacombe
@@ -14,6 +8,13 @@ from gudhi.wasserstein import _build_dist_matrix, _perstot
# - YYYY/MM Author: Description of the modification
+import ot
+import numpy as np
+import scipy.spatial.distance as sc
+
+from gudhi.wasserstein import _build_dist_matrix, _perstot
+
+
def _proj_on_diag(w):
'''
Util function to project a point on the diag.
@@ -24,7 +25,8 @@ def _proj_on_diag(w):
def _mean(x, m):
"""
:param x: a list of 2D-points, off diagonal, x_0... x_{k-1}
- :param m: total amount of points taken into account, that is we have (m-k) copies of diagonal
+ :param m: total amount of points taken into account,
+ that is we have (m-k) copies of diagonal
:returns: the weighted mean of x with (m-k) copies of the diagonal
"""
k = len(x)
@@ -40,11 +42,14 @@ def _optimal_matching(X, Y, withcost=False):
"""
:param X: numpy.array of size (n x 2)
:param Y: numpy.array of size (m x 2)
- :param withcost: returns also the cost corresponding to this optimal matching
- :returns: numpy.array of shape (k x 2) encoding the list of edges in the optimal matching.
- That is, [[i, j] ...], where (i,j) indicates that X[i] is matched to Y[j]
- if i >= len(X) or j >= len(Y), it means they represent the diagonal.
- They will be encoded by -1 afterwards.
+ :param withcost: returns also the cost corresponding to the optimal matching
+ :returns: numpy.array of shape (k x 2) encoding the list of edges
+ in the optimal matching.
+ That is, [[i, j] ...], where (i,j) indicates
+ that X[i] is matched to Y[j]
+ if i >= len(X) or j >= len(Y), it means they
+ represent the diagonal.
+ They will be encoded by -1 afterwards.
"""
n = len(X)
@@ -52,7 +57,7 @@ def _optimal_matching(X, Y, withcost=False):
# Start by handling empty diagrams. Could it be shorten?
if X.size == 0: # X is empty
if Y.size == 0: # Y is empty
- res = np.array([[0,0]]) # the diagonal is matched to the diagonal and that's it...
+ res = np.array([[0,0]]) # the diagonal is matched to the diagonal
if withcost:
return res, 0
else:
@@ -75,18 +80,20 @@ def _optimal_matching(X, Y, withcost=False):
# we know X, Y are not empty diags now
M = _build_dist_matrix(X, Y, order=2, internal_p=2)
- a = np.full(n+1, 1. / (n + m) ) # weight vector of the input diagram. Uniform here.
- a[-1] = a[-1] * m # normalized so that we have a probability measure, required by POT
- b = np.full(m+1, 1. / (n + m) ) # weight vector of the input diagram. Uniform here.
- b[-1] = b[-1] * n # so that we have a probability measure, required by POT
+ a = np.full(n+1, 1. / (n + m) )
+ a[-1] = a[-1] * m
+ b = np.full(m+1, 1. / (n + m) )
+ b[-1] = b[-1] * n
P = ot.emd(a=a, b=b, M=M)*(n+m)
- # Note : it seems POT returns a permutation matrix in this situation, ie a vertex of the constraint set (generically true).
+ # Note : it seems POT returns a permutation matrix in this situation,
+ # ie a vertex of the constraint set (generically true).
if withcost:
cost = np.sum(np.multiply(P, M))
- P[P < 0.5] = 0 # dirty trick to avoid some numerical issues... to be improved.
+ P[P < 0.5] = 0 # dirty trick to avoid some numerical issues... to improve.
res = np.nonzero(P)
- # return the list of (i,j) such that P[i,j] > 0, i.e. x_i is matched to y_j (should it be the diag).
+ # return the list of (i,j) such that P[i,j] > 0,
+ #i.e. x_i is matched to y_j (should it be the diag).
if withcost:
return np.column_stack(res), cost
@@ -103,31 +110,38 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
persistence diagrams with only finite coordinates.
:param init: The initial value for barycenter estimate.
If None, init is made on a random diagram from the dataset.
- Otherwise, it must be an int (then we init with diagset[init])
- or a (n x 2) numpy.array enconding a persistence diagram with n points.
+ Otherwise, it must be an int
+ (then we init with diagset[init])
+ or a (n x 2) numpy.array enconding
+ a persistence diagram with n points.
:param verbose: if True, returns additional information about the
barycenter.
:returns: If not verbose (default), a numpy.array encoding
- the barycenter estimate (local minima of the energy function).
+ the barycenter estimate
+ (local minima of the energy function).
If verbose, returns a couple (Y, log)
where Y is the barycenter estimate,
and log is a dict that contains additional informations:
- groupings, a list of list of pairs (i,j),
- That is, G[k] = [(i, j) ...], where (i,j) indicates that X[i] is matched to Y[j]
- if i > len(X) or j > len(Y), it means they represent the diagonal.
- - energy, a float representing the Frechet energy value obtained,
- that is the mean of squared distances of observations to the output.
- - nb_iter, integer representing the number of iterations performed before convergence
- of the algorithm.
+ That is, G[k] = [(i, j) ...], where (i,j) indicates
+ that X[i] is matched to Y[j]
+ if i > len(X) or j > len(Y), it means they
+ represent the diagonal.
+ - energy, a float representing the Frechet
+ energy value obtained,
+ that is the mean of squared distances
+ of observations to the output.
+ - nb_iter, integer representing the number of iterations
+ performed before convergence of the algorithm.
"""
X = pdiagset # to shorten notations, not a copy
m = len(X) # number of diagrams we are averaging
if m == 0:
print("Warning: computing barycenter of empty diag set. Returns None")
return None
-
- nb_off_diag = np.array([len(X_i) for X_i in X]) # store the number of off-diagonal point for each of the X_i
-
+
+ # store the number of off-diagonal point for each of the X_i
+ nb_off_diag = np.array([len(X_i) for X_i in X])
# Initialisation of barycenter
if init is None:
i0 = np.random.randint(m) # Index of first state for the barycenter
@@ -144,7 +158,9 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
while not converged:
nb_iter += 1
K = len(Y) # current nb of points in Y (some might be on diagonal)
- G = np.zeros((K, m), dtype=int)-1 # will store for each j, the (index) point matched in each other diagram (might be the diagonal).
+ G = np.zeros((K, m), dtype=int)-1 # will store for each j, the (index)
+ # point matched in each other diagram
+ #(might be the diagonal).
# that is G[j, i] = k <=> y_j is matched to
# x_k in the diagram i-th diagram X[i]
updated_points = np.zeros((K, 2)) # will store the new positions of
@@ -159,16 +175,19 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
indices = _optimal_matching(Y, X[i])
for y_j, x_i_j in indices:
if y_j < K: # we matched an off diagonal point to x_i_j...
- if x_i_j < nb_off_diag[i]: # ...which is also an off-diagonal point
+ # ...which is also an off-diagonal point.
+ if x_i_j < nb_off_diag[i]:
G[y_j, i] = x_i_j
else: # ...which is a diagonal point
G[y_j, i] = -1 # -1 stands for the diagonal (mask)
else: # We matched a diagonal point to x_i_j...
- if x_i_j < nb_off_diag[i]: # which is a off-diag point ! so we need to create a new point in Y
- new_y = _mean(np.array([X[i][x_i_j]]), m) # Average this point with (m-1) copies of Delta
+ if x_i_j < nb_off_diag[i]: # which is a off-diag point !
+ # need to create new point in Y
+ new_y = _mean(np.array([X[i][x_i_j]]), m)
+ # Average this point with (m-1) copies of Delta
new_created_points.append(new_y)
- # Step 2 : Update current point position thanks to the groupings computed
+ # Step 2 : Update current point position thanks to groupings computed
to_delete = []
for j in range(K):
matched_points = [X[i][G[j, i]] for i in range(m) if G[j, i] > -1]
@@ -178,10 +197,10 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
else: # this points is no longer of any use.
to_delete.append(j)
# we remove the point to be deleted now.
- updated_points = np.delete(updated_points, to_delete, axis=0) # cannot be done in-place.
-
+ updated_points = np.delete(updated_points, to_delete, axis=0)
- if new_created_points: # we cannot converge if there have been new created points.
+ # we cannot converge if there have been new created points.
+ if new_created_points:
Y = np.concatenate((updated_points, new_created_points))
else:
# Step 3 : we check convergence