summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarc Glisse <marc.glisse@inria.fr>2020-04-06 19:37:58 +0200
committerMarc Glisse <marc.glisse@inria.fr>2020-04-06 19:37:58 +0200
commitdd96965e521313b6210391f511c82cced9b2a950 (patch)
tree6f7d28b0b3dce6df1d62e1bfbf29dc424eb8bba5
parent689563c163c453dea8ca50b4ec6f171a61d28301 (diff)
Remove trailing whitespace
-rw-r--r--src/python/doc/wasserstein_distance_user.rst72
-rw-r--r--src/python/gudhi/wasserstein/barycenter.py42
-rw-r--r--src/python/gudhi/wasserstein/wasserstein.py14
-rwxr-xr-xsrc/python/test/test_wasserstein_barycenter.py6
-rwxr-xr-xsrc/python/test/test_wasserstein_distance.py2
5 files changed, 68 insertions, 68 deletions
diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst
index b821b6fa..c24da74d 100644
--- a/src/python/doc/wasserstein_distance_user.rst
+++ b/src/python/doc/wasserstein_distance_user.rst
@@ -10,10 +10,10 @@ Definition
.. include:: wasserstein_distance_sum.inc
The q-Wasserstein distance is defined as the minimal value achieved
-by a perfect matching between the points of the two diagrams (+ all
-diagonal points), where the value of a matching is defined as the
+by a perfect matching between the points of the two diagrams (+ all
+diagonal points), where the value of a matching is defined as the
q-th root of the sum of all edge lengths to the power q. Edge lengths
-are measured in norm p, for :math:`1 \leq p \leq \infty`.
+are measured in norm p, for :math:`1 \leq p \leq \infty`.
Distance Functions
------------------
@@ -54,9 +54,9 @@ The output is:
Wasserstein distance value = 1.45
-We can also have access to the optimal matching by letting `matching=True`.
+We can also have access to the optimal matching by letting `matching=True`.
It is encoded as a list of indices (i,j), meaning that the i-th point in X
-is mapped to the j-th point in Y.
+is mapped to the j-th point in Y.
An index of -1 represents the diagonal.
.. testcode::
@@ -84,7 +84,7 @@ An index of -1 represents the diagonal.
The output is:
.. testoutput::
-
+
Wasserstein distance value = 2.15
point 0 in dgm1 is matched to point 0 in dgm2
point 1 in dgm1 is matched to point 2 in dgm2
@@ -94,32 +94,32 @@ The output is:
Barycenters
-----------
-A Frechet mean (or barycenter) is a generalization of the arithmetic
-mean in a non linear space such as the one of persistence diagrams.
-Given a set of persistence diagrams :math:`\mu_1 \dots \mu_n`, it is
-defined as a minimizer of the variance functional, that is of
-:math:`\mu \mapsto \sum_{i=1}^n d_2(\mu,\mu_i)^2`.
-where :math:`d_2` denotes the Wasserstein-2 distance between
-persistence diagrams.
-It is known to exist and is generically unique. However, an exact
-computation is in general untractable. Current implementation
-available is based on (Turner et al., 2014),
+A Frechet mean (or barycenter) is a generalization of the arithmetic
+mean in a non linear space such as the one of persistence diagrams.
+Given a set of persistence diagrams :math:`\mu_1 \dots \mu_n`, it is
+defined as a minimizer of the variance functional, that is of
+:math:`\mu \mapsto \sum_{i=1}^n d_2(\mu,\mu_i)^2`.
+where :math:`d_2` denotes the Wasserstein-2 distance between
+persistence diagrams.
+It is known to exist and is generically unique. However, an exact
+computation is in general untractable. Current implementation
+available is based on (Turner et al., 2014),
:cite:`turner2014frechet`
-and uses an EM-scheme to
-provide a local minimum of the variance functional (somewhat similar
-to the Lloyd algorithm to estimate a solution to the k-means
+and uses an EM-scheme to
+provide a local minimum of the variance functional (somewhat similar
+to the Lloyd algorithm to estimate a solution to the k-means
problem). The local minimum returned depends on the initialization of
-the barycenter.
-The combinatorial structure of the algorithm limits its
-performances on large scale problems (thousands of diagrams and of points
-per diagram).
+the barycenter.
+The combinatorial structure of the algorithm limits its
+performances on large scale problems (thousands of diagrams and of points
+per diagram).
+
+.. figure::
+ ./img/barycenter.png
+ :figclass: align-center
-.. figure::
- ./img/barycenter.png
- :figclass: align-center
-
- Illustration of Frechet mean between persistence
- diagrams.
+ Illustration of Frechet mean between persistence
+ diagrams.
.. autofunction:: gudhi.wasserstein.barycenter.lagrangian_barycenter
@@ -127,16 +127,16 @@ per diagram).
Basic example
*************
-This example estimates the Frechet mean (aka Wasserstein barycenter) between
+This example estimates the Frechet mean (aka Wasserstein barycenter) between
four persistence diagrams.
It is initialized on the 4th diagram.
-As the algorithm is not convex, its output depends on the initialization and
+As the algorithm is not convex, its output depends on the initialization and
is only a local minimum of the objective function.
-Initialization can be either given as an integer (in which case the i-th
-diagram of the list is used as initial estimate) or as a diagram.
-If None, it will randomly select one of the diagrams of the list
+Initialization can be either given as an integer (in which case the i-th
+diagram of the list is used as initial estimate) or as a diagram.
+If None, it will randomly select one of the diagrams of the list
as initial estimate.
-Note that persistence diagrams must be submitted as
+Note that persistence diagrams must be submitted as
(n x 2) numpy arrays and must not contain inf values.
@@ -152,7 +152,7 @@ Note that persistence diagrams must be submitted as
pdiagset = [dg1, dg2, dg3, dg4]
bary = lagrangian_barycenter(pdiagset=pdiagset,init=3)
- message = "Wasserstein barycenter estimated:"
+ message = "Wasserstein barycenter estimated:"
print(message)
print(bary)
diff --git a/src/python/gudhi/wasserstein/barycenter.py b/src/python/gudhi/wasserstein/barycenter.py
index 99f29a1e..de7aea81 100644
--- a/src/python/gudhi/wasserstein/barycenter.py
+++ b/src/python/gudhi/wasserstein/barycenter.py
@@ -18,7 +18,7 @@ from gudhi.wasserstein import wasserstein_distance
def _mean(x, m):
'''
:param x: a list of 2D-points, off diagonal, x_0... x_{k-1}
- :param m: total amount of points taken into account,
+ :param m: total amount of points taken into account,
that is we have (m-k) copies of diagonal
:returns: the weighted mean of x with (m-k) copies of the diagonal
'''
@@ -33,14 +33,14 @@ def _mean(x, m):
def lagrangian_barycenter(pdiagset, init=None, verbose=False):
'''
- :param pdiagset: a list of ``numpy.array`` of shape `(n x 2)`
- (`n` can variate), encoding a set of
- persistence diagrams with only finite coordinates.
- :param init: The initial value for barycenter estimate.
- If ``None``, init is made on a random diagram from the dataset.
- Otherwise, it can be an ``int``
+ :param pdiagset: a list of ``numpy.array`` of shape `(n x 2)`
+ (`n` can variate), encoding a set of
+ persistence diagrams with only finite coordinates.
+ :param init: The initial value for barycenter estimate.
+ If ``None``, init is made on a random diagram from the dataset.
+ Otherwise, it can be an ``int``
(then initialization is made on ``pdiagset[init]``)
- or a `(n x 2)` ``numpy.array`` enconding
+ or a `(n x 2)` ``numpy.array`` enconding
a persistence diagram with `n` points.
:type init: ``int``, or (n x 2) ``np.array``
:param verbose: if ``True``, returns additional information about the
@@ -48,16 +48,16 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
:type verbose: boolean
:returns: If not verbose (default), a ``numpy.array`` encoding
the barycenter estimate of pdiagset
- (local minimum of the energy function).
+ (local minimum of the energy function).
If ``pdiagset`` is empty, returns ``None``.
If verbose, returns a couple ``(Y, log)``
where ``Y`` is the barycenter estimate,
and ``log`` is a ``dict`` that contains additional informations:
- `"groupings"`, a list of list of pairs ``(i,j)``.
- Namely, ``G[k] = [...(i, j)...]``, where ``(i,j)`` indicates
+ Namely, ``G[k] = [...(i, j)...]``, where ``(i,j)`` indicates
that ``pdiagset[k][i]`` is matched to ``Y[j]``
- if ``i = -1`` or ``j = -1``, it means they
+ if ``i = -1`` or ``j = -1``, it means they
represent the diagonal.
- `"energy"`, ``float`` representing the Frechet energy value obtained.
@@ -70,13 +70,13 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
if m == 0:
print("Warning: computing barycenter of empty diag set. Returns None")
return None
-
+
# store the number of off-diagonal point for each of the X_i
- nb_off_diag = np.array([len(X_i) for X_i in X])
+ nb_off_diag = np.array([len(X_i) for X_i in X])
# Initialisation of barycenter
if init is None:
i0 = np.random.randint(m) # Index of first state for the barycenter
- Y = X[i0].copy()
+ Y = X[i0].copy()
else:
if type(init)==int:
Y = X[init].copy()
@@ -90,8 +90,8 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
nb_iter += 1
K = len(Y) # current nb of points in Y (some might be on diagonal)
G = np.full((K, m), -1, dtype=int) # will store for each j, the (index)
- # point matched in each other diagram
- #(might be the diagonal).
+ # point matched in each other diagram
+ #(might be the diagonal).
# that is G[j, i] = k <=> y_j is matched to
# x_k in the diagram i-th diagram X[i]
updated_points = np.zeros((K, 2)) # will store the new positions of
@@ -111,7 +111,7 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
else: # ...which is a diagonal point
G[y_j, i] = -1 # -1 stands for the diagonal (mask)
else: # We matched a diagonal point to x_i_j...
- if x_i_j >= 0: # which is a off-diag point !
+ if x_i_j >= 0: # which is a off-diag point !
# need to create new point in Y
new_y = _mean(np.array([X[i][x_i_j]]), m)
# Average this point with (m-1) copies of Delta
@@ -123,19 +123,19 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False):
matched_points = [X[i][G[j, i]] for i in range(m) if G[j, i] > -1]
new_y_j = _mean(matched_points, m)
if not np.array_equal(new_y_j, np.array([0,0])):
- updated_points[j] = new_y_j
+ updated_points[j] = new_y_j
else: # this points is no longer of any use.
to_delete.append(j)
# we remove the point to be deleted now.
- updated_points = np.delete(updated_points, to_delete, axis=0)
+ updated_points = np.delete(updated_points, to_delete, axis=0)
# we cannot converge if there have been new created points.
- if new_created_points:
+ if new_created_points:
Y = np.concatenate((updated_points, new_created_points))
else:
# Step 3 : we check convergence
if np.array_equal(updated_points, Y):
- converged = True
+ converged = True
Y = updated_points
diff --git a/src/python/gudhi/wasserstein/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py
index e1233eec..35315939 100644
--- a/src/python/gudhi/wasserstein/wasserstein.py
+++ b/src/python/gudhi/wasserstein/wasserstein.py
@@ -30,9 +30,9 @@ def _build_dist_matrix(X, Y, order=2., internal_p=2.):
:param Y: (m x 2) numpy.array encoding the second diagram.
:param order: exponent for the Wasserstein metric.
:param internal_p: Ground metric (i.e. norm L^p).
- :returns: (n+1) x (m+1) np.array encoding the cost matrix C.
- For 0 <= i < n, 0 <= j < m, C[i,j] encodes the distance between X[i] and Y[j],
- while C[i, m] (resp. C[n, j]) encodes the distance (to the p) between X[i] (resp Y[j])
+ :returns: (n+1) x (m+1) np.array encoding the cost matrix C.
+ For 0 <= i < n, 0 <= j < m, C[i,j] encodes the distance between X[i] and Y[j],
+ while C[i, m] (resp. C[n, j]) encodes the distance (to the p) between X[i] (resp Y[j])
and its orthogonal projection onto the diagonal.
note also that C[n, m] = 0 (it costs nothing to move from the diagonal to the diagonal).
'''
@@ -59,7 +59,7 @@ def _perstot(X, order, internal_p):
:param X: (n x 2) numpy.array (points of a given diagram).
:param order: exponent for Wasserstein. Default value is 2.
:param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm).
- :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram).
+ :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram).
'''
Xdiag = _proj_on_diag(X)
return (np.sum(np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order))**(1./order)
@@ -67,16 +67,16 @@ def _perstot(X, order, internal_p):
def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2.):
'''
- :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points
+ :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points
(i.e. with infinite coordinate).
:param Y: (m x 2) numpy.array encoding the second diagram.
:param matching: if True, computes and returns the optimal matching between X and Y, encoded as
a (n x 2) np.array [...[i,j]...], meaning the i-th point in X is matched to
the j-th point in Y, with the convention (-1) represents the diagonal.
:param order: exponent for Wasserstein; Default value is 2.
- :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2);
+ :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2);
Default value is 2 (Euclidean norm).
- :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with
+ :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with
respect to the internal_p-norm as ground metric.
If matching is set to True, also returns the optimal matching between X and Y.
'''
diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py
index f686aef5..f68c748e 100755
--- a/src/python/test/test_wasserstein_barycenter.py
+++ b/src/python/test/test_wasserstein_barycenter.py
@@ -17,7 +17,7 @@ __license__ = "MIT"
def test_lagrangian_barycenter():
-
+
dg1 = np.array([[0.2, 0.5]])
dg2 = np.array([[0.2, 0.7]])
dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]])
@@ -28,12 +28,12 @@ def test_lagrangian_barycenter():
dg7 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]])
dg8 = np.array([[0., 4.], [4, 8]])
-
+
# error crit.
eps = 1e-7
- assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < eps
+ assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < eps
assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), np.empty(shape=(0,2)))
assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < eps
Y, log = lagrangian_barycenter(pdiagset=[dg4, dg8], verbose=True)
diff --git a/src/python/test/test_wasserstein_distance.py b/src/python/test/test_wasserstein_distance.py
index 0d70e11a..7e0d0f5f 100755
--- a/src/python/test/test_wasserstein_distance.py
+++ b/src/python/test/test_wasserstein_distance.py
@@ -70,7 +70,7 @@ def _basic_wasserstein(wasserstein_distance, delta, test_infinity=True, test_mat
assert np.array_equal(match , [[0, -1], [1, -1]])
match = wasserstein_distance(diag1, diag2, matching=True, internal_p=2., order=2.)[1]
assert np.array_equal(match, [[0, 0], [1, 1], [2, -1]])
-
+
def hera_wrap(delta):