From cce93208f383969d718c92c526c5e834cd3a2733 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Fri, 18 Oct 2019 22:43:09 +0200 Subject: commit first draft of barycenter.py --- src/python/gudhi/barycenter.py | 187 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100644 src/python/gudhi/barycenter.py (limited to 'src') diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py new file mode 100644 index 00000000..c46f6926 --- /dev/null +++ b/src/python/gudhi/barycenter.py @@ -0,0 +1,187 @@ +import ot +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.patches import Polygon + +def _proj_on_diag(x): + return np.array([(x[0] + x[1]) / 2, (x[0] + x[1]) / 2]) + + +def _norm2(x, y): + return (y[0] - x[0])**2 + (y[1] - x[1])**2 + + +def _norm_inf(x, y): + return np.max(np.abs(y[0] - x[0]), np.abs(y[1] - x[1])) + + +def _cost_matrix(X, Y): + """ + :param X: (n x 2) numpy.array encoding the first diagram + :param Y: (m x 2) numpy.array encoding the second diagram + :return: The cost matrix with size (k x k) where k = |d_1| + |d_2| in order to encode matching to diagonal + """ + n, m = len(X), len(Y) + k = n + m + M = np.zeros((k, k)) + for i in range(n): # go throught X points + x_i = X[i] + p_x_i = _proj_on_diag(x_i) # proj of x_i on the diagonal + dist_x_delta = _norm2(x_i, p_x_i) # distance to the diagonal regarding the ground norm + for j in range(m): # go throught d_2 points + y_j = Y[j] + p_y_j = _proj_on_diag(y_j) + M[i, j] = _norm2(x_i, y_j) + dist_y_delta = _norm2(y_j, p_y_j) + for it in range(m): + M[n + it, j] = dist_y_delta + for it in range(n): + M[i, m + it] = dist_x_delta + + return M + + +def _optimal_matching(M): + n = len(M) + # if input weights are empty lists, pot treat the uniform assignement problem and returns a bistochastic matrix (up to *n). 
+ P = ot.emd(a=[], b=[], M=M) * n + # return the list of indices j such that L[i] = j iff P[i,j] = 1 + return np.nonzero(P)[1] + + +def _mean(x, m): + """ + :param x: a list of 2D-points, of diagonal, x_0... x_{k-1} + :param m: total amount of points taken into account, that is we have (m-k) copies of diagonal + :returns: the weighted mean of x with (m-k) copies of Delta taken into account (defined by mukherjee etc.) + """ + k = len(x) + if k > 0: + w = np.mean(x, axis=0) + w_delta = _proj_on_diag(w) + return (k * w + (m-k) * w_delta) / m + else: + return np.array([0, 0]) + + +def lagrangian_barycenter(pdiagset, init=None, verbose=False): + """ + Compute the estimated barycenter computed with the Hungarian algorithm provided by Mukherjee et al + It is a local minima of the corresponding Frechet function. + It exactly belongs to the persistence diagram space (because all computations are made on it). + :param pdiagset: a list of size N containing numpy.array of shape (n x + 2) (n can variate), encoding a set of persistence diagrams with only finite + coordinates. + :param init: The initial value for barycenter estimate. If None, init is made on a random diagram from the dataset. Otherwise, it must be a (n x 2) numpy.array enconding a persistence diagram with n points. + :returns: If not verbose (default), the barycenter estimate (local minima of the energy function). If verbose, returns a triplet (Y, a, e) where Y is the barycenter estimate, a is the assignments between the points of Y and thoses of the diagrams, and e is the energy value reached by the estimate. 
+ """ + m = len(pdiagset) # number of diagrams we are averaging + X = pdiagset # to shorten notations + nb_off_diag = np.array([len(X_i) for X_i in X]) # store the number of off-diagonal point for each of the X_i + + # Initialisation of barycenter + if init is None: + i0 = np.random.randint(m) # Index of first state for the barycenter + Y = X[i0].copy() + else: + Y = init.copy() + + not_converged = True # stoping criterion + while not_converged: + K = len(Y) # current nb of points in Y (some might be on diagonal) + G = np.zeros((K, m)) # will store for each j, the (index) point matched in each other diagram (might be the diagonal). + updated_points = np.zeros((K, 2)) # will store the new positions of the points of Y + new_created_points = [] # will store eventual new points. + + # Step 1 : compute optimal matching (Y, X_i) for each X_i + for i in range(m): + M = _cost_matrix(Y, X[i]) + indices = _optimal_matching(M) + for y_j, x_i_j in enumerate(indices): + if y_j < K: # we matched an off diagonal point to x_i_j... + if x_i_j < nb_off_diag[i]: # ...which is also an off-diagonal point + G[y_j, i] = x_i_j + else: # ...which is a diagonal point + G[y_j, i] = -1 # -1 stands for the diagonal (mask) + else: # We matched a diagonal point to x_i_j... + if x_i_j < nb_off_diag[i]: # which is a off-diag point ! 
so we need to create a new point in Y + new_y = _mean(np.array([X[i][x_i_j]]), m) # Average this point with (m-1) copies of Delta + new_created_points.append(new_y) + + # Step 2 : Compute new points (mean) + for j in range(K): + matched_points = [X[i][int(G[j, i])] for i in range(m) if G[j, i] > -1] + updated_points[j] = _mean(matched_points, m) + + if new_created_points: + Y = np.concatenate((updated_points, new_created_points)) + else: + Y = updated_points + + # Step 3 : we update our estimation of the barycenter + if len(new_created_points) == 0 and np.array_equal(updated_points, Y): + not_converged = False + + if verbose: + matchings = [] + energy = 0 + n_y = len(Y) + for i in range(m): + M = _cost_matrix(Y, X[i]) + edges = _optimal_matching(M) + matchings.append([x_i_j for (y_j, x_i_j) in enumerate(edges) if y_j < n_y]) + #energy += total_cost + + #energy /= m + _plot_barycenter(X, Y, matchings) + plt.show() + return Y, matchings, energy + else: + return Y + +def _plot_barycenter(X, Y, matchings): + fig = plt.figure() + ax = fig.add_subplot(111) + + # n_y = len(Y.points) + for i in range(len(X)): + indices = matchings[i] + n_i = len(X[i]) + + for (y_j, x_i_j) in enumerate(indices): + y = Y[y_j] + if y[0] != y[1]: + if x_i_j < n_i: # not mapped with the diag + x = X[i][x_i_j] + else: # y_j is matched to the diagonal + x = _proj_on_diag(y) + ax.plot([y[0], x[0]], [y[1], x[1]], c='black', + linestyle="dashed") + + ax.scatter(Y[:,0], Y[:,1], color='purple', marker='d') + + for dgm in X: + ax.scatter(dgm[:,0], dgm[:,1], marker ='o') + + shift = 0.1 # for improved rendering + xmin = min([np.min(x[:,0]) for x in X]) - shift + xmax = max([np.max(x[:,0]) for x in X]) + shift + ymin = min([np.max(x[:,1]) for x in X]) - shift + ymax = max([np.max(x[:,1]) for x in X]) + shift + themin = min(xmin, ymin) + themax = max(xmax, ymax) + ax.set_xlim(themin, themax) + ax.set_ylim(themin, themax) + ax.add_patch(Polygon([[themin,themin], [themax,themin], [themax,themax]], 
fill=True, color='lightgrey')) + ax.set_xticks([]) + ax.set_yticks([]) + ax.set_aspect('equal', adjustable='box') + ax.set_title("example of (estimated) barycenter") + + +if __name__=="__main__": + dg1 = np.array([[0.1, 0.12], [0.21, 0.7], [0.4, 0.5], [0.3, 0.4], [0.35, 0.7], [0.5, 0.55], [0.32, 0.42], [0.1, 0.4], [0.2, 0.4]]) + dg2 = np.array([[0.09, 0.11], [0.3, 0.43], [0.5, 0.61], [0.3, 0.7], [0.42, 0.5], [0.35, 0.41], [0.74, 0.9], [0.5, 0.95], [0.35, 0.45], [0.13, 0.48], [0.32, 0.45]]) + dg3 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]]) + X = [dg1, dg2, dg3] + Y, a, e = lagrangian_barycenter(X, verbose=True) -- cgit v1.2.3 From 48f7e17c5e9d4f6936bfdf6384015fe833e30c74 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Fri, 18 Oct 2019 23:18:53 +0200 Subject: updated documentation in barycenter.py --- src/python/gudhi/barycenter.py | 78 ++++++++++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py index c46f6926..85666631 100644 --- a/src/python/gudhi/barycenter.py +++ b/src/python/gudhi/barycenter.py @@ -4,22 +4,30 @@ import matplotlib.pyplot as plt from matplotlib.patches import Polygon def _proj_on_diag(x): + """ + :param x: numpy.array of length 2, encoding a point on the upper half plane. + :returns: numpy.array of length 2, orthogonal projection of the point onto + the diagonal. + """ return np.array([(x[0] + x[1]) / 2, (x[0] + x[1]) / 2]) def _norm2(x, y): + """ + :param x: numpy.array of length 2, encoding a point on the upper half plane. + :param y: numpy.array of length 2, encoding a point on the upper half plane. + :returns: distance between the two points for the euclidean norm. 
+ """ return (y[0] - x[0])**2 + (y[1] - x[1])**2 -def _norm_inf(x, y): - return np.max(np.abs(y[0] - x[0]), np.abs(y[1] - x[1])) - - def _cost_matrix(X, Y): """ :param X: (n x 2) numpy.array encoding the first diagram :param Y: (m x 2) numpy.array encoding the second diagram - :return: The cost matrix with size (k x k) where k = |d_1| + |d_2| in order to encode matching to diagonal + :return: numpy.array with size (k x k) where k = |X| + |Y|, encoding the + cost matrix between points (including the diagonal, with repetition to + ensure one-to-one matchings. """ n, m = len(X), len(Y) k = n + m @@ -42,8 +50,15 @@ def _cost_matrix(X, Y): def _optimal_matching(M): + """ + :param M: numpy.array of size (k x k), encoding the cost matrix between the + points of two diagrams. + :returns: list of length (k) such that L[i] = j if and only if P[i,j]=1 + where P is a bi-stochastic matrix that minimize . + """ n = len(M) - # if input weights are empty lists, pot treat the uniform assignement problem and returns a bistochastic matrix (up to *n). + # if input weights are empty lists, pot treats the uniform assignement + # problem and returns a bistochastic matrix (up to *n). P = ot.emd(a=[], b=[], M=M) * n # return the list of indices j such that L[i] = j iff P[i,j] = 1 return np.nonzero(P)[1] @@ -53,7 +68,8 @@ def _mean(x, m): """ :param x: a list of 2D-points, of diagonal, x_0... x_{k-1} :param m: total amount of points taken into account, that is we have (m-k) copies of diagonal - :returns: the weighted mean of x with (m-k) copies of Delta taken into account (defined by mukherjee etc.) + :returns: the weighted mean of x with (m-k) copies of Delta taken into + account. """ k = len(x) if k > 0: @@ -66,14 +82,23 @@ def _mean(x, m): def lagrangian_barycenter(pdiagset, init=None, verbose=False): """ - Compute the estimated barycenter computed with the Hungarian algorithm provided by Mukherjee et al - It is a local minima of the corresponding Frechet function. 
- It exactly belongs to the persistence diagram space (because all computations are made on it). - :param pdiagset: a list of size N containing numpy.array of shape (n x - 2) (n can variate), encoding a set of persistence diagrams with only finite - coordinates. - :param init: The initial value for barycenter estimate. If None, init is made on a random diagram from the dataset. Otherwise, it must be a (n x 2) numpy.array enconding a persistence diagram with n points. - :returns: If not verbose (default), the barycenter estimate (local minima of the energy function). If verbose, returns a triplet (Y, a, e) where Y is the barycenter estimate, a is the assignments between the points of Y and thoses of the diagrams, and e is the energy value reached by the estimate. + Compute the estimated barycenter computed with the algorithm provided + by Turner et al (2014). + It is a local minima of the corresponding Frechet function. + :param pdiagset: a list of size N containing numpy.array of shape (n x 2) + (n can variate), encoding a set of + persistence diagrams with only finite coordinates. + :param init: The initial value for barycenter estimate. + If None, init is made on a random diagram from the dataset. + Otherwise, it must be a (n x 2) numpy.array enconding a persistence diagram with n points. + :param verbose: if True, returns additional information about the + barycenters (assignment and energy). + :returns: If not verbose (default), a numpy.array encoding + the barycenter estimate (local minima of the energy function). + If verbose, returns a triplet (Y, a, e) + where Y is the barycenter estimate, a is the assignments between the + points of Y and thoses of the diagrams, + and e is the energy value reached by the estimate. 
""" m = len(pdiagset) # number of diagrams we are averaging X = pdiagset # to shorten notations @@ -90,7 +115,10 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): while not_converged: K = len(Y) # current nb of points in Y (some might be on diagonal) G = np.zeros((K, m)) # will store for each j, the (index) point matched in each other diagram (might be the diagonal). - updated_points = np.zeros((K, 2)) # will store the new positions of the points of Y + updated_points = np.zeros((K, 2)) # will store the new positions of + # the points of Y. + # If points disappear, there thrown + # on [0,0] by default. new_created_points = [] # will store eventual new points. # Step 1 : compute optimal matching (Y, X_i) for each X_i @@ -130,16 +158,22 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): M = _cost_matrix(Y, X[i]) edges = _optimal_matching(M) matchings.append([x_i_j for (y_j, x_i_j) in enumerate(edges) if y_j < n_y]) - #energy += total_cost + energy += sum([M[i,j] for i,j in enumerate(edges)]) - #energy /= m - _plot_barycenter(X, Y, matchings) - plt.show() + energy = energy/m return Y, matchings, energy else: return Y def _plot_barycenter(X, Y, matchings): + """ + :param X: list of persistence diagrams. + :param Y: numpy.array of (n x 2). Aims to be an estimate of the barycenter + returned by lagrangian_barycenter(X, verbose=True). + :param matchings: list of lists, such that L[k][i] = j if and only if + the i-th point of the barycenter is grouped with the j-th point of the k-th + diagram. 
+ """ fig = plt.figure() ax = fig.add_subplot(111) @@ -176,7 +210,7 @@ def _plot_barycenter(X, Y, matchings): ax.set_xticks([]) ax.set_yticks([]) ax.set_aspect('equal', adjustable='box') - ax.set_title("example of (estimated) barycenter") + ax.set_title("Estimated barycenter") if __name__=="__main__": @@ -185,3 +219,5 @@ if __name__=="__main__": dg3 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]]) X = [dg1, dg2, dg3] Y, a, e = lagrangian_barycenter(X, verbose=True) + _plot_barycenter(X, Y, a) + plt.show() -- cgit v1.2.3 From 80aa14d1b92d1a61366d798b07073289d4db4fda Mon Sep 17 00:00:00 2001 From: tlacombe Date: Thu, 5 Dec 2019 18:42:48 +0100 Subject: first version of barycenter for persistence diagrams --- src/python/doc/barycenter_sum.inc | 22 +++ src/python/doc/barycenter_user.rst | 51 ++++++ src/python/gudhi/barycenter.py | 322 +++++++++++++++++++++++++------------ 3 files changed, 292 insertions(+), 103 deletions(-) create mode 100644 src/python/doc/barycenter_sum.inc create mode 100644 src/python/doc/barycenter_user.rst (limited to 'src') diff --git a/src/python/doc/barycenter_sum.inc b/src/python/doc/barycenter_sum.inc new file mode 100644 index 00000000..7801a845 --- /dev/null +++ b/src/python/doc/barycenter_sum.inc @@ -0,0 +1,22 @@ +.. table:: + :widths: 30 50 20 + + +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ + | .. figure:: | A Frechet mean (or barycenter) is a generalization of the arithmetic | :Author: Theo Lacombe | + | ../../doc/Barycenter/barycenter.png | mean in a non linear space such as the one of persistence diagrams. 
| | + | :figclass: align-center | Given a set of persistence diagrams :math:`\mu_1 \dots \mu_n`, it is | :Introduced in: GUDHI 3.1.0 | + | | defined as a minimizer of the variance functional, that is of | | + | Illustration of Frechet mean between persistence | :math:`\mu \mapsto \sum_{i=1}^n d_2(\mu,\mu_i)^2`. | :Copyright: MIT | + | diagrams. | where :math:`d_2` denotes the Wasserstein-2 distance between persis- | | + | | tence diagrams. | | + | | It is known to exist and is generically unique. However, an exact | | + | | computation is in general intractable. Current implementation avai- | :Requires: Python Optimal Transport (POT) :math:`\geq` 0.5.1 | + | | -lable is based on [Turner et al, 2014], and uses an EM-scheme to | | + | | provide a local minimum of the variance functional (somewhat similar | | + | | to the Lloyd algorithm to estimate a solution to the k-means | | + | | problem). The combinatorial structure of the algorithm limits its | | + | | scaling on large scale problems (thousands of diagrams and of points | | + | | per diagram). | | + +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ + | * :doc:`barycenter_user` | | + +-----------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/barycenter_user.rst b/src/python/doc/barycenter_user.rst new file mode 100644 index 00000000..fae2854a --- /dev/null +++ b/src/python/doc/barycenter_user.rst @@ -0,0 +1,51 @@ +:orphan: + +.. To get rid of WARNING: document isn't included in any toctree + +Barycenter user manual +================================ +Definition +---------- + +.. 
include:: barycenter_sum.inc + +This implementation is based on ideas from "Large Scale Computation of Means and Cluster for Persistence Diagrams via Optimal Transport". + +Function +-------- +.. autofunction:: gudhi.barycenter.lagrangian_barycenter + + +Basic example +------------- + +This example computes the Frechet mean (aka Wasserstein barycenter) between four persistence diagrams. +It is initialized on the 4th diagram, which is the empty diagram. It is encoded by np.array([]). +Note that persistence diagrams must be submitted as (n x 2) numpy arrays and must not contain inf values. + +.. testcode:: + + import gudhi.barycenter + import numpy as np + + dg1 = np.array([[0.2, 0.5]]) + dg2 = np.array([[0.2, 0.7]]) + dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]]) + dg4 = np.array([]) + + bary = gudhi.barycenter.lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3) + + message = "Wasserstein barycenter estimated:" + print(message) + print(bary) + +The output is: + +.. testoutput:: + + Wasserstein barycenter estimated: + [[0.27916667 0.55416667] + [0.7375 0.7625 ] + [0.2375 0.2625 ]] + + diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py index 85666631..3cd214a7 100644 --- a/src/python/gudhi/barycenter.py +++ b/src/python/gudhi/barycenter.py @@ -1,75 +1,105 @@ import ot import numpy as np -import matplotlib.pyplot as plt -from matplotlib.patches import Polygon +import scipy.spatial.distance as sc -def _proj_on_diag(x): - """ - :param x: numpy.array of length 2, encoding a point on the upper half plane. - :returns: numpy.array of length 2, orthogonal projection of the point onto - the diagonal. - """ - return np.array([(x[0] + x[1]) / 2, (x[0] + x[1]) / 2]) +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. 
+# Author(s): Theo Lacombe +# +# Copyright (C) 2019 Inria +# +# Modification(s): +# - YYYY/MM Author: Description of the modification -def _norm2(x, y): - """ - :param x: numpy.array of length 2, encoding a point on the upper half plane. - :param y: numpy.array of length 2, encoding a point on the upper half plane. - :returns: distance between the two points for the euclidean norm. - """ - return (y[0] - x[0])**2 + (y[1] - x[1])**2 +def _proj_on_diag(w): + ''' + Util function to project a point on the diag. + ''' + return np.array([(w[0] + w[1])/2 , (w[0] + w[1])/2]) -def _cost_matrix(X, Y): - """ - :param X: (n x 2) numpy.array encoding the first diagram - :param Y: (m x 2) numpy.array encoding the second diagram - :return: numpy.array with size (k x k) where k = |X| + |Y|, encoding the - cost matrix between points (including the diagonal, with repetition to - ensure one-to-one matchings. - """ - n, m = len(X), len(Y) - k = n + m - M = np.zeros((k, k)) - for i in range(n): # go throught X points - x_i = X[i] - p_x_i = _proj_on_diag(x_i) # proj of x_i on the diagonal - dist_x_delta = _norm2(x_i, p_x_i) # distance to the diagonal regarding the ground norm - for j in range(m): # go throught d_2 points - y_j = Y[j] - p_y_j = _proj_on_diag(y_j) - M[i, j] = _norm2(x_i, y_j) - dist_y_delta = _norm2(y_j, p_y_j) - for it in range(m): - M[n + it, j] = dist_y_delta - for it in range(n): - M[i, m + it] = dist_x_delta - - return M - - -def _optimal_matching(M): + +def _proj_on_diag_array(X): + ''' + :param X: (n x 2) array encoding the points of a persistent diagram. + :returns: (n x 2) array encoding the (respective orthogonal) projections of the points onto the diagonal + ''' + Z = (X[:,0] + X[:,1]) / 2. + return np.array([Z , Z]).T + + +def _build_dist_matrix(X, Y, p=2., q=2.): + ''' + :param X: (n x 2) numpy.array encoding the (points of the) first diagram. + :param Y: (m x 2) numpy.array encoding the second diagram. + :param q: Ground metric (i.e. norm l_q). 
+ :param p: exponent for the Wasserstein metric. + :returns: (n+1) x (m+1) np.array encoding the cost matrix C. + For 1 <= i <= n, 1 <= j <= m, C[i,j] encodes the distance between X[i] and Y[j], while C[i, m+1] (resp. C[n+1, j]) encodes the distance (to the p) between X[i] (resp Y[j]) and its orthogonal proj onto the diagonal. + note also that C[n+1, m+1] = 0 (it costs nothing to move from the diagonal to the diagonal). + Note that for lagrangian_barycenter, one must use p=q=2. + ''' + Xdiag = _proj_on_diag_array(X) + Ydiag = _proj_on_diag_array(Y) + if np.isinf(q): + C = sc.cdist(X, Y, metric='chebyshev')**p + Cxd = np.linalg.norm(X - Xdiag, ord=q, axis=1)**p + Cdy = np.linalg.norm(Y - Ydiag, ord=q, axis=1)**p + else: + C = sc.cdist(X,Y, metric='minkowski', p=q)**p + Cxd = np.linalg.norm(X - Xdiag, ord=q, axis=1)**p + Cdy = np.linalg.norm(Y - Ydiag, ord=q, axis=1)**p + Cf = np.hstack((C, Cxd[:,None])) + Cdy = np.append(Cdy, 0) + + Cf = np.vstack((Cf, Cdy[None,:])) + + return Cf + + +def _optimal_matching(X, Y): """ - :param M: numpy.array of size (k x k), encoding the cost matrix between the - points of two diagrams. - :returns: list of length (k) such that L[i] = j if and only if P[i,j]=1 - where P is a bi-stochastic matrix that minimize . + :param X: numpy.array of size (n x 2) + :param Y: numpy.array of size (m x 2) + :returns: numpy.array of shape (k x 2) encoding the list of edges in the optimal matching. + That is, [[(i, j) ...], where (i,j) indicates that X[i] is matched to Y[j] + if i > len(X) or j > len(Y), it means they represent the diagonal. + """ - n = len(M) - # if input weights are empty lists, pot treats the uniform assignement - # problem and returns a bistochastic matrix (up to *n). 
- P = ot.emd(a=[], b=[], M=M) * n - # return the list of indices j such that L[i] = j iff P[i,j] = 1 - return np.nonzero(P)[1] + + n = len(X) + m = len(Y) + if X.size == 0: # X is empty + if Y.size == 0: # Y is empty + return np.array([[0,0]]) # the diagonal is matched to the diagonal and that's it... + else: + return np.column_stack([np.zeros(m+1, dtype=int), np.arange(m+1, dtype=int)]) # TO BE CORRECTED + elif Y.size == 0: # X is not empty but Y is empty + return np.column_stack([np.zeros(n+1, dtype=int), np.arange(n+1, dtype=int)]) # TO BE CORRECTED + + # we know X, Y are not empty diags now + M = _build_dist_matrix(X, Y) + + a = np.full(n+1, 1. / (n + m) ) # weight vector of the input diagram. Uniform here. + a[-1] = a[-1] * m # normalized so that we have a probability measure, required by POT + b = np.full(m+1, 1. / (n + m) ) # weight vector of the input diagram. Uniform here. + b[-1] = b[-1] * n # so that we have a probability measure, required by POT + P = ot.emd(a=a, b=b, M=M)*(n+m) + # Note : it seems POT return a permutation matrix in this situation, + # ...guarantee...? + # It should be enough to check that the algorithm only iterates on vertices of the transportation polytope. + P[P < 0.5] = 0 # dirty trick to avoid some numerical issues... to be improved. + # return the list of (i,j) such that P[i,j] > 0, i.e. x_i is matched to y_j (should it be the diag). + res = np.nonzero(P) + return np.column_stack(res) def _mean(x, m): """ - :param x: a list of 2D-points, of diagonal, x_0... x_{k-1} + :param x: a list of 2D-points, off diagonal, x_0... x_{k-1} :param m: total amount of points taken into account, that is we have (m-k) copies of diagonal - :returns: the weighted mean of x with (m-k) copies of Delta taken into - account. 
+ :returns: the weighted mean of x with (m-k) copies of the diagonal """ k = len(x) if k > 0: @@ -88,44 +118,54 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): :param pdiagset: a list of size N containing numpy.array of shape (n x 2) (n can variate), encoding a set of persistence diagrams with only finite coordinates. - :param init: The initial value for barycenter estimate. - If None, init is made on a random diagram from the dataset. - Otherwise, it must be a (n x 2) numpy.array enconding a persistence diagram with n points. - :param verbose: if True, returns additional information about the - barycenters (assignment and energy). - :returns: If not verbose (default), a numpy.array encoding + :param init: The initial value for barycenter estimate. + If None, init is made on a random diagram from the dataset. + Otherwise, it must be an int (then we init with diagset[init]) + or a (n x 2) numpy.array enconding a persistence diagram with n points. + :param verbose: if True, returns additional information about the + barycenters (assignment and energy). + :returns: If not verbose (default), a numpy.array encoding the barycenter estimate (local minima of the energy function). If verbose, returns a triplet (Y, a, e) where Y is the barycenter estimate, a is the assignments between the points of Y and thoses of the diagrams, and e is the energy value reached by the estimate. """ - m = len(pdiagset) # number of diagrams we are averaging - X = pdiagset # to shorten notations + X = pdiagset # to shorten notations, not a copy + m = len(X) # number of diagrams we are averaging + if m == 0: + print("Warning: computing barycenter of empty diag set. 
Returns None") + return None + nb_off_diag = np.array([len(X_i) for X_i in X]) # store the number of off-diagonal point for each of the X_i # Initialisation of barycenter if init is None: i0 = np.random.randint(m) # Index of first state for the barycenter - Y = X[i0].copy() + Y = X[i0].copy() #copy() ensure that we do not modify X[i0] else: - Y = init.copy() + if type(init)==int: + Y = X[init].copy() + else: + Y = init.copy() - not_converged = True # stoping criterion - while not_converged: + converged = False # stoping criterion + while not converged: K = len(Y) # current nb of points in Y (some might be on diagonal) - G = np.zeros((K, m)) # will store for each j, the (index) point matched in each other diagram (might be the diagonal). + G = np.zeros((K, m), dtype=int)-1 # will store for each j, the (index) point matched in each other diagram (might be the diagonal). + # that is G[j, i] = k <=> y_j is matched to + # x_k in the diagram i-th diagram X[i] updated_points = np.zeros((K, 2)) # will store the new positions of # the points of Y. # If points disappear, there thrown # on [0,0] by default. - new_created_points = [] # will store eventual new points. + new_created_points = [] # will store potential new points. # Step 1 : compute optimal matching (Y, X_i) for each X_i + # and create new points in Y if needed for i in range(m): - M = _cost_matrix(Y, X[i]) - indices = _optimal_matching(M) - for y_j, x_i_j in enumerate(indices): + indices = _optimal_matching(Y, X[i]) + for y_j, x_i_j in indices: if y_j < K: # we matched an off diagonal point to x_i_j... 
if x_i_j < nb_off_diag[i]: # ...which is also an off-diagonal point G[y_j, i] = x_i_j @@ -136,32 +176,40 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): new_y = _mean(np.array([X[i][x_i_j]]), m) # Average this point with (m-1) copies of Delta new_created_points.append(new_y) - # Step 2 : Compute new points (mean) + # Step 2 : Update current point position thanks to the groupings computed + + to_delete = [] for j in range(K): - matched_points = [X[i][int(G[j, i])] for i in range(m) if G[j, i] > -1] - updated_points[j] = _mean(matched_points, m) + matched_points = [X[i][G[j, i]] for i in range(m) if G[j, i] > -1] + new_y_j = _mean(matched_points, m) + if not np.array_equal(new_y_j, np.array([0,0])): + updated_points[j] = new_y_j + else: # this points is no longer of any use. + to_delete.append(j) + # we remove the point to be deleted now. + updated_points = np.delete(updated_points, to_delete, axis=0) # cannot be done in-place. - if new_created_points: + + if new_created_points: # we cannot converge if there have been new created points. 
Y = np.concatenate((updated_points, new_created_points)) else: + # Step 3 : we check convergence + if np.array_equal(updated_points, Y): + converged = True Y = updated_points - # Step 3 : we update our estimation of the barycenter - if len(new_created_points) == 0 and np.array_equal(updated_points, Y): - not_converged = False if verbose: matchings = [] - energy = 0 + #energy = 0 n_y = len(Y) for i in range(m): - M = _cost_matrix(Y, X[i]) - edges = _optimal_matching(M) + edges = _optimal_matching(Y, X[i]) matchings.append([x_i_j for (y_j, x_i_j) in enumerate(edges) if y_j < n_y]) - energy += sum([M[i,j] for i,j in enumerate(edges)]) + # energy += sum([M[i,j] for i,j in enumerate(edges)]) - energy = energy/m - return Y, matchings, energy + # energy = energy/m + return Y, matchings #, energy else: return Y @@ -174,6 +222,11 @@ def _plot_barycenter(X, Y, matchings): the i-th point of the barycenter is grouped with the j-th point of the k-th diagram. """ + # import matplotlib now to avoid useless dependancies + + import matplotlib.pyplot as plt + from matplotlib.patches import Polygon + fig = plt.figure() ax = fig.add_subplot(111) @@ -182,7 +235,7 @@ def _plot_barycenter(X, Y, matchings): indices = matchings[i] n_i = len(X[i]) - for (y_j, x_i_j) in enumerate(indices): + for (y_j, x_i_j) in indices: y = Y[y_j] if y[0] != y[1]: if x_i_j < n_i: # not mapped with the diag @@ -192,16 +245,20 @@ def _plot_barycenter(X, Y, matchings): ax.plot([y[0], x[0]], [y[1], x[1]], c='black', linestyle="dashed") - ax.scatter(Y[:,0], Y[:,1], color='purple', marker='d') + ax.scatter(Y[:,0], Y[:,1], color='purple', marker='d', zorder=2) - for dgm in X: - ax.scatter(dgm[:,0], dgm[:,1], marker ='o') + for X_i in X: + if X_i.size > 0: + ax.scatter(X_i[:,0], X_i[:,1], marker ='o', zorder=2) shift = 0.1 # for improved rendering - xmin = min([np.min(x[:,0]) for x in X]) - shift - xmax = max([np.max(x[:,0]) for x in X]) + shift - ymin = min([np.max(x[:,1]) for x in X]) - shift - ymax = 
max([np.max(x[:,1]) for x in X]) + shift + try: + xmin = np.min(np.array([np.min(x[:,0]) for x in X if len(x) > 0]) - shift) + xmax = np.max(np.array([np.max(x[:,0]) for x in X if len(x) > 0]) + shift) + ymin = np.min(np.array([np.max(x[:,1]) for x in X if len(x) > 0]) - shift) + ymax = np.max(np.array([np.max(x[:,1]) for x in X if len(x) > 0]) + shift) + except ValueError: # to handle the pecular case where we only average empty diagrams. + xmin, xmax, ymin, ymax = 0, 1, 0, 1 themin = min(xmin, ymin) themax = max(xmax, ymax) ax.set_xlim(themin, themax) @@ -212,12 +269,71 @@ def _plot_barycenter(X, Y, matchings): ax.set_aspect('equal', adjustable='box') ax.set_title("Estimated barycenter") + plt.show() + + +def _test_perf(): + nb_repeat = 10 + nb_points_in_dgm = [5, 10, 20, 50, 100] + nb_dmg = [3, 5, 10, 20] + + from time import time + for m in nb_dmg: + for n in nb_points_in_dgm: + tstart = time() + for _ in range(nb_repeat): + X = [np.random.rand(n, 2) for _ in range(m)] + for diag in X: + #enforce having diagrams + diag[:,1] = diag[:,1] + diag[:,0] + _ = lagrangian_barycenter(X) + tend = time() + print("Computation of barycenter in %s sec, with k = %s diags and n = %s points per diag."%(np.round((tend - tstart)/nb_repeat, 2), m, n)) + print("********************") + + +def _sanity_check(verbose): + #dg1 = np.array([[0.2, 0.5]]) + #dg2 = np.array([[0.2, 0.7]]) + #dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]]) + #dg4 = np.array([[0.72, 0.82]]) + #X = [dg1, dg2, dg3, dg4] + #Y, a = lagrangian_barycenter(X, verbose=verbose) + #_plot_barycenter(X, Y, a) + + #dg1 = np.array([[0.2, 0.5]]) + #dg2 = np.array([]) # The empty diagram + #dg3 = np.array([[0.4, 0.8]]) + #X = [dg1, dg2, dg3] + #Y, a = lagrangian_barycenter(X, verbose=verbose) + #_plot_barycenter(X, Y, a) + + #dg1 = np.array([]) + #dg2 = np.array([]) # The empty diagram + #dg3 = np.array([]) + #X = [dg1, dg2, dg3] + #Y, a = lagrangian_barycenter(X, verbose=verbose) + #_plot_barycenter(X, Y, a) + + #dg1 = 
np.array([[0.1, 0.12], [0.21, 0.7], [0.4, 0.5], [0.3, 0.4], [0.35, 0.7], [0.5, 0.55], [0.32, 0.42], [0.1, 0.4], [0.2, 0.4]]) + #dg2 = np.array([[0.09, 0.11], [0.3, 0.43], [0.5, 0.61], [0.3, 0.7], [0.42, 0.5], [0.35, 0.41], [0.74, 0.9], [0.5, 0.95], [0.35, 0.45], [0.13, 0.48], [0.32, 0.45]]) + #dg3 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]]) + #X = [dg1, dg2, dg3] + #Y, a = lagrangian_barycenter(X, init=1, verbose=verbose) + #_plot_barycenter(X, Y, a) + + + dg1 = np.array([[0.2, 0.5]]) + dg2 = np.array([[0.2, 0.7]]) + dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]]) + dg4 = np.array([]) + + bary = lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3) + + message = "Wasserstein barycenter estimated:" + print(message) + print(bary) if __name__=="__main__": - dg1 = np.array([[0.1, 0.12], [0.21, 0.7], [0.4, 0.5], [0.3, 0.4], [0.35, 0.7], [0.5, 0.55], [0.32, 0.42], [0.1, 0.4], [0.2, 0.4]]) - dg2 = np.array([[0.09, 0.11], [0.3, 0.43], [0.5, 0.61], [0.3, 0.7], [0.42, 0.5], [0.35, 0.41], [0.74, 0.9], [0.5, 0.95], [0.35, 0.45], [0.13, 0.48], [0.32, 0.45]]) - dg3 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]]) - X = [dg1, dg2, dg3] - Y, a, e = lagrangian_barycenter(X, verbose=True) - _plot_barycenter(X, Y, a) - plt.show() + _sanity_check(verbose = True) + #_test_perf() -- cgit v1.2.3 From 56a9294ede73d0660ba724b4f448c02dcd5e3dcc Mon Sep 17 00:00:00 2001 From: tlacombe Date: Thu, 5 Dec 2019 18:52:16 +0100 Subject: added image for barycenter in the /img repository --- src/python/doc/barycenter_sum.inc | 6 ++++-- src/python/doc/img/barycenter.png | Bin 0 -> 12433 bytes src/python/gudhi/barycenter.py | 33 ++++++++++++++++----------------- 3 files changed, 20 insertions(+), 19 deletions(-) create mode 100644 src/python/doc/img/barycenter.png (limited to 'src') diff --git 
a/src/python/doc/barycenter_sum.inc b/src/python/doc/barycenter_sum.inc index 7801a845..afac07d7 100644 --- a/src/python/doc/barycenter_sum.inc +++ b/src/python/doc/barycenter_sum.inc @@ -3,7 +3,7 @@ +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ | .. figure:: | A Frechet mean (or barycenter) is a generalization of the arithmetic | :Author: Theo Lacombe | - | ../../doc/Barycenter/barycenter.png | mean in a non linear space such as the one of persistence diagrams. | | + | ./img/barycenter.png | mean in a non linear space such as the one of persistence diagrams. | | | :figclass: align-center | Given a set of persistence diagrams :math:`\mu_1 \dots \mu_n`, it is | :Introduced in: GUDHI 3.1.0 | | | defined as a minimizer of the variance functional, that is of | | | Illustration of Frechet mean between persistence | :math:`\mu \mapsto \sum_{i=1}^n d_2(\mu,\mu_i)^2`. | :Copyright: MIT | @@ -14,7 +14,9 @@ | | -lable is based on [Turner et al, 2014], and uses an EM-scheme to | | | | provide a local minimum of the variance functional (somewhat similar | | | | to the Lloyd algorithm to estimate a solution to the k-means | | - | | problem). The combinatorial structure of the algorithm limits its | | + | | problem). The local minimum returned depends on the initialization of| | + | | the barycenter. | | + | | The combinatorial structure of the algorithm limits its | | | | scaling on large scale problems (thousands of diagrams and of points | | | | per diagram). 
| | +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ diff --git a/src/python/doc/img/barycenter.png b/src/python/doc/img/barycenter.png new file mode 100644 index 00000000..cad6af70 Binary files /dev/null and b/src/python/doc/img/barycenter.png differ diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py index 3cd214a7..b4afdb6a 100644 --- a/src/python/gudhi/barycenter.py +++ b/src/python/gudhi/barycenter.py @@ -293,13 +293,12 @@ def _test_perf(): def _sanity_check(verbose): - #dg1 = np.array([[0.2, 0.5]]) - #dg2 = np.array([[0.2, 0.7]]) - #dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]]) - #dg4 = np.array([[0.72, 0.82]]) - #X = [dg1, dg2, dg3, dg4] - #Y, a = lagrangian_barycenter(X, verbose=verbose) - #_plot_barycenter(X, Y, a) + dg1 = np.array([[0.2, 0.5]]) + dg2 = np.array([[0.2, 0.7], [0.73, 0.88]]) + dg3 = np.array([[0.3, 0.6], [0.7, 0.85], [0.2, 0.3]]) + X = [dg1, dg2, dg3] + Y, a = lagrangian_barycenter(X, verbose=verbose) + _plot_barycenter(X, Y, a) #dg1 = np.array([[0.2, 0.5]]) #dg2 = np.array([]) # The empty diagram @@ -323,16 +322,16 @@ def _sanity_check(verbose): #_plot_barycenter(X, Y, a) - dg1 = np.array([[0.2, 0.5]]) - dg2 = np.array([[0.2, 0.7]]) - dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]]) - dg4 = np.array([]) - - bary = lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3) - - message = "Wasserstein barycenter estimated:" - print(message) - print(bary) + #dg1 = np.array([[0.2, 0.5]]) + #dg2 = np.array([[0.2, 0.7]]) + #dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]]) + #dg4 = np.array([]) + # + #bary, a = lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=True) + #_plot_barycenter([dg1, dg2, dg3, dg4], bary, a) + #message = "Wasserstein barycenter estimated:" + #print(message) + #print(bary) if __name__=="__main__": 
_sanity_check(verbose = True) -- cgit v1.2.3 From aba9ad68394b0c5aae22c450cac7162733132002 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Thu, 5 Dec 2019 18:55:46 +0100 Subject: correction of bibliography --- src/python/doc/barycenter_user.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/python/doc/barycenter_user.rst b/src/python/doc/barycenter_user.rst index fae2854a..1c4cb812 100644 --- a/src/python/doc/barycenter_user.rst +++ b/src/python/doc/barycenter_user.rst @@ -9,7 +9,7 @@ Definition .. include:: wasserstein_distance_sum.inc -This implementation is based on ideas from "Large Scale Computation of Means and Cluster for Persistence Diagrams via Optimal Transport". +This implementation is based on ideas from "Frechet means for distribution of persistence diagrams", Turner et al. 2014. Function -------- -- cgit v1.2.3 From 5877b4d3b7aca645ba906dfe0be598b1881d8798 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Mon, 16 Dec 2019 17:53:59 +0100 Subject: update CMakeLists and create test_wasserstein_bary --- src/python/CMakeLists.txt | 3 +++ src/python/gudhi/barycenter.py | 26 ++++++++++---------- src/python/test/test_wasserstein_barycenter.py | 33 ++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 12 deletions(-) create mode 100755 src/python/test/test_wasserstein_barycenter.py (limited to 'src') diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 9af85eac..7f9ff38f 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -52,6 +52,7 @@ if(PYTHONINTERP_FOUND) # Modules that should not be auto-imported in __init__.py set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'representations', ") set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'wasserstein', ") + set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'barycenter', ") add_gudhi_debug_info("Python version ${PYTHON_VERSION_STRING}") add_gudhi_debug_info("Cython version ${CYTHON_VERSION}") @@ 
-210,6 +211,7 @@ if(PYTHONINTERP_FOUND) file(COPY "gudhi/persistence_graphical_tools.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") file(COPY "gudhi/representations" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi/") file(COPY "gudhi/wasserstein.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") + file(COPY "gudhi/barycenter.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") add_custom_command( OUTPUT gudhi.so @@ -385,6 +387,7 @@ if(PYTHONINTERP_FOUND) # Wasserstein if(OT_FOUND) add_gudhi_py_test(test_wasserstein_distance) + add_gudhi_py_test(test_wasserstein_barycenter) endif(OT_FOUND) # Representations diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py index b4afdb6a..41418454 100644 --- a/src/python/gudhi/barycenter.py +++ b/src/python/gudhi/barycenter.py @@ -293,12 +293,12 @@ def _test_perf(): def _sanity_check(verbose): - dg1 = np.array([[0.2, 0.5]]) - dg2 = np.array([[0.2, 0.7], [0.73, 0.88]]) - dg3 = np.array([[0.3, 0.6], [0.7, 0.85], [0.2, 0.3]]) - X = [dg1, dg2, dg3] - Y, a = lagrangian_barycenter(X, verbose=verbose) - _plot_barycenter(X, Y, a) + #dg1 = np.array([[0.2, 0.5]]) + #dg2 = np.array([[0.2, 0.7], [0.73, 0.88]]) + #dg3 = np.array([[0.3, 0.6], [0.7, 0.85], [0.2, 0.3]]) + #X = [dg1, dg2, dg3] + #Y, a = lagrangian_barycenter(X, verbose=verbose) + #_plot_barycenter(X, Y, a) #dg1 = np.array([[0.2, 0.5]]) #dg2 = np.array([]) # The empty diagram @@ -313,13 +313,15 @@ def _sanity_check(verbose): #X = [dg1, dg2, dg3] #Y, a = lagrangian_barycenter(X, verbose=verbose) #_plot_barycenter(X, Y, a) + #print(Y) - #dg1 = np.array([[0.1, 0.12], [0.21, 0.7], [0.4, 0.5], [0.3, 0.4], [0.35, 0.7], [0.5, 0.55], [0.32, 0.42], [0.1, 0.4], [0.2, 0.4]]) - #dg2 = np.array([[0.09, 0.11], [0.3, 0.43], [0.5, 0.61], [0.3, 0.7], [0.42, 0.5], [0.35, 0.41], [0.74, 0.9], [0.5, 0.95], [0.35, 0.45], [0.13, 0.48], [0.32, 0.45]]) - #dg3 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 
0.41], [0.32, 0.48]]) - #X = [dg1, dg2, dg3] - #Y, a = lagrangian_barycenter(X, init=1, verbose=verbose) - #_plot_barycenter(X, Y, a) + dg1 = np.array([[0.1, 0.12], [0.21, 0.7], [0.4, 0.5], [0.3, 0.4], [0.35, 0.7], [0.5, 0.55], [0.32, 0.42], [0.1, 0.4], [0.2, 0.4]]) + dg2 = np.array([[0.09, 0.11], [0.3, 0.43], [0.5, 0.61], [0.3, 0.7], [0.42, 0.5], [0.35, 0.41], [0.74, 0.9], [0.5, 0.95], [0.35, 0.45], [0.13, 0.48], [0.32, 0.45]]) + dg3 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]]) + X = [dg3] + Y, a = lagrangian_barycenter(X, verbose=verbose) + _plot_barycenter(X, Y, a) + print(Y) #dg1 = np.array([[0.2, 0.5]]) diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py new file mode 100755 index 00000000..6074f250 --- /dev/null +++ b/src/python/test/test_wasserstein_barycenter.py @@ -0,0 +1,33 @@ +from gudhi.barycenter import lagrangian_barycenter +import numpy as np + +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. 
+ Author(s): Theo Lacombe + + Copyright (C) 2019 Inria + + Modification(s): + - YYYY/MM Author: Description of the modification +""" + +__author__ = "Theo Lacombe" +__copyright__ = "Copyright (C) 2019 Inria" +__license__ = "MIT" + + +def test_lagrangian_barycenter(): + + dg1 = np.array([[0.2, 0.5]]) + dg2 = np.array([[0.2, 0.7]]) + dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]]) + dg4 = np.array([]) + dg5 = np.array([]) + dg6 = np.array([]) + res = np.array([[0.27916667, 0.55416667], [0.7375, 0.7625], [0.2375, 0.2625]]) + + dg7 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]]) + + assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < 0.001 + assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), np.array([])) + assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < 0.001 -- cgit v1.2.3 From b4fcc875393df12f42aea84b918b5b35f99f7283 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Mon, 16 Dec 2019 18:11:27 +0100 Subject: correction of typo in _user.rst and of empty array shape in test_wasserstein_barycenter --- src/python/doc/barycenter_user.rst | 2 +- src/python/test/test_wasserstein_barycenter.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/python/doc/barycenter_user.rst b/src/python/doc/barycenter_user.rst index 1c4cb812..5344583f 100644 --- a/src/python/doc/barycenter_user.rst +++ b/src/python/doc/barycenter_user.rst @@ -33,7 +33,7 @@ Note that persistence diagrams must be submitted as (n x 2) numpy arrays and mus dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]]) dg4 = np.array([]) - bary = gudhi.barycenter.lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3)) + bary = gudhi.barycenter.lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3) message = "Wasserstein barycenter estimated:" print(message) diff 
--git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py index 6074f250..ae3f6579 100755 --- a/src/python/test/test_wasserstein_barycenter.py +++ b/src/python/test/test_wasserstein_barycenter.py @@ -29,5 +29,5 @@ def test_lagrangian_barycenter(): dg7 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]]) assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < 0.001 - assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), np.array([])) + assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), shape=(0,2), np.array([])) assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < 0.001 -- cgit v1.2.3 From 0c2fdc65cc1ea676fa8d11c24bba0d34eb5b7a3c Mon Sep 17 00:00:00 2001 From: tlacombe Date: Mon, 16 Dec 2019 18:34:24 +0100 Subject: Correction of typo in barycenter_user --- src/python/doc/barycenter_user.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/python/doc/barycenter_user.rst b/src/python/doc/barycenter_user.rst index 5344583f..714d807e 100644 --- a/src/python/doc/barycenter_user.rst +++ b/src/python/doc/barycenter_user.rst @@ -2,12 +2,12 @@ .. To get rid of WARNING: document isn't included in any toctree -Wasserstein distance user manual +Barycenter user manual ================================ Definition ---------- -.. include:: wasserstein_distance_sum.inc +.. include:: barycenter_sum.inc This implementation is based on ideas from "Frechet means for distribution of persistence diagrams", Turner et al. 2014. 
-- cgit v1.2.3 From 20047b94e693f31fd88ca142ba7256767ac753eb Mon Sep 17 00:00:00 2001 From: tlacombe Date: Mon, 16 Dec 2019 18:34:55 +0100 Subject: correction of typo in test_wasserstein_barycenter --- src/python/test/test_wasserstein_barycenter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py index ae3f6579..dc82a57c 100755 --- a/src/python/test/test_wasserstein_barycenter.py +++ b/src/python/test/test_wasserstein_barycenter.py @@ -29,5 +29,5 @@ def test_lagrangian_barycenter(): dg7 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]]) assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < 0.001 - assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), shape=(0,2), np.array([])) + assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), np.array([], shape=(0,2))) assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < 0.001 -- cgit v1.2.3 From b23813b90aaf1b0ce2b21bdfb33d2a6ea5bfe4cc Mon Sep 17 00:00:00 2001 From: tlacombe Date: Mon, 16 Dec 2019 19:32:26 +0100 Subject: correction test --- src/python/gudhi/barycenter.py | 6 ++++-- src/python/test/test_wasserstein_barycenter.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py index 41418454..b76166c0 100644 --- a/src/python/gudhi/barycenter.py +++ b/src/python/gudhi/barycenter.py @@ -318,10 +318,12 @@ def _sanity_check(verbose): dg1 = np.array([[0.1, 0.12], [0.21, 0.7], [0.4, 0.5], [0.3, 0.4], [0.35, 0.7], [0.5, 0.55], [0.32, 0.42], [0.1, 0.4], [0.2, 0.4]]) dg2 = np.array([[0.09, 0.11], [0.3, 0.43], [0.5, 0.61], [0.3, 0.7], [0.42, 0.5], [0.35, 0.41], [0.74, 0.9], 
[0.5, 0.95], [0.35, 0.45], [0.13, 0.48], [0.32, 0.45]]) dg3 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]]) - X = [dg3] + dg4 = np.array([]) + X = [dg4] Y, a = lagrangian_barycenter(X, verbose=verbose) - _plot_barycenter(X, Y, a) + #_plot_barycenter(X, Y, a) print(Y) + print(np.array_equal(Y, np.empty(shape=(0,2) ))) #dg1 = np.array([[0.2, 0.5]]) diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py index dc82a57c..910d23ff 100755 --- a/src/python/test/test_wasserstein_barycenter.py +++ b/src/python/test/test_wasserstein_barycenter.py @@ -29,5 +29,5 @@ def test_lagrangian_barycenter(): dg7 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]]) assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < 0.001 - assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), np.array([], shape=(0,2))) + assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), np.empty(shape=(0,2))) assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < 0.001 -- cgit v1.2.3 From d91585af64805a11a4d446d9e3f6467f3394d0c6 Mon Sep 17 00:00:00 2001 From: Théo Lacombe Date: Tue, 17 Dec 2019 18:58:48 +0100 Subject: Update src/python/gudhi/barycenter.py correction of typo Co-Authored-By: Marc Glisse --- src/python/gudhi/barycenter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py index b76166c0..43602a6e 100644 --- a/src/python/gudhi/barycenter.py +++ b/src/python/gudhi/barycenter.py @@ -114,7 +114,7 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): """ Compute the estimated barycenter computed with the algorithm provided by Turner et 
al (2014). - It is a local minima of the corresponding Frechet function. + It is a local minimum of the corresponding Frechet function. :param pdiagset: a list of size N containing numpy.array of shape (n x 2) (n can variate), encoding a set of persistence diagrams with only finite coordinates. -- cgit v1.2.3 From 180add9067bc9bd0609362717972eeeb8d2f6713 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Thu, 19 Dec 2019 17:25:01 +0100 Subject: clean code and doc --- src/python/gudhi/barycenter.py | 129 ++++++++++++----------------------------- 1 file changed, 36 insertions(+), 93 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py index 43602a6e..c2173dba 100644 --- a/src/python/gudhi/barycenter.py +++ b/src/python/gudhi/barycenter.py @@ -58,12 +58,13 @@ def _build_dist_matrix(X, Y, p=2., q=2.): return Cf -def _optimal_matching(X, Y): +def _optimal_matching(X, Y, withcost=False): """ :param X: numpy.array of size (n x 2) :param Y: numpy.array of size (m x 2) + :param withcost: returns also the cost corresponding to this optimal matching :returns: numpy.array of shape (k x 2) encoding the list of edges in the optimal matching. - That is, [[(i, j) ...], where (i,j) indicates that X[i] is matched to Y[j] + That is, [(i, j) ...], where (i,j) indicates that X[i] is matched to Y[j] if i > len(X) or j > len(Y), it means they represent the diagonal. """ @@ -74,10 +75,10 @@ def _optimal_matching(X, Y): if Y.size == 0: # Y is empty return np.array([[0,0]]) # the diagonal is matched to the diagonal and that's it... 
else: - return np.column_stack([np.zeros(m+1, dtype=int), np.arange(m+1, dtype=int)]) # TO BE CORRECTED + return np.column_stack([np.zeros(m+1, dtype=int), np.arange(m+1, dtype=int)]) elif Y.size == 0: # X is not empty but Y is empty - return np.column_stack([np.zeros(n+1, dtype=int), np.arange(n+1, dtype=int)]) # TO BE CORRECTED - + return np.column_stack([np.zeros(n+1, dtype=int), np.arange(n+1, dtype=int)]) + # we know X, Y are not empty diags now M = _build_dist_matrix(X, Y) @@ -86,12 +87,16 @@ def _optimal_matching(X, Y): b = np.full(m+1, 1. / (n + m) ) # weight vector of the input diagram. Uniform here. b[-1] = b[-1] * n # so that we have a probability measure, required by POT P = ot.emd(a=a, b=b, M=M)*(n+m) - # Note : it seems POT return a permutation matrix in this situation, - # ...guarantee...? - # It should be enough to check that the algorithm only iterates on vertices of the transportation polytope. + # Note : it seems POT return a permutation matrix in this situation, ie a vertex of the constraint set (generically true). + if withcost: + cost = np.sqrt(np.sum(np.multiply(P, M))) P[P < 0.5] = 0 # dirty trick to avoid some numerical issues... to be improved. # return the list of (i,j) such that P[i,j] > 0, i.e. x_i is matched to y_j (should it be the diag). res = np.nonzero(P) + + if withcost: + return np.column_stack(res), cost + return np.column_stack(res) @@ -123,13 +128,16 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): Otherwise, it must be an int (then we init with diagset[init]) or a (n x 2) numpy.array enconding a persistence diagram with n points. :param verbose: if True, returns additional information about the - barycenters (assignment and energy). + barycenter. :returns: If not verbose (default), a numpy.array encoding the barycenter estimate (local minima of the energy function). 
- If verbose, returns a triplet (Y, a, e) - where Y is the barycenter estimate, a is the assignments between the - points of Y and thoses of the diagrams, - and e is the energy value reached by the estimate. + If verbose, returns a couple (Y, log) + where Y is the barycenter estimate, + and log is a dict that contains additional informations: + - assigments, a list of list of pairs (i,j), + That is, a[k] = [(i, j) ...], where (i,j) indicates that X[i] is matched to Y[j] + if i > len(X) or j > len(Y), it means they represent the diagonal. + - energy, a float representing the Frechet mean value obtained. """ X = pdiagset # to shorten notations, not a copy m = len(X) # number of diagrams we are averaging @@ -200,25 +208,29 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): if verbose: - matchings = [] - #energy = 0 + groupings = [] + energy = 0 + log = {} n_y = len(Y) for i in range(m): - edges = _optimal_matching(Y, X[i]) - matchings.append([x_i_j for (y_j, x_i_j) in enumerate(edges) if y_j < n_y]) - # energy += sum([M[i,j] for i,j in enumerate(edges)]) - - # energy = energy/m - return Y, matchings #, energy + edges, cost = _optimal_matching(Y, X[i], withcost=True) + print(edges) + groupings.append([x_i_j for (y_j, x_i_j) in enumerate(edges) if y_j < n_y]) + energy += cost + log["groupings"] = groupings + energy = energy/m + log["energy"] = energy + + return Y, log else: return Y -def _plot_barycenter(X, Y, matchings): +def _plot_barycenter(X, Y, groupings): """ :param X: list of persistence diagrams. :param Y: numpy.array of (n x 2). Aims to be an estimate of the barycenter returned by lagrangian_barycenter(X, verbose=True). - :param matchings: list of lists, such that L[k][i] = j if and only if + :param groupings: list of lists, such that L[k][i] = j if and only if the i-th point of the barycenter is grouped with the j-th point of the k-th diagram. 
""" @@ -232,7 +244,7 @@ def _plot_barycenter(X, Y, matchings): # n_y = len(Y.points) for i in range(len(X)): - indices = matchings[i] + indices = groupings[i] n_i = len(X[i]) for (y_j, x_i_j) in indices: @@ -271,72 +283,3 @@ def _plot_barycenter(X, Y, matchings): plt.show() - -def _test_perf(): - nb_repeat = 10 - nb_points_in_dgm = [5, 10, 20, 50, 100] - nb_dmg = [3, 5, 10, 20] - - from time import time - for m in nb_dmg: - for n in nb_points_in_dgm: - tstart = time() - for _ in range(nb_repeat): - X = [np.random.rand(n, 2) for _ in range(m)] - for diag in X: - #enforce having diagrams - diag[:,1] = diag[:,1] + diag[:,0] - _ = lagrangian_barycenter(X) - tend = time() - print("Computation of barycenter in %s sec, with k = %s diags and n = %s points per diag."%(np.round((tend - tstart)/nb_repeat, 2), m, n)) - print("********************") - - -def _sanity_check(verbose): - #dg1 = np.array([[0.2, 0.5]]) - #dg2 = np.array([[0.2, 0.7], [0.73, 0.88]]) - #dg3 = np.array([[0.3, 0.6], [0.7, 0.85], [0.2, 0.3]]) - #X = [dg1, dg2, dg3] - #Y, a = lagrangian_barycenter(X, verbose=verbose) - #_plot_barycenter(X, Y, a) - - #dg1 = np.array([[0.2, 0.5]]) - #dg2 = np.array([]) # The empty diagram - #dg3 = np.array([[0.4, 0.8]]) - #X = [dg1, dg2, dg3] - #Y, a = lagrangian_barycenter(X, verbose=verbose) - #_plot_barycenter(X, Y, a) - - #dg1 = np.array([]) - #dg2 = np.array([]) # The empty diagram - #dg3 = np.array([]) - #X = [dg1, dg2, dg3] - #Y, a = lagrangian_barycenter(X, verbose=verbose) - #_plot_barycenter(X, Y, a) - #print(Y) - - dg1 = np.array([[0.1, 0.12], [0.21, 0.7], [0.4, 0.5], [0.3, 0.4], [0.35, 0.7], [0.5, 0.55], [0.32, 0.42], [0.1, 0.4], [0.2, 0.4]]) - dg2 = np.array([[0.09, 0.11], [0.3, 0.43], [0.5, 0.61], [0.3, 0.7], [0.42, 0.5], [0.35, 0.41], [0.74, 0.9], [0.5, 0.95], [0.35, 0.45], [0.13, 0.48], [0.32, 0.45]]) - dg3 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]]) - dg4 = np.array([]) 
- X = [dg4] - Y, a = lagrangian_barycenter(X, verbose=verbose) - #_plot_barycenter(X, Y, a) - print(Y) - print(np.array_equal(Y, np.empty(shape=(0,2) ))) - - - #dg1 = np.array([[0.2, 0.5]]) - #dg2 = np.array([[0.2, 0.7]]) - #dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]]) - #dg4 = np.array([]) - # - #bary, a = lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=True) - #_plot_barycenter([dg1, dg2, dg3, dg4], bary, a) - #message = "Wasserstein barycenter estimated:" - #print(message) - #print(bary) - -if __name__=="__main__": - _sanity_check(verbose = True) - #_test_perf() -- cgit v1.2.3 From b7138871d42197c94c58b9938279455b75723606 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Thu, 19 Dec 2019 17:28:06 +0100 Subject: removed plot barycenter. Will be integrated in a tutorial --- src/python/gudhi/barycenter.py | 58 ------------------------------------------ 1 file changed, 58 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py index c2173dba..11098afe 100644 --- a/src/python/gudhi/barycenter.py +++ b/src/python/gudhi/barycenter.py @@ -225,61 +225,3 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): else: return Y -def _plot_barycenter(X, Y, groupings): - """ - :param X: list of persistence diagrams. - :param Y: numpy.array of (n x 2). Aims to be an estimate of the barycenter - returned by lagrangian_barycenter(X, verbose=True). - :param groupings: list of lists, such that L[k][i] = j if and only if - the i-th point of the barycenter is grouped with the j-th point of the k-th - diagram. 
- """ - # import matplotlib now to avoid useless dependancies - - import matplotlib.pyplot as plt - from matplotlib.patches import Polygon - - fig = plt.figure() - ax = fig.add_subplot(111) - - # n_y = len(Y.points) - for i in range(len(X)): - indices = groupings[i] - n_i = len(X[i]) - - for (y_j, x_i_j) in indices: - y = Y[y_j] - if y[0] != y[1]: - if x_i_j < n_i: # not mapped with the diag - x = X[i][x_i_j] - else: # y_j is matched to the diagonal - x = _proj_on_diag(y) - ax.plot([y[0], x[0]], [y[1], x[1]], c='black', - linestyle="dashed") - - ax.scatter(Y[:,0], Y[:,1], color='purple', marker='d', zorder=2) - - for X_i in X: - if X_i.size > 0: - ax.scatter(X_i[:,0], X_i[:,1], marker ='o', zorder=2) - - shift = 0.1 # for improved rendering - try: - xmin = np.min(np.array([np.min(x[:,0]) for x in X if len(x) > 0]) - shift) - xmax = np.max(np.array([np.max(x[:,0]) for x in X if len(x) > 0]) + shift) - ymin = np.min(np.array([np.max(x[:,1]) for x in X if len(x) > 0]) - shift) - ymax = np.max(np.array([np.max(x[:,1]) for x in X if len(x) > 0]) + shift) - except ValueError: # to handle the pecular case where we only average empty diagrams. 
- xmin, xmax, ymin, ymax = 0, 1, 0, 1 - themin = min(xmin, ymin) - themax = max(xmax, ymax) - ax.set_xlim(themin, themax) - ax.set_ylim(themin, themax) - ax.add_patch(Polygon([[themin,themin], [themax,themin], [themax,themax]], fill=True, color='lightgrey')) - ax.set_xticks([]) - ax.set_yticks([]) - ax.set_aspect('equal', adjustable='box') - ax.set_title("Estimated barycenter") - - plt.show() - -- cgit v1.2.3 From 85ceea9512634a62664208cd2d0f1ce48bafa171 Mon Sep 17 00:00:00 2001 From: mathieu Date: Thu, 16 Jan 2020 17:02:55 -0500 Subject: added wasserstein class --- .../diagram_vectorizations_distances_kernels.py | 7 ++- src/python/gudhi/representations/metrics.py | 59 ++++++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/python/example/diagram_vectorizations_distances_kernels.py b/src/python/example/diagram_vectorizations_distances_kernels.py index 119072eb..66c32cc2 100755 --- a/src/python/example/diagram_vectorizations_distances_kernels.py +++ b/src/python/example/diagram_vectorizations_distances_kernels.py @@ -9,7 +9,7 @@ from gudhi.representations import DiagramSelector, Clamping, Landscape, Silhouet TopologicalVector, DiagramScaler, BirthPersistenceTransform,\ PersistenceImage, PersistenceWeightedGaussianKernel, Entropy, \ PersistenceScaleSpaceKernel, SlicedWassersteinDistance,\ - SlicedWassersteinKernel, BottleneckDistance, PersistenceFisherKernel + SlicedWassersteinKernel, BottleneckDistance, WassersteinDistance, PersistenceFisherKernel D = np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.], [0., np.inf], [5., np.inf]]) diags = [D] @@ -117,6 +117,11 @@ X = SW.fit(diags) Y = SW.transform(diags2) print("SW kernel is " + str(Y[0][0])) +W = WassersteinDistance(order=2, internal_p=2) +X = W.fit(diags) +Y = W.transform(diags2) +print("Wasserstein distance is " + str(Y[0][0])) + W = BottleneckDistance(epsilon=.001) X = W.fit(diags) Y = W.transform(diags2) diff --git a/src/python/gudhi/representations/metrics.py 
b/src/python/gudhi/representations/metrics.py index 5f9ec6ab..290c1d07 100644 --- a/src/python/gudhi/representations/metrics.py +++ b/src/python/gudhi/representations/metrics.py @@ -10,6 +10,7 @@ import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from sklearn.metrics import pairwise_distances +from gudhi.wasserstein import wasserstein_distance try: from .. import bottleneck_distance USE_GUDHI = True @@ -145,6 +146,64 @@ class BottleneckDistance(BaseEstimator, TransformerMixin): return Xfit +class WassersteinDistance(BaseEstimator, TransformerMixin): + """ + This is a class for computing the Wasserstein distance matrix from a list of persistence diagrams. + """ + def __init__(self, order=2, internal_p=2): + """ + Constructor for the WassersteinDistance class. + + Parameters: + order (int): exponent for Wasserstein, default value is 2., see :func:`gudhi.wasserstein.wasserstein_distance`. + internal_p (int): ground metric on the (upper-half) plane (i.e. norm l_p in R^2), default value is 2 (euclidean norm), see :func:`gudhi.wasserstein.wasserstein_distance`. + """ + self.order, self.internal_p = order, internal_p + + def fit(self, X, y=None): + """ + Fit the WassersteinDistance class on a list of persistence diagrams: persistence diagrams are stored in a numpy array called **diagrams**. + + Parameters: + X (list of n x 2 numpy arrays): input persistence diagrams. + y (n x 1 array): persistence diagram labels (unused). + """ + self.diagrams_ = X + return self + + def transform(self, X): + """ + Compute all Wasserstein distances between the persistence diagrams that were stored after calling the fit() method, and a given list of (possibly different) persistence diagrams. + + Parameters: + X (list of n x 2 numpy arrays): input persistence diagrams. + + Returns: + numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise Wasserstein distances. 
+ """ + num_diag1 = len(X) + + #if len(self.diagrams_) == len(X) and np.all([np.array_equal(self.diagrams_[i], X[i]) for i in range(len(X))]): + if X is self.diagrams_: + matrix = np.zeros((num_diag1, num_diag1)) + + for i in range(num_diag1): + for j in range(i+1, num_diag1): + matrix[i,j] = wasserstein_distance(X[i], X[j], self.order, self.internal_p) + matrix[j,i] = matrix[i,j] + + else: + num_diag2 = len(self.diagrams_) + matrix = np.zeros((num_diag1, num_diag2)) + + for i in range(num_diag1): + for j in range(num_diag2): + matrix[i,j] = wasserstein_distance(X[i], self.diagrams_[j], self.order, self.internal_p) + + Xfit = matrix + + return Xfit + class PersistenceFisherDistance(BaseEstimator, TransformerMixin): """ This is a class for computing the persistence Fisher distance matrix from a list of persistence diagrams. The persistence Fisher distance is obtained by computing the original Fisher distance between the probability distributions associated to the persistence diagrams given by convolving them with a Gaussian kernel. See http://papers.nips.cc/paper/8205-persistence-fisher-kernel-a-riemannian-manifold-kernel-for-persistence-diagrams for more details. -- cgit v1.2.3 From 6a6bed7ca21c1ffcf6de9ed09c2a6512ecb66585 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Fri, 17 Jan 2020 15:37:03 +0100 Subject: improving doc output --- src/python/doc/barycenter_sum.inc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/python/doc/barycenter_sum.inc b/src/python/doc/barycenter_sum.inc index afac07d7..da2bdd84 100644 --- a/src/python/doc/barycenter_sum.inc +++ b/src/python/doc/barycenter_sum.inc @@ -7,11 +7,11 @@ | :figclass: align-center | Given a set of persistence diagrams :math:`\mu_1 \dots \mu_n`, it is | :Introduced in: GUDHI 3.1.0 | | | defined as a minimizer of the variance functional, that is of | | | Illustration of Frechet mean between persistence | :math:`\mu \mapsto \sum_{i=1}^n d_2(\mu,\mu_i)^2`. 
| :Copyright: MIT | - | diagrams. | where :math:`d_2` denotes the Wasserstein-2 distance between persis- | | - | | tence diagrams. | | + | diagrams. | where :math:`d_2` denotes the Wasserstein-2 distance between | | + | | persistence diagrams. | | | | It is known to exist and is generically unique. However, an exact | | - | | computation is in general untractable. Current implementation avai- | :Requires: Python Optimal Transport (POT) :math:`\geq` 0.5.1 | - | | -lable is based on [Turner et al, 2014], and uses an EM-scheme to | | + | | computation is in general untractable. Current implementation | :Requires: Python Optimal Transport (POT) :math:`\geq` 0.5.1 | + | | available is based on [Turner et al, 2014], and uses an EM-scheme to | | | | provide a local minimum of the variance functional (somewhat similar | | | | to the Lloyd algorithm to estimate a solution to the k-means | | | | problem). The local minimum returned depends on the initialization of| | -- cgit v1.2.3 From 4c0e6e4144dd3cf6da9600fd4b9bbcac5e664b73 Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Sun, 26 Jan 2020 02:54:35 -0500 Subject: added extended persistence function --- src/Simplex_tree/include/gudhi/Simplex_tree.h | 71 +++++++++++++++++++++++++++ src/python/gudhi/simplex_tree.pxd | 2 + src/python/gudhi/simplex_tree.pyx | 14 ++++++ 3 files changed, 87 insertions(+) (limited to 'src') diff --git a/src/Simplex_tree/include/gudhi/Simplex_tree.h b/src/Simplex_tree/include/gudhi/Simplex_tree.h index 76608008..4786b244 100644 --- a/src/Simplex_tree/include/gudhi/Simplex_tree.h +++ b/src/Simplex_tree/include/gudhi/Simplex_tree.h @@ -125,6 +125,8 @@ class Simplex_tree { private: typedef typename Dictionary::iterator Dictionary_it; typedef typename Dictionary_it::value_type Dit_value_t; + double minval_; + double maxval_; struct return_first { Vertex_handle operator()(const Dit_value_t& p_sh) const { @@ -1465,6 +1467,75 @@ class Simplex_tree { } } + /** \brief Retrieve good values for extended 
persistence, and separate the diagrams into the ordinary, relative, extended+ and extended- subdiagrams. Need extend_filtration to be called first! + * @param[in] dgm Persistence diagram obtained after calling this->extend_filtration and this->get_persistence. + * @return A vector of four persistence diagrams. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. + */ + std::vector>>> convert(const std::vector>>& dgm){ + std::vector>>> new_dgm(4); double x, y; + for(unsigned int i = 0; i < dgm.size(); i++){ int h = dgm[i].first; double px = dgm[i].second.first; double py = dgm[i].second.second; + if(std::isinf(py)) continue; + else{ + if ((px <= -1) & (py <= -1)){x = minval_ + (maxval_-minval_)*(px + 2); y = minval_ + (maxval_-minval_)*(py + 2); new_dgm[0].push_back(std::make_pair(h, std::make_pair(x,y))); } + if ((px >= 1) & (py >= 1)){x = minval_ - (maxval_-minval_)*(px - 2); y = minval_ - (maxval_-minval_)*(py - 2); new_dgm[1].push_back(std::make_pair(h, std::make_pair(x,y))); } + if ((px <= -1) & (py >= 1)){x = minval_ + (maxval_-minval_)*(px + 2); y = minval_ - (maxval_-minval_)*(py - 2); + if (x <= y) new_dgm[2].push_back(std::make_pair(h, std::make_pair(x,y))); + else new_dgm[3].push_back(std::make_pair(h, std::make_pair(x,y))); + } + } + } + return new_dgm; + } + + /** \brief Extend filtration for computing extended persistence. 
+ */ + void extend_filtration() { + + // Compute maximum and minimum of filtration values + int maxvert = -std::numeric_limits::infinity(); + std::vector filt; + for (auto sh : this->complex_simplex_range()) {if (this->dimension(sh) == 0){filt.push_back(this->filtration(sh)); maxvert = std::max(*this->simplex_vertex_range(sh).begin(), maxvert);}} + minval_ = *std::min_element(filt.begin(), filt.end()); + maxval_ = *std::max_element(filt.begin(), filt.end()); + maxvert += 1; + + // Compute vectors of integers corresponding to the Simplex handles + std::vector > splxs; + for (auto sh : this->complex_simplex_range()) { + std::vector vr; for (auto vh : this->simplex_vertex_range(sh)){vr.push_back(vh);} + splxs.push_back(vr); + } + + // Add point for coning the simplicial complex + int count = this->num_simplices(); + std::vector cone; cone.push_back(maxvert); auto ins = this->insert_simplex(cone, -3); this->assign_key(ins.first, count); count++; + + // For each simplex + for (auto vr : splxs){ + // Create cone on simplex + auto sh = this->find(vr); vr.push_back(maxvert); + if (this->dimension(sh) == 0){ + // Assign ascending value between -2 and -1 to vertex + double v = this->filtration(sh); + this->assign_filtration(sh, -2 + (v-minval_)/(maxval_-minval_)); + // Assign descending value between 1 and 2 to cone on vertex + auto ins = this->insert_simplex(vr, 2 - (v-minval_)/(maxval_-minval_)); + this->assign_key(ins.first, count); + } + else{ + // Assign value -3 to simplex and cone on simplex + this->assign_filtration(sh, -3); + auto ins = this->insert_simplex(vr, -3); + this->assign_key(ins.first, count); + } + count++; + } + + this->make_filtration_non_decreasing(); this->initialize_filtration(); + + } + + private: Vertex_handle null_vertex_; /** \brief Total number of simplices in the complex, without the empty simplex.*/ diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd index 1066d44b..39f2a45f 100644 --- 
a/src/python/gudhi/simplex_tree.pxd +++ b/src/python/gudhi/simplex_tree.pxd @@ -43,6 +43,8 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi": void remove_maximal_simplex(vector[int] simplex) bool prune_above_filtration(double filtration) bool make_filtration_non_decreasing() + void extend_filtration() + vector[vector[pair[int, pair[double, double]]]] convert(vector[pair[int, pair[double, double]]]) cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Simplex_tree_persistence_interface "Gudhi::Persistent_cohomology_interface>": diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index b18627c4..cfab14f4 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -386,6 +386,20 @@ cdef class SimplexTree: """ return self.get_ptr().make_filtration_non_decreasing() + def extend_filtration(self): + """ This function extends filtration for computing extended persistence. + """ + return self.get_ptr().extend_filtration() + + def convert(self, dgm): + """This function retrieves good values for extended persistence, and separate the diagrams into the ordinary, relative, extended+ and extended- subdiagrams. Need extend_filtration to be called first! + + :param dgm: Persistence diagram obtained after calling this->extend_filtration and this->get_persistence. + :returns: A vector of four persistence diagrams. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. + """ + return self.get_ptr().convert(dgm) + + def persistence(self, homology_coeff_field=11, min_persistence=0, persistence_dim_max = False): """This function returns the persistence of the simplicial complex. 
-- cgit v1.2.3 From 1dd1c554a962db70809eadb470eb2eaa733970d4 Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Fri, 31 Jan 2020 14:59:32 -0500 Subject: revert first commit --- .../diagram_vectorizations_distances_kernels.py | 7 +-- src/python/gudhi/representations/metrics.py | 59 ---------------------- 2 files changed, 1 insertion(+), 65 deletions(-) (limited to 'src') diff --git a/src/python/example/diagram_vectorizations_distances_kernels.py b/src/python/example/diagram_vectorizations_distances_kernels.py index 66c32cc2..119072eb 100755 --- a/src/python/example/diagram_vectorizations_distances_kernels.py +++ b/src/python/example/diagram_vectorizations_distances_kernels.py @@ -9,7 +9,7 @@ from gudhi.representations import DiagramSelector, Clamping, Landscape, Silhouet TopologicalVector, DiagramScaler, BirthPersistenceTransform,\ PersistenceImage, PersistenceWeightedGaussianKernel, Entropy, \ PersistenceScaleSpaceKernel, SlicedWassersteinDistance,\ - SlicedWassersteinKernel, BottleneckDistance, WassersteinDistance, PersistenceFisherKernel + SlicedWassersteinKernel, BottleneckDistance, PersistenceFisherKernel D = np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.], [0., np.inf], [5., np.inf]]) diags = [D] @@ -117,11 +117,6 @@ X = SW.fit(diags) Y = SW.transform(diags2) print("SW kernel is " + str(Y[0][0])) -W = WassersteinDistance(order=2, internal_p=2) -X = W.fit(diags) -Y = W.transform(diags2) -print("Wasserstein distance is " + str(Y[0][0])) - W = BottleneckDistance(epsilon=.001) X = W.fit(diags) Y = W.transform(diags2) diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py index 290c1d07..5f9ec6ab 100644 --- a/src/python/gudhi/representations/metrics.py +++ b/src/python/gudhi/representations/metrics.py @@ -10,7 +10,6 @@ import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from sklearn.metrics import pairwise_distances -from gudhi.wasserstein import wasserstein_distance try: from .. 
import bottleneck_distance USE_GUDHI = True @@ -146,64 +145,6 @@ class BottleneckDistance(BaseEstimator, TransformerMixin): return Xfit -class WassersteinDistance(BaseEstimator, TransformerMixin): - """ - This is a class for computing the Wasserstein distance matrix from a list of persistence diagrams. - """ - def __init__(self, order=2, internal_p=2): - """ - Constructor for the WassersteinDistance class. - - Parameters: - order (int): exponent for Wasserstein, default value is 2., see :func:`gudhi.wasserstein.wasserstein_distance`. - internal_p (int): ground metric on the (upper-half) plane (i.e. norm l_p in R^2), default value is 2 (euclidean norm), see :func:`gudhi.wasserstein.wasserstein_distance`. - """ - self.order, self.internal_p = order, internal_p - - def fit(self, X, y=None): - """ - Fit the WassersteinDistance class on a list of persistence diagrams: persistence diagrams are stored in a numpy array called **diagrams**. - - Parameters: - X (list of n x 2 numpy arrays): input persistence diagrams. - y (n x 1 array): persistence diagram labels (unused). - """ - self.diagrams_ = X - return self - - def transform(self, X): - """ - Compute all Wasserstein distances between the persistence diagrams that were stored after calling the fit() method, and a given list of (possibly different) persistence diagrams. - - Parameters: - X (list of n x 2 numpy arrays): input persistence diagrams. - - Returns: - numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise Wasserstein distances. 
- """ - num_diag1 = len(X) - - #if len(self.diagrams_) == len(X) and np.all([np.array_equal(self.diagrams_[i], X[i]) for i in range(len(X))]): - if X is self.diagrams_: - matrix = np.zeros((num_diag1, num_diag1)) - - for i in range(num_diag1): - for j in range(i+1, num_diag1): - matrix[i,j] = wasserstein_distance(X[i], X[j], self.order, self.internal_p) - matrix[j,i] = matrix[i,j] - - else: - num_diag2 = len(self.diagrams_) - matrix = np.zeros((num_diag1, num_diag2)) - - for i in range(num_diag1): - for j in range(num_diag2): - matrix[i,j] = wasserstein_distance(X[i], self.diagrams_[j], self.order, self.internal_p) - - Xfit = matrix - - return Xfit - class PersistenceFisherDistance(BaseEstimator, TransformerMixin): """ This is a class for computing the persistence Fisher distance matrix from a list of persistence diagrams. The persistence Fisher distance is obtained by computing the original Fisher distance between the probability distributions associated to the persistence diagrams given by convolving them with a Gaussian kernel. See http://papers.nips.cc/paper/8205-persistence-fisher-kernel-a-riemannian-manifold-kernel-for-persistence-diagrams for more details. -- cgit v1.2.3 From f2020f6bb3a4d2bbd774aa630151ef1db53ac4f8 Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Sun, 2 Feb 2020 15:23:03 -0500 Subject: fixed Marc's comments --- src/Simplex_tree/include/gudhi/Simplex_tree.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Simplex_tree/include/gudhi/Simplex_tree.h b/src/Simplex_tree/include/gudhi/Simplex_tree.h index 4786b244..301f7aae 100644 --- a/src/Simplex_tree/include/gudhi/Simplex_tree.h +++ b/src/Simplex_tree/include/gudhi/Simplex_tree.h @@ -1471,7 +1471,7 @@ class Simplex_tree { * @param[in] dgm Persistence diagram obtained after calling this->extend_filtration and this->get_persistence. * @return A vector of four persistence diagrams. 
The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. */ - std::vector>>> convert(const std::vector>>& dgm){ + std::vector>>> compute_extended_persistence_subdiagrams(const std::vector>>& dgm){ std::vector>>> new_dgm(4); double x, y; for(unsigned int i = 0; i < dgm.size(); i++){ int h = dgm[i].first; double px = dgm[i].second.first; double py = dgm[i].second.second; if(std::isinf(py)) continue; @@ -1487,7 +1487,7 @@ class Simplex_tree { return new_dgm; } - /** \brief Extend filtration for computing extended persistence. + /** \brief Extend filtration for computing extended persistence. This function only uses the filtration values at the 0-dimensional simplices, and computes the extended persistence diagram induced by the lower-star filtration computed with these values. Note that after calling this function, the filtration values are actually modified. The function compute_extended_persistence_subdiagrams retrieves the original values and separates the extended persistence diagram points w.r.t. their types (Ord, Rel, Ext+, Ext-) and should always be called after computing the persistent homology of the extended simplicial complex. 
*/ void extend_filtration() { -- cgit v1.2.3 From 360cc2cc31e9e81b99f5c21aa2b4e79b066baabf Mon Sep 17 00:00:00 2001 From: mathieu Date: Tue, 4 Feb 2020 19:44:52 -0500 Subject: fixed Vincent's comments --- src/Simplex_tree/include/gudhi/Simplex_tree.h | 74 ++++++++++++++++++----- src/python/gudhi/simplex_tree.pxd | 2 +- src/python/gudhi/simplex_tree.pyx | 14 +++-- src/python/test/test_simplex_tree.py | 86 +++++++++++++++++++++++++-- 4 files changed, 150 insertions(+), 26 deletions(-) (limited to 'src') diff --git a/src/Simplex_tree/include/gudhi/Simplex_tree.h b/src/Simplex_tree/include/gudhi/Simplex_tree.h index 301f7aae..42cf4246 100644 --- a/src/Simplex_tree/include/gudhi/Simplex_tree.h +++ b/src/Simplex_tree/include/gudhi/Simplex_tree.h @@ -1467,34 +1467,68 @@ class Simplex_tree { } } - /** \brief Retrieve good values for extended persistence, and separate the diagrams into the ordinary, relative, extended+ and extended- subdiagrams. Need extend_filtration to be called first! - * @param[in] dgm Persistence diagram obtained after calling this->extend_filtration and this->get_persistence. - * @return A vector of four persistence diagrams. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. + /** \brief Retrieve good values for extended persistence, and separate the + * diagrams into the ordinary, relative, extended+ and extended- subdiagrams. + * Need extend_filtration to be called first! + * @param[in] dgm Persistence diagram obtained after calling this->extend_filtration + * and this->get_persistence. + * @return A vector of four persistence diagrams. The first one is Ordinary, the + * second one is Relative, the third one is Extended+ and the fourth one is Extended-. 
*/ std::vector>>> compute_extended_persistence_subdiagrams(const std::vector>>& dgm){ - std::vector>>> new_dgm(4); double x, y; - for(unsigned int i = 0; i < dgm.size(); i++){ int h = dgm[i].first; double px = dgm[i].second.first; double py = dgm[i].second.second; + std::vector>>> new_dgm(4); + double x, y; + for(unsigned int i = 0; i < dgm.size(); i++){ + int h = dgm[i].first; + double px = dgm[i].second.first; + double py = dgm[i].second.second; if(std::isinf(py)) continue; else{ - if ((px <= -1) & (py <= -1)){x = minval_ + (maxval_-minval_)*(px + 2); y = minval_ + (maxval_-minval_)*(py + 2); new_dgm[0].push_back(std::make_pair(h, std::make_pair(x,y))); } - if ((px >= 1) & (py >= 1)){x = minval_ - (maxval_-minval_)*(px - 2); y = minval_ - (maxval_-minval_)*(py - 2); new_dgm[1].push_back(std::make_pair(h, std::make_pair(x,y))); } - if ((px <= -1) & (py >= 1)){x = minval_ + (maxval_-minval_)*(px + 2); y = minval_ - (maxval_-minval_)*(py - 2); - if (x <= y) new_dgm[2].push_back(std::make_pair(h, std::make_pair(x,y))); - else new_dgm[3].push_back(std::make_pair(h, std::make_pair(x,y))); + if ((px <= -1) & (py <= -1)){ + x = minval_ + (maxval_-minval_)*(px + 2); + y = minval_ + (maxval_-minval_)*(py + 2); + new_dgm[0].push_back(std::make_pair(h, std::make_pair(x,y))); + } + if ((px >= 1) & (py >= 1)){ + x = minval_ - (maxval_-minval_)*(px - 2); + y = minval_ - (maxval_-minval_)*(py - 2); + new_dgm[1].push_back(std::make_pair(h, std::make_pair(x,y))); + } + if ((px <= -1) & (py >= 1)){ + x = minval_ + (maxval_-minval_)*(px + 2); + y = minval_ - (maxval_-minval_)*(py - 2); + if (x <= y){ + new_dgm[2].push_back(std::make_pair(h, std::make_pair(x,y))); + } + else{ + new_dgm[3].push_back(std::make_pair(h, std::make_pair(x,y))); + } } } } return new_dgm; } - /** \brief Extend filtration for computing extended persistence. 
This function only uses the filtration values at the 0-dimensional simplices, and computes the extended persistence diagram induced by the lower-star filtration computed with these values. Note that after calling this function, the filtration values are actually modified. The function compute_extended_persistence_subdiagrams retrieves the original values and separates the extended persistence diagram points w.r.t. their types (Ord, Rel, Ext+, Ext-) and should always be called after computing the persistent homology of the extended simplicial complex. + /** \brief Extend filtration for computing extended persistence. + * This function only uses the filtration values at the 0-dimensional simplices, + * and computes the extended persistence diagram induced by the lower-star filtration + * computed with these values. Note that after calling this function, the filtration + * values are actually modified. The function compute_extended_persistence_subdiagrams + * retrieves the original values and separates the extended persistence diagram points + * w.r.t. their types (Ord, Rel, Ext+, Ext-) and should always be called after + * computing the persistent homology of the extended simplicial complex. 
*/ void extend_filtration() { // Compute maximum and minimum of filtration values - int maxvert = -std::numeric_limits::infinity(); + int maxvert = -std::numeric_limits::infinity(); std::vector filt; - for (auto sh : this->complex_simplex_range()) {if (this->dimension(sh) == 0){filt.push_back(this->filtration(sh)); maxvert = std::max(*this->simplex_vertex_range(sh).begin(), maxvert);}} + for (auto sh : this->complex_simplex_range()) { + if (this->dimension(sh) == 0){ + filt.push_back(this->filtration(sh)); + maxvert = std::max(*this->simplex_vertex_range(sh).begin(), maxvert); + } + } minval_ = *std::min_element(filt.begin(), filt.end()); maxval_ = *std::max_element(filt.begin(), filt.end()); maxvert += 1; @@ -1502,13 +1536,20 @@ class Simplex_tree { // Compute vectors of integers corresponding to the Simplex handles std::vector > splxs; for (auto sh : this->complex_simplex_range()) { - std::vector vr; for (auto vh : this->simplex_vertex_range(sh)){vr.push_back(vh);} + std::vector vr; + for (auto vh : this->simplex_vertex_range(sh)){ + vr.push_back(vh); + } splxs.push_back(vr); } // Add point for coning the simplicial complex int count = this->num_simplices(); - std::vector cone; cone.push_back(maxvert); auto ins = this->insert_simplex(cone, -3); this->assign_key(ins.first, count); count++; + std::vector cone; + cone.push_back(maxvert); + auto ins = this->insert_simplex(cone, -3); + this->assign_key(ins.first, count); + count++; // For each simplex for (auto vr : splxs){ @@ -1531,7 +1572,8 @@ class Simplex_tree { count++; } - this->make_filtration_non_decreasing(); this->initialize_filtration(); + this->make_filtration_non_decreasing(); + this->initialize_filtration(); } diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd index 4393047f..7aa16926 100644 --- a/src/python/gudhi/simplex_tree.pxd +++ b/src/python/gudhi/simplex_tree.pxd @@ -44,7 +44,7 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi": bool 
prune_above_filtration(double filtration) bool make_filtration_non_decreasing() void extend_filtration() - vector[vector[pair[int, pair[double, double]]]] convert(vector[pair[int, pair[double, double]]]) + vector[vector[pair[int, pair[double, double]]]] compute_extended_persistence_subdiagrams(vector[pair[int, pair[double, double]]]) cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Simplex_tree_persistence_interface "Gudhi::Persistent_cohomology_interface>": diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index cfab14f4..e429e28a 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -387,17 +387,21 @@ cdef class SimplexTree: return self.get_ptr().make_filtration_non_decreasing() def extend_filtration(self): - """ This function extends filtration for computing extended persistence. + """ Extend filtration for computing extended persistence. This function only uses the filtration values at the 0-dimensional simplices, and computes the extended persistence diagram induced by the lower-star filtration computed with these values. Note that after calling this function, the filtration values are actually modified. The function :func:`compute_extended_persistence_subdiagrams()` retrieves the original values and separates the extended persistence diagram points w.r.t. their types (Ord, Rel, Ext+, Ext-) and should always be called after computing the persistent homology of the extended simplicial complex. """ return self.get_ptr().extend_filtration() - def convert(self, dgm): - """This function retrieves good values for extended persistence, and separate the diagrams into the ordinary, relative, extended+ and extended- subdiagrams. Need extend_filtration to be called first! 
+ def compute_extended_persistence_subdiagrams(self, dgm): + """This function retrieves good values for extended persistence, and separate the diagrams into the ordinary, relative, extended+ and extended- subdiagrams. - :param dgm: Persistence diagram obtained after calling this->extend_filtration and this->get_persistence. + :param dgm: Persistence diagram obtained after calling :func:`extend_filtration()` and :func:`persistence()`. :returns: A vector of four persistence diagrams. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. + + .. note:: + + This function should be called only after calling :func:`extend_filtration()` and :func:`persistence()`. """ - return self.get_ptr().convert(dgm) + return self.get_ptr().compute_extended_persistence_subdiagrams(dgm) def persistence(self, homology_coeff_field=11, min_persistence=0, persistence_dim_max = False): diff --git a/src/python/test/test_simplex_tree.py b/src/python/test/test_simplex_tree.py index 1822c43b..7e3d843e 100755 --- a/src/python/test/test_simplex_tree.py +++ b/src/python/test/test_simplex_tree.py @@ -244,7 +244,85 @@ def test_make_filtration_non_decreasing(): assert st.filtration([0, 1, 6]) == 1.0 assert st.filtration([0, 1]) == 1.0 assert st.filtration([0]) == 1.0 - assert st.filtration([1]) == 1.0 - assert st.filtration([3, 4, 5]) == 2.0 - assert st.filtration([3, 4]) == 2.0 - assert st.filtration([4, 5]) == 2.0 + +def test_extend_filtration(): + + # Inserted simplex: + # 5 4 + # o o + # / \ / + # o o + # /2\ /3 + # o o + # 1 0 + + st = SimplexTree() + st.insert([0,2]) + st.insert([1,2]) + st.insert([0,3]) + st.insert([2,5]) + st.insert([3,4]) + st.insert([3,5]) + st.assign_filtration([0], 1.) + st.assign_filtration([1], 2.) + st.assign_filtration([2], 3.) + st.assign_filtration([3], 4.) + st.assign_filtration([4], 5.) + st.assign_filtration([5], 6.) 
+ + assert st.get_filtration() == [ + ([0, 2], 0.0), + ([1, 2], 0.0), + ([0, 3], 0.0), + ([3, 4], 0.0), + ([2, 5], 0.0), + ([3, 5], 0.0), + ([0], 1.0), + ([1], 2.0), + ([2], 3.0), + ([3], 4.0), + ([4], 5.0), + ([5], 6.0) + ] + + + st.extend_filtration() + + assert st.get_filtration() == [ + ([6], -3.0), + ([0], -2.0), + ([1], -1.8), + ([2], -1.6), + ([0, 2], -1.6), + ([1, 2], -1.6), + ([3], -1.4), + ([0, 3], -1.4), + ([4], -1.2), + ([3, 4], -1.2), + ([5], -1.0), + ([2, 5], -1.0), + ([3, 5], -1.0), + ([5, 6], 1.0), + ([4, 6], 1.2), + ([3, 6], 1.4), + ([3, 4, 6], 1.4), + ([3, 5, 6], 1.4), + ([2, 6], 1.6), + ([2, 5, 6], 1.6), + ([1, 6], 1.8), + ([1, 2, 6], 1.8), + ([0, 6], 2.0), + ([0, 2, 6], 2.0), + ([0, 3, 6], 2.0) + ] + + + dgm = st.persistence() + L = st.compute_extended_persistence_subdiagrams(dgm) + assert L == [ + [(0, (1.9999999999999998, 2.9999999999999996))], + [(1, (5.0, 4.0))], + [(0, (1.0, 6.0))], + [(1, (6.0, 1.0))] + ] + -- cgit v1.2.3 From dc4442bc402ac25290eb529b57407607434bb7ae Mon Sep 17 00:00:00 2001 From: tlacombe Date: Fri, 14 Feb 2020 14:53:51 +0100 Subject: barycenter update, adding more tests and details about log (assigments, cost, nb iter) --- src/python/gudhi/barycenter.py | 125 +++++++++++-------------- src/python/test/test_wasserstein_barycenter.py | 15 ++- 2 files changed, 69 insertions(+), 71 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py index 11098afe..4a00c457 100644 --- a/src/python/gudhi/barycenter.py +++ b/src/python/gudhi/barycenter.py @@ -2,6 +2,7 @@ import ot import numpy as np import scipy.spatial.distance as sc +from wasserstein import _build_dist_matrix, _perstot # This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. # See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. 
@@ -20,42 +21,19 @@ def _proj_on_diag(w): return np.array([(w[0] + w[1])/2 , (w[0] + w[1])/2]) -def _proj_on_diag_array(X): - ''' - :param X: (n x 2) array encoding the points of a persistent diagram. - :returns: (n x 2) array encoding the (respective orthogonal) projections of the points onto the diagonal - ''' - Z = (X[:,0] + X[:,1]) / 2. - return np.array([Z , Z]).T - - -def _build_dist_matrix(X, Y, p=2., q=2.): - ''' - :param X: (n x 2) numpy.array encoding the (points of the) first diagram. - :param Y: (m x 2) numpy.array encoding the second diagram. - :param q: Ground metric (i.e. norm l_q). - :param p: exponent for the Wasserstein metric. - :returns: (n+1) x (m+1) np.array encoding the cost matrix C. - For 1 <= i <= n, 1 <= j <= m, C[i,j] encodes the distance between X[i] and Y[j], while C[i, m+1] (resp. C[n+1, j]) encodes the distance (to the p) between X[i] (resp Y[j]) and its orthogonal proj onto the diagonal. - note also that C[n+1, m+1] = 0 (it costs nothing to move from the diagonal to the diagonal). - Note that for lagrangian_barycenter, one must use p=q=2. - ''' - Xdiag = _proj_on_diag_array(X) - Ydiag = _proj_on_diag_array(Y) - if np.isinf(q): - C = sc.cdist(X, Y, metric='chebyshev')**p - Cxd = np.linalg.norm(X - Xdiag, ord=q, axis=1)**p - Cdy = np.linalg.norm(Y - Ydiag, ord=q, axis=1)**p +def _mean(x, m): + """ + :param x: a list of 2D-points, off diagonal, x_0... 
x_{k-1} + :param m: total amount of points taken into account, that is we have (m-k) copies of diagonal + :returns: the weighted mean of x with (m-k) copies of the diagonal + """ + k = len(x) + if k > 0: + w = np.mean(x, axis=0) + w_delta = _proj_on_diag(w) + return (k * w + (m-k) * w_delta) / m else: - C = sc.cdist(X,Y, metric='minkowski', p=q)**p - Cxd = np.linalg.norm(X - Xdiag, ord=q, axis=1)**p - Cdy = np.linalg.norm(Y - Ydiag, ord=q, axis=1)**p - Cf = np.hstack((C, Cxd[:,None])) - Cdy = np.append(Cdy, 0) - - Cf = np.vstack((Cf, Cdy[None,:])) - - return Cf + return np.array([0, 0]) def _optimal_matching(X, Y, withcost=False): @@ -64,63 +42,63 @@ def _optimal_matching(X, Y, withcost=False): :param Y: numpy.array of size (m x 2) :param withcost: returns also the cost corresponding to this optimal matching :returns: numpy.array of shape (k x 2) encoding the list of edges in the optimal matching. - That is, [(i, j) ...], where (i,j) indicates that X[i] is matched to Y[j] - if i > len(X) or j > len(Y), it means they represent the diagonal. - + That is, [[i, j] ...], where (i,j) indicates that X[i] is matched to Y[j] + if i >= len(X) or j >= len(Y), it means they represent the diagonal. + They will be encoded by -1 afterwards. """ n = len(X) m = len(Y) + # Start by handling empty diagrams. Could it be shorten? if X.size == 0: # X is empty if Y.size == 0: # Y is empty - return np.array([[0,0]]) # the diagonal is matched to the diagonal and that's it... - else: - return np.column_stack([np.zeros(m+1, dtype=int), np.arange(m+1, dtype=int)]) + res = np.array([[0,0]]) # the diagonal is matched to the diagonal and that's it... 
+ if withcost: + return res, 0 + else: + return res + else: # X is empty but not Y + res = np.array([[0, i] for i in range(m)]) + cost = _perstot(Y, order=2, internal_p=2)**2 + if withcost: + return res, cost + else: + return res elif Y.size == 0: # X is not empty but Y is empty - return np.column_stack([np.zeros(n+1, dtype=int), np.arange(n+1, dtype=int)]) - + res = np.array([[i,0] for i in range(n)]) + cost = _perstot(X, order=2, internal_p=2)**2 + if withcost: + return res, cost + else: + return res + # we know X, Y are not empty diags now - M = _build_dist_matrix(X, Y) + M = _build_dist_matrix(X, Y, order=2, internal_p=2) a = np.full(n+1, 1. / (n + m) ) # weight vector of the input diagram. Uniform here. a[-1] = a[-1] * m # normalized so that we have a probability measure, required by POT b = np.full(m+1, 1. / (n + m) ) # weight vector of the input diagram. Uniform here. b[-1] = b[-1] * n # so that we have a probability measure, required by POT P = ot.emd(a=a, b=b, M=M)*(n+m) - # Note : it seems POT return a permutation matrix in this situation, ie a vertex of the constraint set (generically true). + # Note : it seems POT returns a permutation matrix in this situation, ie a vertex of the constraint set (generically true). if withcost: - cost = np.sqrt(np.sum(np.multiply(P, M))) + cost = np.sum(np.multiply(P, M)) P[P < 0.5] = 0 # dirty trick to avoid some numerical issues... to be improved. - # return the list of (i,j) such that P[i,j] > 0, i.e. x_i is matched to y_j (should it be the diag). res = np.nonzero(P) + # return the list of (i,j) such that P[i,j] > 0, i.e. x_i is matched to y_j (should it be the diag). if withcost: return np.column_stack(res), cost return np.column_stack(res) -def _mean(x, m): - """ - :param x: a list of 2D-points, off diagonal, x_0... 
x_{k-1} - :param m: total amount of points taken into account, that is we have (m-k) copies of diagonal - :returns: the weighted mean of x with (m-k) copies of the diagonal - """ - k = len(x) - if k > 0: - w = np.mean(x, axis=0) - w_delta = _proj_on_diag(w) - return (k * w + (m-k) * w_delta) / m - else: - return np.array([0, 0]) - - def lagrangian_barycenter(pdiagset, init=None, verbose=False): """ Compute the estimated barycenter computed with the algorithm provided by Turner et al (2014). It is a local minimum of the corresponding Frechet function. - :param pdiagset: a list of size N containing numpy.array of shape (n x 2) + :param pdiagset: a list of size m containing numpy.array of shape (n x 2) (n can variate), encoding a set of persistence diagrams with only finite coordinates. :param init: The initial value for barycenter estimate. @@ -134,10 +112,13 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): If verbose, returns a couple (Y, log) where Y is the barycenter estimate, and log is a dict that contains additional informations: - - assigments, a list of list of pairs (i,j), - That is, a[k] = [(i, j) ...], where (i,j) indicates that X[i] is matched to Y[j] + - groupings, a list of list of pairs (i,j), + That is, G[k] = [(i, j) ...], where (i,j) indicates that X[i] is matched to Y[j] if i > len(X) or j > len(Y), it means they represent the diagonal. - - energy, a float representing the Frechet mean value obtained. + - energy, a float representing the Frechet energy value obtained, + that is the mean of squared distances of observations to the output. + - nb_iter, integer representing the number of iterations performed before convergence + of the algorithm. 
""" X = pdiagset # to shorten notations, not a copy m = len(X) # number of diagrams we are averaging @@ -157,8 +138,11 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): else: Y = init.copy() + nb_iter = 0 + converged = False # stoping criterion while not converged: + nb_iter += 1 K = len(Y) # current nb of points in Y (some might be on diagonal) G = np.zeros((K, m), dtype=int)-1 # will store for each j, the (index) point matched in each other diagram (might be the diagonal). # that is G[j, i] = k <=> y_j is matched to @@ -185,7 +169,6 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): new_created_points.append(new_y) # Step 2 : Update current point position thanks to the groupings computed - to_delete = [] for j in range(K): matched_points = [X[i][G[j, i]] for i in range(m) if G[j, i] > -1] @@ -214,12 +197,16 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): n_y = len(Y) for i in range(m): edges, cost = _optimal_matching(Y, X[i], withcost=True) - print(edges) - groupings.append([x_i_j for (y_j, x_i_j) in enumerate(edges) if y_j < n_y]) + n_x = len(X[i]) + G = edges[np.where(edges[:,0]= n_x) + G[idx,1] = -1 # -1 will encode the diagonal + groupings.append(G) energy += cost log["groupings"] = groupings energy = energy/m log["energy"] = energy + log["nb_iter"] = nb_iter return Y, log else: diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py index 910d23ff..07242582 100755 --- a/src/python/test/test_wasserstein_barycenter.py +++ b/src/python/test/test_wasserstein_barycenter.py @@ -27,7 +27,18 @@ def test_lagrangian_barycenter(): res = np.array([[0.27916667, 0.55416667], [0.7375, 0.7625], [0.2375, 0.2625]]) dg7 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]]) + dg8 = np.array([[0., 4.]]) + + # error crit. 
+ eps = 0.000001 - assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < 0.001 + + assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < eps assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), np.empty(shape=(0,2))) - assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < 0.001 + assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < eps + Y, log = lagrangian_barycenter(pdiagset=[dg4, dg8], verbose=True) + assert np.linalg.norm(Y - np.array([[1,3]])) < eps + assert np.abs(log["energy"] - 2) < eps + assert np.array_equal(log["groupings"][0] , np.array([[0, -1]])) + assert np.array_equal(log["groupings"][1] , np.array([[0, 0]])) + assert lagrangian_barycenter(pdiagset = []) is None -- cgit v1.2.3 From dc5c7ac2167bfa467b52d0a36ecb9999fe03ba91 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Fri, 14 Feb 2020 14:58:53 +0100 Subject: added two more tests for barycenter --- src/python/test/test_wasserstein_barycenter.py | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py index 07242582..a58a4d62 100755 --- a/src/python/test/test_wasserstein_barycenter.py +++ b/src/python/test/test_wasserstein_barycenter.py @@ -41,4 +41,5 @@ def test_lagrangian_barycenter(): assert np.abs(log["energy"] - 2) < eps assert np.array_equal(log["groupings"][0] , np.array([[0, -1]])) assert np.array_equal(log["groupings"][1] , np.array([[0, 0]])) + assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg8, dg4], init=np.array([[0.2, 0.6], [0.5, 0.7]]), verbose=False) - np.array([[1, 3]])) < eps assert lagrangian_barycenter(pdiagset = []) is None -- cgit v1.2.3 From 3eaba12b66518717e90ffb1e410b7f8d769719cf Mon Sep 17 00:00:00 2001 From: tlacombe Date: Fri, 14 Feb 2020 15:41:23 +0100 Subject: 
update import gudhi.wasserstein --- src/python/gudhi/barycenter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py index 4a00c457..a2af7a58 100644 --- a/src/python/gudhi/barycenter.py +++ b/src/python/gudhi/barycenter.py @@ -2,7 +2,7 @@ import ot import numpy as np import scipy.spatial.distance as sc -from wasserstein import _build_dist_matrix, _perstot +from gudhi.wasserstein import _build_dist_matrix, _perstot # This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. # See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. -- cgit v1.2.3 From f8fe3fdb01f6161b57da732a1c3f0c14a8b359a6 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Fri, 14 Feb 2020 18:45:34 +0100 Subject: moved import after docstring + reduce lines < 80 char --- src/python/gudhi/barycenter.py | 99 +++++++++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 40 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py index a2af7a58..4a877b4a 100644 --- a/src/python/gudhi/barycenter.py +++ b/src/python/gudhi/barycenter.py @@ -1,9 +1,3 @@ -import ot -import numpy as np -import scipy.spatial.distance as sc - -from gudhi.wasserstein import _build_dist_matrix, _perstot - # This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. # See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. # Author(s): Theo Lacombe @@ -14,6 +8,13 @@ from gudhi.wasserstein import _build_dist_matrix, _perstot # - YYYY/MM Author: Description of the modification +import ot +import numpy as np +import scipy.spatial.distance as sc + +from gudhi.wasserstein import _build_dist_matrix, _perstot + + def _proj_on_diag(w): ''' Util function to project a point on the diag. 
@@ -24,7 +25,8 @@ def _proj_on_diag(w): def _mean(x, m): """ :param x: a list of 2D-points, off diagonal, x_0... x_{k-1} - :param m: total amount of points taken into account, that is we have (m-k) copies of diagonal + :param m: total amount of points taken into account, + that is we have (m-k) copies of diagonal :returns: the weighted mean of x with (m-k) copies of the diagonal """ k = len(x) @@ -40,11 +42,14 @@ def _optimal_matching(X, Y, withcost=False): """ :param X: numpy.array of size (n x 2) :param Y: numpy.array of size (m x 2) - :param withcost: returns also the cost corresponding to this optimal matching - :returns: numpy.array of shape (k x 2) encoding the list of edges in the optimal matching. - That is, [[i, j] ...], where (i,j) indicates that X[i] is matched to Y[j] - if i >= len(X) or j >= len(Y), it means they represent the diagonal. - They will be encoded by -1 afterwards. + :param withcost: returns also the cost corresponding to the optimal matching + :returns: numpy.array of shape (k x 2) encoding the list of edges + in the optimal matching. + That is, [[i, j] ...], where (i,j) indicates + that X[i] is matched to Y[j] + if i >= len(X) or j >= len(Y), it means they + represent the diagonal. + They will be encoded by -1 afterwards. """ n = len(X) @@ -52,7 +57,7 @@ def _optimal_matching(X, Y, withcost=False): # Start by handling empty diagrams. Could it be shorten? if X.size == 0: # X is empty if Y.size == 0: # Y is empty - res = np.array([[0,0]]) # the diagonal is matched to the diagonal and that's it... + res = np.array([[0,0]]) # the diagonal is matched to the diagonal if withcost: return res, 0 else: @@ -75,18 +80,20 @@ def _optimal_matching(X, Y, withcost=False): # we know X, Y are not empty diags now M = _build_dist_matrix(X, Y, order=2, internal_p=2) - a = np.full(n+1, 1. / (n + m) ) # weight vector of the input diagram. Uniform here. - a[-1] = a[-1] * m # normalized so that we have a probability measure, required by POT - b = np.full(m+1, 1. 
/ (n + m) ) # weight vector of the input diagram. Uniform here. - b[-1] = b[-1] * n # so that we have a probability measure, required by POT + a = np.full(n+1, 1. / (n + m) ) + a[-1] = a[-1] * m + b = np.full(m+1, 1. / (n + m) ) + b[-1] = b[-1] * n P = ot.emd(a=a, b=b, M=M)*(n+m) - # Note : it seems POT returns a permutation matrix in this situation, ie a vertex of the constraint set (generically true). + # Note : it seems POT returns a permutation matrix in this situation, + # ie a vertex of the constraint set (generically true). if withcost: cost = np.sum(np.multiply(P, M)) - P[P < 0.5] = 0 # dirty trick to avoid some numerical issues... to be improved. + P[P < 0.5] = 0 # dirty trick to avoid some numerical issues... to improve. res = np.nonzero(P) - # return the list of (i,j) such that P[i,j] > 0, i.e. x_i is matched to y_j (should it be the diag). + # return the list of (i,j) such that P[i,j] > 0, + #i.e. x_i is matched to y_j (should it be the diag). if withcost: return np.column_stack(res), cost @@ -103,31 +110,38 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): persistence diagrams with only finite coordinates. :param init: The initial value for barycenter estimate. If None, init is made on a random diagram from the dataset. - Otherwise, it must be an int (then we init with diagset[init]) - or a (n x 2) numpy.array enconding a persistence diagram with n points. + Otherwise, it must be an int + (then we init with diagset[init]) + or a (n x 2) numpy.array enconding + a persistence diagram with n points. :param verbose: if True, returns additional information about the barycenter. :returns: If not verbose (default), a numpy.array encoding - the barycenter estimate (local minima of the energy function). + the barycenter estimate + (local minima of the energy function). 
If verbose, returns a couple (Y, log) where Y is the barycenter estimate, and log is a dict that contains additional informations: - groupings, a list of list of pairs (i,j), - That is, G[k] = [(i, j) ...], where (i,j) indicates that X[i] is matched to Y[j] - if i > len(X) or j > len(Y), it means they represent the diagonal. - - energy, a float representing the Frechet energy value obtained, - that is the mean of squared distances of observations to the output. - - nb_iter, integer representing the number of iterations performed before convergence - of the algorithm. + That is, G[k] = [(i, j) ...], where (i,j) indicates + that X[i] is matched to Y[j] + if i > len(X) or j > len(Y), it means they + represent the diagonal. + - energy, a float representing the Frechet + energy value obtained, + that is the mean of squared distances + of observations to the output. + - nb_iter, integer representing the number of iterations + performed before convergence of the algorithm. """ X = pdiagset # to shorten notations, not a copy m = len(X) # number of diagrams we are averaging if m == 0: print("Warning: computing barycenter of empty diag set. Returns None") return None - - nb_off_diag = np.array([len(X_i) for X_i in X]) # store the number of off-diagonal point for each of the X_i - + + # store the number of off-diagonal point for each of the X_i + nb_off_diag = np.array([len(X_i) for X_i in X]) # Initialisation of barycenter if init is None: i0 = np.random.randint(m) # Index of first state for the barycenter @@ -144,7 +158,9 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): while not converged: nb_iter += 1 K = len(Y) # current nb of points in Y (some might be on diagonal) - G = np.zeros((K, m), dtype=int)-1 # will store for each j, the (index) point matched in each other diagram (might be the diagonal). + G = np.zeros((K, m), dtype=int)-1 # will store for each j, the (index) + # point matched in each other diagram + #(might be the diagonal). 
# that is G[j, i] = k <=> y_j is matched to # x_k in the diagram i-th diagram X[i] updated_points = np.zeros((K, 2)) # will store the new positions of @@ -159,16 +175,19 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): indices = _optimal_matching(Y, X[i]) for y_j, x_i_j in indices: if y_j < K: # we matched an off diagonal point to x_i_j... - if x_i_j < nb_off_diag[i]: # ...which is also an off-diagonal point + # ...which is also an off-diagonal point. + if x_i_j < nb_off_diag[i]: G[y_j, i] = x_i_j else: # ...which is a diagonal point G[y_j, i] = -1 # -1 stands for the diagonal (mask) else: # We matched a diagonal point to x_i_j... - if x_i_j < nb_off_diag[i]: # which is a off-diag point ! so we need to create a new point in Y - new_y = _mean(np.array([X[i][x_i_j]]), m) # Average this point with (m-1) copies of Delta + if x_i_j < nb_off_diag[i]: # which is a off-diag point ! + # need to create new point in Y + new_y = _mean(np.array([X[i][x_i_j]]), m) + # Average this point with (m-1) copies of Delta new_created_points.append(new_y) - # Step 2 : Update current point position thanks to the groupings computed + # Step 2 : Update current point position thanks to groupings computed to_delete = [] for j in range(K): matched_points = [X[i][G[j, i]] for i in range(m) if G[j, i] > -1] @@ -178,10 +197,10 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): else: # this points is no longer of any use. to_delete.append(j) # we remove the point to be deleted now. - updated_points = np.delete(updated_points, to_delete, axis=0) # cannot be done in-place. - + updated_points = np.delete(updated_points, to_delete, axis=0) - if new_created_points: # we cannot converge if there have been new created points. + # we cannot converge if there have been new created points. 
+ if new_created_points: Y = np.concatenate((updated_points, new_created_points)) else: # Step 3 : we check convergence -- cgit v1.2.3 From 5e4bc93510f50dacdb59f1a7578aca72817c9631 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Mon, 17 Feb 2020 17:50:37 +0100 Subject: update doc + removed normalization + use argwhere --- src/python/doc/barycenter_user.rst | 7 ++++++- src/python/gudhi/barycenter.py | 29 ++++++++++++----------------- 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/python/doc/barycenter_user.rst b/src/python/doc/barycenter_user.rst index 714d807e..f81e9358 100644 --- a/src/python/doc/barycenter_user.rst +++ b/src/python/doc/barycenter_user.rst @@ -9,7 +9,8 @@ Definition .. include:: barycenter_sum.inc -This implementation is based on ideas from "Frechet means for distribution of persistence diagrams", Turner et al. 2014. +This implementation is based on ideas from "Frechet means for distribution of +persistence diagrams", Turner et al. 2014. Function -------- @@ -21,6 +22,10 @@ Basic example This example computes the Frechet mean (aka Wasserstein barycenter) between four persistence diagrams. It is initialized on the 4th diagram, which is the empty diagram. It is encoded by np.array([]). +As the algorithm is not convex, its output depends on the initialization and is only a local minimum of the objective function. +Initialization can be either given as an integer (in which case the i-th diagram of the list is used as initial estimate) +or as a diagram. +If None, it will randomly select one of the diagram of the list as initial estimate. Note that persistence diagrams must be submitted as (n x 2) numpy arrays and must not contain inf values. .. 
testcode:: diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py index 4a877b4a..c54066ec 100644 --- a/src/python/gudhi/barycenter.py +++ b/src/python/gudhi/barycenter.py @@ -15,12 +15,6 @@ import scipy.spatial.distance as sc from gudhi.wasserstein import _build_dist_matrix, _perstot -def _proj_on_diag(w): - ''' - Util function to project a point on the diag. - ''' - return np.array([(w[0] + w[1])/2 , (w[0] + w[1])/2]) - def _mean(x, m): """ @@ -32,7 +26,7 @@ def _mean(x, m): k = len(x) if k > 0: w = np.mean(x, axis=0) - w_delta = _proj_on_diag(w) + w_delta = (w[0] + w[1]) / 2 * np.ones(2) return (k * w + (m-k) * w_delta) / m else: return np.array([0, 0]) @@ -80,31 +74,32 @@ def _optimal_matching(X, Y, withcost=False): # we know X, Y are not empty diags now M = _build_dist_matrix(X, Y, order=2, internal_p=2) - a = np.full(n+1, 1. / (n + m) ) - a[-1] = a[-1] * m - b = np.full(m+1, 1. / (n + m) ) - b[-1] = b[-1] * n - P = ot.emd(a=a, b=b, M=M)*(n+m) + a = np.ones(n+1) + a[-1] = m + b = np.ones(m+1) + b[-1] = n + P = ot.emd(a=a, b=b, M=M) # Note : it seems POT returns a permutation matrix in this situation, # ie a vertex of the constraint set (generically true). if withcost: cost = np.sum(np.multiply(P, M)) P[P < 0.5] = 0 # dirty trick to avoid some numerical issues... to improve. - res = np.nonzero(P) + res = np.argwhere(P) # return the list of (i,j) such that P[i,j] > 0, #i.e. x_i is matched to y_j (should it be the diag). if withcost: - return np.column_stack(res), cost - - return np.column_stack(res) + return res, cost + return res def lagrangian_barycenter(pdiagset, init=None, verbose=False): """ - Compute the estimated barycenter computed with the algorithm provided + Returns the estimated barycenter computed with the algorithm provided by Turner et al (2014). + As the algorithm is not convex, the output depends on initialization. It is a local minimum of the corresponding Frechet function. 
+ :param pdiagset: a list of size m containing numpy.array of shape (n x 2) (n can variate), encoding a set of persistence diagrams with only finite coordinates. -- cgit v1.2.3 From 16e80e921e1edbc63398f7dbc342bd25d1f169de Mon Sep 17 00:00:00 2001 From: tlacombe Date: Mon, 17 Feb 2020 17:53:39 +0100 Subject: removed message about empty dgm --- src/python/doc/barycenter_user.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/python/doc/barycenter_user.rst b/src/python/doc/barycenter_user.rst index f81e9358..59f758fa 100644 --- a/src/python/doc/barycenter_user.rst +++ b/src/python/doc/barycenter_user.rst @@ -21,7 +21,7 @@ Basic example ------------- This example computes the Frechet mean (aka Wasserstein barycenter) between four persistence diagrams. -It is initialized on the 4th diagram, which is the empty diagram. It is encoded by np.array([]). +It is initialized on the 4th diagram. As the algorithm is not convex, its output depends on the initialization and is only a local minimum of the objective function. Initialization can be either given as an integer (in which case the i-th diagram of the list is used as initial estimate) or as a diagram. -- cgit v1.2.3 From a9b0d8185ecab51428c1aeeb3bf78787420103b2 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Mon, 17 Feb 2020 17:54:01 +0100 Subject: specified that the alg returns None if input is empty --- src/python/gudhi/barycenter.py | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py index c54066ec..dc9e8241 100644 --- a/src/python/gudhi/barycenter.py +++ b/src/python/gudhi/barycenter.py @@ -103,6 +103,7 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): :param pdiagset: a list of size m containing numpy.array of shape (n x 2) (n can variate), encoding a set of persistence diagrams with only finite coordinates. + If empty, returns None. 
:param init: The initial value for barycenter estimate. If None, init is made on a random diagram from the dataset. Otherwise, it must be an int -- cgit v1.2.3 From 59f046cd0f405b124a6e08f26ca7b0248f707374 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Mon, 24 Feb 2020 10:14:09 +0100 Subject: update doc for barycenter --- src/python/doc/index.rst | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/python/doc/index.rst b/src/python/doc/index.rst index 3387a64f..96cd3513 100644 --- a/src/python/doc/index.rst +++ b/src/python/doc/index.rst @@ -71,6 +71,11 @@ Wasserstein distance .. include:: wasserstein_distance_sum.inc +Barycenter +============ + +.. include:: barycenter_sum.inc + Persistence representations =========================== -- cgit v1.2.3 From 3e15e9fe5bffb0ffcf8f7f3a0dac1c331646630a Mon Sep 17 00:00:00 2001 From: tlacombe Date: Mon, 24 Feb 2020 10:14:31 +0100 Subject: changed double quote into simple quote to be consistent with wasserstein.py --- src/python/gudhi/barycenter.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py index dc9e8241..4e132c23 100644 --- a/src/python/gudhi/barycenter.py +++ b/src/python/gudhi/barycenter.py @@ -17,12 +17,12 @@ from gudhi.wasserstein import _build_dist_matrix, _perstot def _mean(x, m): - """ + ''' :param x: a list of 2D-points, off diagonal, x_0... 
x_{k-1} :param m: total amount of points taken into account, that is we have (m-k) copies of diagonal :returns: the weighted mean of x with (m-k) copies of the diagonal - """ + ''' k = len(x) if k > 0: w = np.mean(x, axis=0) @@ -33,7 +33,7 @@ def _mean(x, m): def _optimal_matching(X, Y, withcost=False): - """ + ''' :param X: numpy.array of size (n x 2) :param Y: numpy.array of size (m x 2) :param withcost: returns also the cost corresponding to the optimal matching @@ -44,7 +44,7 @@ def _optimal_matching(X, Y, withcost=False): if i >= len(X) or j >= len(Y), it means they represent the diagonal. They will be encoded by -1 afterwards. - """ + ''' n = len(X) m = len(Y) @@ -94,7 +94,7 @@ def _optimal_matching(X, Y, withcost=False): def lagrangian_barycenter(pdiagset, init=None, verbose=False): - """ + ''' Returns the estimated barycenter computed with the algorithm provided by Turner et al (2014). As the algorithm is not convex, the output depends on initialization. @@ -129,7 +129,7 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): of observations to the output. - nb_iter, integer representing the number of iterations performed before convergence of the algorithm. - """ + ''' X = pdiagset # to shorten notations, not a copy m = len(X) # number of diagrams we are averaging if m == 0: -- cgit v1.2.3 From 2dc7b150576d959b489d3f52890242fd6a492171 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Mon, 24 Feb 2020 13:18:38 +0100 Subject: changed doc for CI ? --- src/python/gudhi/barycenter.py | 5 ----- 1 file changed, 5 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py index 4e132c23..a41b5906 100644 --- a/src/python/gudhi/barycenter.py +++ b/src/python/gudhi/barycenter.py @@ -95,11 +95,6 @@ def _optimal_matching(X, Y, withcost=False): def lagrangian_barycenter(pdiagset, init=None, verbose=False): ''' - Returns the estimated barycenter computed with the algorithm provided - by Turner et al (2014). 
- As the algorithm is not convex, the output depends on initialization. - It is a local minimum of the corresponding Frechet function. - :param pdiagset: a list of size m containing numpy.array of shape (n x 2) (n can variate), encoding a set of persistence diagrams with only finite coordinates. -- cgit v1.2.3 From 0998cecac7f15e3c68058d33acc21fb427f803e9 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Fri, 28 Feb 2020 11:18:59 +0100 Subject: shorten < 80 char the doc --- src/python/doc/barycenter_user.rst | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/python/doc/barycenter_user.rst b/src/python/doc/barycenter_user.rst index 59f758fa..83e9bebb 100644 --- a/src/python/doc/barycenter_user.rst +++ b/src/python/doc/barycenter_user.rst @@ -20,13 +20,17 @@ Function Basic example ------------- -This example computes the Frechet mean (aka Wasserstein barycenter) between four persistence diagrams. +This example computes the Frechet mean (aka Wasserstein barycenter) between +four persistence diagrams. It is initialized on the 4th diagram. -As the algorithm is not convex, its output depends on the initialization and is only a local minimum of the objective function. -Initialization can be either given as an integer (in which case the i-th diagram of the list is used as initial estimate) -or as a diagram. -If None, it will randomly select one of the diagram of the list as initial estimate. -Note that persistence diagrams must be submitted as (n x 2) numpy arrays and must not contain inf values. +As the algorithm is not convex, its output depends on the initialization and +is only a local minimum of the objective function. +Initialization can be either given as an integer (in which case the i-th +diagram of the list is used as initial estimate) or as a diagram. +If None, it will randomly select one of the diagram of the list +as initial estimate. 
+Note that persistence diagrams must be submitted as +(n x 2) numpy arrays and must not contain inf values. .. testcode:: @@ -37,8 +41,8 @@ Note that persistence diagrams must be submitted as (n x 2) numpy arrays and mus dg2 = np.array([[0.2, 0.7]]) dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]]) dg4 = np.array([]) - - bary = gudhi.barycenter.lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3) + pdiagset = [dg1, dg2, dg3, dg4] + bary = gudhi.barycenter.lagrangian_barycenter(pdiagset=pdiagset,init=3) message = "Wasserstein barycenter estimated:" print(message) -- cgit v1.2.3 From 4b546a43fe14178dcfb2b327e27a580fc9811499 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Mon, 16 Mar 2020 13:16:04 +0100 Subject: update doc (indentation, mention of -1 for the diag) and added a few more tests --- src/python/gudhi/barycenter.py | 30 +++++++++++++------------- src/python/test/test_wasserstein_barycenter.py | 15 +++++++------ 2 files changed, 23 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py index a41b5906..3af12c14 100644 --- a/src/python/gudhi/barycenter.py +++ b/src/python/gudhi/barycenter.py @@ -96,9 +96,8 @@ def _optimal_matching(X, Y, withcost=False): def lagrangian_barycenter(pdiagset, init=None, verbose=False): ''' :param pdiagset: a list of size m containing numpy.array of shape (n x 2) - (n can variate), encoding a set of - persistence diagrams with only finite coordinates. - If empty, returns None. + (n can variate), encoding a set of + persistence diagrams with only finite coordinates. :param init: The initial value for barycenter estimate. If None, init is made on a random diagram from the dataset. Otherwise, it must be an int @@ -106,24 +105,25 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): or a (n x 2) numpy.array enconding a persistence diagram with n points. :param verbose: if True, returns additional information about the - barycenter. + barycenter. 
:returns: If not verbose (default), a numpy.array encoding - the barycenter estimate + the barycenter estimate of pdiagset (local minima of the energy function). + If pdiagset is empty, returns None. If verbose, returns a couple (Y, log) where Y is the barycenter estimate, and log is a dict that contains additional informations: - groupings, a list of list of pairs (i,j), - That is, G[k] = [(i, j) ...], where (i,j) indicates - that X[i] is matched to Y[j] - if i > len(X) or j > len(Y), it means they - represent the diagonal. + That is, G[k] = [(i, j) ...], where (i,j) indicates + that X[i] is matched to Y[j] + if i = -1 or j = -1, it means they + represent the diagonal. - energy, a float representing the Frechet - energy value obtained, - that is the mean of squared distances - of observations to the output. + energy value obtained, + that is the mean of squared distances + of observations to the output. - nb_iter, integer representing the number of iterations - performed before convergence of the algorithm. + performed before convergence of the algorithm. ''' X = pdiagset # to shorten notations, not a copy m = len(X) # number of diagrams we are averaging @@ -136,7 +136,7 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): # Initialisation of barycenter if init is None: i0 = np.random.randint(m) # Index of first state for the barycenter - Y = X[i0].copy() #copy() ensure that we do not modify X[i0] + Y = X[i0].copy() else: if type(init)==int: Y = X[init].copy() @@ -149,7 +149,7 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): while not converged: nb_iter += 1 K = len(Y) # current nb of points in Y (some might be on diagonal) - G = np.zeros((K, m), dtype=int)-1 # will store for each j, the (index) + G = np.full((K, m), -1, dtype=int) # will store for each j, the (index) # point matched in each other diagram #(might be the diagonal). 
# that is G[j, i] = k <=> y_j is matched to diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py index a58a4d62..5167cb84 100755 --- a/src/python/test/test_wasserstein_barycenter.py +++ b/src/python/test/test_wasserstein_barycenter.py @@ -27,19 +27,20 @@ def test_lagrangian_barycenter(): res = np.array([[0.27916667, 0.55416667], [0.7375, 0.7625], [0.2375, 0.2625]]) dg7 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]]) - dg8 = np.array([[0., 4.]]) + dg8 = np.array([[0., 4.], [4, 8]]) # error crit. - eps = 0.000001 + eps = 1e-7 assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < eps assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), np.empty(shape=(0,2))) assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < eps Y, log = lagrangian_barycenter(pdiagset=[dg4, dg8], verbose=True) - assert np.linalg.norm(Y - np.array([[1,3]])) < eps - assert np.abs(log["energy"] - 2) < eps - assert np.array_equal(log["groupings"][0] , np.array([[0, -1]])) - assert np.array_equal(log["groupings"][1] , np.array([[0, 0]])) - assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg8, dg4], init=np.array([[0.2, 0.6], [0.5, 0.7]]), verbose=False) - np.array([[1, 3]])) < eps + assert np.linalg.norm(Y - np.array([[1,3], [5, 7]])) < eps + assert np.abs(log["energy"] - 4) < eps + assert np.array_equal(log["groupings"][0] , np.array([[0, -1], [1, -1]])) + assert np.array_equal(log["groupings"][1] , np.array([[0, 0], [1, 1]])) + assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg8, dg4], init=np.array([[0.2, 0.6], [0.5, 0.7]]), verbose=False) - np.array([[1, 3], [5, 7]])) < eps assert lagrangian_barycenter(pdiagset = []) is None + -- cgit v1.2.3 From aa93247860bb01e3fc15926658dd9e6a95198f3d Mon Sep 17 00:00:00 2001 From: tlacombe Date: 
Mon, 16 Mar 2020 13:18:58 +0100 Subject: added mention that _optimal matching should be removed at some point --- src/python/gudhi/barycenter.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py index 3af12c14..517cdb2f 100644 --- a/src/python/gudhi/barycenter.py +++ b/src/python/gudhi/barycenter.py @@ -44,6 +44,9 @@ def _optimal_matching(X, Y, withcost=False): if i >= len(X) or j >= len(Y), it means they represent the diagonal. They will be encoded by -1 afterwards. + + NOTE : this code will be removed for final merge, + and wasserstein.optimal_matching will be used instead. ''' n = len(X) -- cgit v1.2.3 From 6e289999fab86bf06cd69c5b7b846c4f26e0a525 Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Tue, 17 Mar 2020 00:13:32 -0400 Subject: fixes --- src/Simplex_tree/include/gudhi/Simplex_tree.h | 74 +++++++++++++++------------ src/python/test/test_simplex_tree.py | 12 ++--- 2 files changed, 47 insertions(+), 39 deletions(-) (limited to 'src') diff --git a/src/Simplex_tree/include/gudhi/Simplex_tree.h b/src/Simplex_tree/include/gudhi/Simplex_tree.h index 7be14bce..02f2c7e9 100644 --- a/src/Simplex_tree/include/gudhi/Simplex_tree.h +++ b/src/Simplex_tree/include/gudhi/Simplex_tree.h @@ -1354,6 +1354,7 @@ class Simplex_tree { // Replacing if(f=max)) would mean that if f is NaN, we replace it with the max of the children. // That seems more useful than keeping NaN. if (!(simplex.second.filtration() >= max_filt_border_value)) { + // Store the filtration modification information modified = true; simplex.second.assign_filtration(max_filt_border_value); @@ -1473,15 +1474,21 @@ class Simplex_tree { /** \brief Retrieve good values for extended persistence, and separate the * diagrams into the ordinary, relative, extended+ and extended- subdiagrams. - * Need extend_filtration to be called first! + * \post This function should be called only if extend_filtration has been called first! 
+ * \post The coordinates of the persistence diagram points might be a little different than the + * original filtration values due to the internal transformation (scaling to [-2,-1]) that is + * performed on these values during the computation of extended persistence. * @param[in] dgm Persistence diagram obtained after calling this->extend_filtration * and this->get_persistence. * @return A vector of four persistence diagrams. The first one is Ordinary, the * second one is Relative, the third one is Extended+ and the fourth one is Extended-. + * See section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes. */ std::vector>>> compute_extended_persistence_subdiagrams(const std::vector>>& dgm){ std::vector>>> new_dgm(4); double x, y; + double minval_ = this->minval_; + double maxval_ = this->maxval_; for(unsigned int i = 0; i < dgm.size(); i++){ int h = dgm[i].first; double px = dgm[i].second.first; @@ -1516,69 +1523,70 @@ class Simplex_tree { /** \brief Extend filtration for computing extended persistence. * This function only uses the filtration values at the 0-dimensional simplices, * and computes the extended persistence diagram induced by the lower-star filtration - * computed with these values. Note that after calling this function, the filtration + * computed with these values. + * \post Note that after calling this function, the filtration * values are actually modified. The function compute_extended_persistence_subdiagrams * retrieves the original values and separates the extended persistence diagram points * w.r.t. their types (Ord, Rel, Ext+, Ext-) and should always be called after * computing the persistent homology of the extended simplicial complex. + * \post Note that this code creates an extra vertex internally, so you should make sure that + * the Simplex tree does not contain a vertex with the largest Vertex_handle. 
*/ void extend_filtration() { // Compute maximum and minimum of filtration values - int maxvert = -std::numeric_limits::infinity(); - std::vector filt; - for (auto sh : this->complex_simplex_range()) { - if (this->dimension(sh) == 0){ - filt.push_back(this->filtration(sh)); - maxvert = std::max(*this->simplex_vertex_range(sh).begin(), maxvert); - } + int maxvert = std::numeric_limits::min(); + this->minval_ = std::numeric_limits::max(); + this->maxval_ = std::numeric_limits::min(); + for (auto sh : this->skeleton_simplex_range(0)) { + double f = this->filtration(sh); + this->minval_ = std::min(this->minval_, f); + this->maxval_ = std::max(this->maxval_, f); + maxvert = std::max(*this->simplex_vertex_range(sh).begin(), maxvert); } - minval_ = *std::min_element(filt.begin(), filt.end()); - maxval_ = *std::max_element(filt.begin(), filt.end()); + + assert (maxvert < std::numeric_limits::max()); maxvert += 1; - // Compute vectors of integers corresponding to the Simplex handles - std::vector > splxs; - for (auto sh : this->complex_simplex_range()) { - std::vector vr; - for (auto vh : this->simplex_vertex_range(sh)){ - vr.push_back(vh); - } - splxs.push_back(vr); - } + Simplex_tree* st_copy = new Simplex_tree(*this); // Add point for coning the simplicial complex int count = this->num_simplices(); - std::vector cone; - cone.push_back(maxvert); - auto ins = this->insert_simplex(cone, -3); - this->assign_key(ins.first, count); + this->insert_simplex({maxvert}, -3); count++; // For each simplex - for (auto vr : splxs){ + for (auto sh_copy : st_copy->complex_simplex_range()){ + + // Locate simplex + std::vector vr; + for (auto vh : st_copy->simplex_vertex_range(sh_copy)){ + vr.push_back(vh); + } + auto sh = this->find(vr); + // Create cone on simplex - auto sh = this->find(vr); vr.push_back(maxvert); + vr.push_back(maxvert); if (this->dimension(sh) == 0){ // Assign ascending value between -2 and -1 to vertex double v = this->filtration(sh); - this->assign_filtration(sh, -2 
+ (v-minval_)/(maxval_-minval_)); + this->assign_filtration(sh, -2 + (v-this->minval_)/(this->maxval_-this->minval_)); // Assign descending value between 1 and 2 to cone on vertex - auto ins = this->insert_simplex(vr, 2 - (v-minval_)/(maxval_-minval_)); - this->assign_key(ins.first, count); + this->insert_simplex(vr, 2 - (v-this->minval_)/(this->maxval_-this->minval_)); } else{ // Assign value -3 to simplex and cone on simplex this->assign_filtration(sh, -3); - auto ins = this->insert_simplex(vr, -3); - this->assign_key(ins.first, count); + this->insert_simplex(vr, -3); } count++; } - this->make_filtration_non_decreasing(); - this->initialize_filtration(); + // Deallocate memory + delete st_copy; + // Automatically assign good values for simplices + this->make_filtration_non_decreasing(); } diff --git a/src/python/test/test_simplex_tree.py b/src/python/test/test_simplex_tree.py index caefeb9c..96ec4707 100755 --- a/src/python/test/test_simplex_tree.py +++ b/src/python/test/test_simplex_tree.py @@ -245,6 +245,10 @@ def test_make_filtration_non_decreasing(): assert st.filtration([0, 1, 6]) == 1.0 assert st.filtration([0, 1]) == 1.0 assert st.filtration([0]) == 1.0 + assert st.filtration([1]) == 1.0 + assert st.filtration([3, 4, 5]) == 2.0 + assert st.filtration([3, 4]) == 2.0 + assert st.filtration([4, 5]) == 2.0 def test_extend_filtration(): @@ -271,7 +275,7 @@ def test_extend_filtration(): st.assign_filtration([4], 5.) st.assign_filtration([5], 6.) 
- assert st.get_filtration() == [ + assert list(st.get_filtration()) == [ ([0, 2], 0.0), ([1, 2], 0.0), ([0, 3], 0.0), @@ -289,7 +293,7 @@ def test_extend_filtration(): st.extend_filtration() - assert st.get_filtration() == [ + assert list(st.get_filtration()) == [ ([6], -3.0), ([0], -2.0), ([1], -1.8), @@ -327,10 +331,6 @@ def test_extend_filtration(): [(1, (6.0, 1.0))] ] - assert st.filtration([1]) == 1.0 - assert st.filtration([3, 4, 5]) == 2.0 - assert st.filtration([3, 4]) == 2.0 - assert st.filtration([4, 5]) == 2.0 def test_simplices_iterator(): st = SimplexTree() -- cgit v1.2.3 From a52e84fdcdbf66f3542416499c26245d0435a8fb Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Tue, 17 Mar 2020 00:48:54 -0400 Subject: fix test --- src/python/test/test_simplex_tree.py | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/python/test/test_simplex_tree.py b/src/python/test/test_simplex_tree.py index 96ec4707..63eee9a5 100755 --- a/src/python/test/test_simplex_tree.py +++ b/src/python/test/test_simplex_tree.py @@ -292,6 +292,7 @@ def test_extend_filtration(): st.extend_filtration() + st.initialize_filtration() assert list(st.get_filtration()) == [ ([6], -3.0), -- cgit v1.2.3 From cdc57712ca159f3044453cef41e31ebc03617a1b Mon Sep 17 00:00:00 2001 From: tlacombe Date: Tue, 17 Mar 2020 10:55:14 +0100 Subject: removed _optimal_matching from barycenter as it is now handled by wasserstein_distance. 
--- src/python/gudhi/barycenter.py | 85 +++----------------------- src/python/test/test_wasserstein_barycenter.py | 2 +- 2 files changed, 9 insertions(+), 78 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py index 517cdb2f..0490fdd1 100644 --- a/src/python/gudhi/barycenter.py +++ b/src/python/gudhi/barycenter.py @@ -12,8 +12,7 @@ import ot import numpy as np import scipy.spatial.distance as sc -from gudhi.wasserstein import _build_dist_matrix, _perstot - +from gudhi.wasserstein import wasserstein_distance, _perstot def _mean(x, m): @@ -32,70 +31,6 @@ def _mean(x, m): return np.array([0, 0]) -def _optimal_matching(X, Y, withcost=False): - ''' - :param X: numpy.array of size (n x 2) - :param Y: numpy.array of size (m x 2) - :param withcost: returns also the cost corresponding to the optimal matching - :returns: numpy.array of shape (k x 2) encoding the list of edges - in the optimal matching. - That is, [[i, j] ...], where (i,j) indicates - that X[i] is matched to Y[j] - if i >= len(X) or j >= len(Y), it means they - represent the diagonal. - They will be encoded by -1 afterwards. - - NOTE : this code will be removed for final merge, - and wasserstein.optimal_matching will be used instead. - ''' - - n = len(X) - m = len(Y) - # Start by handling empty diagrams. Could it be shorten? 
- if X.size == 0: # X is empty - if Y.size == 0: # Y is empty - res = np.array([[0,0]]) # the diagonal is matched to the diagonal - if withcost: - return res, 0 - else: - return res - else: # X is empty but not Y - res = np.array([[0, i] for i in range(m)]) - cost = _perstot(Y, order=2, internal_p=2)**2 - if withcost: - return res, cost - else: - return res - elif Y.size == 0: # X is not empty but Y is empty - res = np.array([[i,0] for i in range(n)]) - cost = _perstot(X, order=2, internal_p=2)**2 - if withcost: - return res, cost - else: - return res - - # we know X, Y are not empty diags now - M = _build_dist_matrix(X, Y, order=2, internal_p=2) - - a = np.ones(n+1) - a[-1] = m - b = np.ones(m+1) - b[-1] = n - P = ot.emd(a=a, b=b, M=M) - # Note : it seems POT returns a permutation matrix in this situation, - # ie a vertex of the constraint set (generically true). - if withcost: - cost = np.sum(np.multiply(P, M)) - P[P < 0.5] = 0 # dirty trick to avoid some numerical issues... to improve. - res = np.argwhere(P) - - # return the list of (i,j) such that P[i,j] > 0, - #i.e. x_i is matched to y_j (should it be the diag). - if withcost: - return res, cost - return res - - def lagrangian_barycenter(pdiagset, init=None, verbose=False): ''' :param pdiagset: a list of size m containing numpy.array of shape (n x 2) @@ -166,16 +101,15 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): # Step 1 : compute optimal matching (Y, X_i) for each X_i # and create new points in Y if needed for i in range(m): - indices = _optimal_matching(Y, X[i]) + _, indices = wasserstein_distance(Y, X[i], matching=True, order=2., internal_p=2.) for y_j, x_i_j in indices: - if y_j < K: # we matched an off diagonal point to x_i_j... - # ...which is also an off-diagonal point. - if x_i_j < nb_off_diag[i]: + if y_j >= 0: # we matched an off diagonal point to x_i_j... + if x_i_j >= 0: # ...which is also an off-diagonal point. 
G[y_j, i] = x_i_j else: # ...which is a diagonal point G[y_j, i] = -1 # -1 stands for the diagonal (mask) else: # We matched a diagonal point to x_i_j... - if x_i_j < nb_off_diag[i]: # which is a off-diag point ! + if x_i_j >= 0: # which is a off-diag point ! # need to create new point in Y new_y = _mean(np.array([X[i][x_i_j]]), m) # Average this point with (m-1) copies of Delta @@ -209,15 +143,12 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): log = {} n_y = len(Y) for i in range(m): - edges, cost = _optimal_matching(Y, X[i], withcost=True) - n_x = len(X[i]) - G = edges[np.where(edges[:,0]= n_x) - G[idx,1] = -1 # -1 will encode the diagonal - groupings.append(G) + cost, edges = wasserstein_distance(Y, X[i], matching=True, order=2., internal_p=2.) + groupings.append(edges) energy += cost log["groupings"] = groupings energy = energy/m + print(energy) log["energy"] = energy log["nb_iter"] = nb_iter diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py index 5167cb84..4d18616b 100755 --- a/src/python/test/test_wasserstein_barycenter.py +++ b/src/python/test/test_wasserstein_barycenter.py @@ -38,7 +38,7 @@ def test_lagrangian_barycenter(): assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < eps Y, log = lagrangian_barycenter(pdiagset=[dg4, dg8], verbose=True) assert np.linalg.norm(Y - np.array([[1,3], [5, 7]])) < eps - assert np.abs(log["energy"] - 4) < eps + assert np.abs(log["energy"] - 2) < eps assert np.array_equal(log["groupings"][0] , np.array([[0, -1], [1, -1]])) assert np.array_equal(log["groupings"][1] , np.array([[0, 0], [1, 1]])) assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg8, dg4], init=np.array([[0.2, 0.6], [0.5, 0.7]]), verbose=False) - np.array([[1, 3], [5, 7]])) < eps -- cgit v1.2.3 From 58d923b13afb9b18a2d5b028c6575baee691d182 Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Tue, 17 Mar 2020 12:14:49 -0400 Subject: update python doc 
--- src/Simplex_tree/include/gudhi/Simplex_tree.h | 8 +++---- src/python/gudhi/simplex_tree.pyx | 34 +++++++++++++++++++++++---- 2 files changed, 33 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/Simplex_tree/include/gudhi/Simplex_tree.h b/src/Simplex_tree/include/gudhi/Simplex_tree.h index 02f2c7e9..f661f687 100644 --- a/src/Simplex_tree/include/gudhi/Simplex_tree.h +++ b/src/Simplex_tree/include/gudhi/Simplex_tree.h @@ -1478,8 +1478,8 @@ class Simplex_tree { * \post The coordinates of the persistence diagram points might be a little different than the * original filtration values due to the internal transformation (scaling to [-2,-1]) that is * performed on these values during the computation of extended persistence. - * @param[in] dgm Persistence diagram obtained after calling this->extend_filtration - * and this->get_persistence. + * @param[in] dgm Persistence diagram obtained after calling this->extend_filtration, + * this->initialize_filtration, and this->compute_persistent_cohomology. * @return A vector of four persistence diagrams. The first one is Ordinary, the * second one is Relative, the third one is Extended+ and the fourth one is Extended-. * See section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes. 
@@ -1538,14 +1538,14 @@ class Simplex_tree { int maxvert = std::numeric_limits::min(); this->minval_ = std::numeric_limits::max(); this->maxval_ = std::numeric_limits::min(); - for (auto sh : this->skeleton_simplex_range(0)) { + for (auto sh = root_.members().begin(); sh != root_.members().end(); ++sh){ double f = this->filtration(sh); this->minval_ = std::min(this->minval_, f); this->maxval_ = std::max(this->maxval_, f); maxvert = std::max(*this->simplex_vertex_range(sh).begin(), maxvert); } - assert (maxvert < std::numeric_limits::max()); + GUDHI_CHECK(maxvert < std::numeric_limits::max(), std::invalid_argument("Simplex_tree contains a vertex with the largest Vertex_handle")); maxvert += 1; Simplex_tree* st_copy = new Simplex_tree(*this); diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 733ecb97..7af44683 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -397,19 +397,43 @@ cdef class SimplexTree: return self.get_ptr().make_filtration_non_decreasing() def extend_filtration(self): - """ Extend filtration for computing extended persistence. This function only uses the filtration values at the 0-dimensional simplices, and computes the extended persistence diagram induced by the lower-star filtration computed with these values. Note that after calling this function, the filtration values are actually modified. The function :func:`compute_extended_persistence_subdiagrams()` retrieves the original values and separates the extended persistence diagram points w.r.t. their types (Ord, Rel, Ext+, Ext-) and should always be called after computing the persistent homology of the extended simplicial complex. + """ Extend filtration for computing extended persistence. This function only uses the + filtration values at the 0-dimensional simplices, and computes the extended persistence + diagram induced by the lower-star filtration computed with these values. + + .. 
note:: + + Note that after calling this function, the filtration + values are actually modified within the Simplex_tree. + The function :func:`compute_extended_persistence_subdiagrams()` + retrieves the original values. + + .. note:: + + Note that this code creates an extra vertex internally, so you should make sure that + the Simplex_tree does not contain a vertex with the largest Vertex_handle. """ return self.get_ptr().extend_filtration() def compute_extended_persistence_subdiagrams(self, dgm): - """This function retrieves good values for extended persistence, and separate the diagrams into the ordinary, relative, extended+ and extended- subdiagrams. + """This function retrieves good values for extended persistence, and separate the diagrams + into the ordinary, relative, extended+ and extended- subdiagrams. + + :param dgm: Persistence diagram obtained after calling :func:`extend_filtration()`, :func:`initialize_filtration()`, and :func:`persistence()`. + + :returns: A vector of four persistence diagrams. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. See section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes. + + .. note:: - :param dgm: Persistence diagram obtained after calling :func:`extend_filtration()` and :func:`persistence()`. - :returns: A vector of four persistence diagrams. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. + This function should be called only if :func:`extend_filtration()`, + :func:`initialize_filtration()`, + and :func:`persistence()` have been called first! .. note:: - This function should be called only after calling :func:`extend_filtration()` and :func:`persistence()`. 
+ The coordinates of the persistence diagram points might be a little different than the + original filtration values due to the internal transformation (scaling to [-2,-1]) that is + performed on these values during the computation of extended persistence. """ return self.get_ptr().compute_extended_persistence_subdiagrams(dgm) -- cgit v1.2.3 From 18a0eb17d9370eca6dde7c0cada0624302ded002 Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Tue, 17 Mar 2020 12:31:18 -0400 Subject: implement Marc's suggestions --- src/Simplex_tree/include/gudhi/Simplex_tree.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/Simplex_tree/include/gudhi/Simplex_tree.h b/src/Simplex_tree/include/gudhi/Simplex_tree.h index 1c06e7cb..5b36cc1c 100644 --- a/src/Simplex_tree/include/gudhi/Simplex_tree.h +++ b/src/Simplex_tree/include/gudhi/Simplex_tree.h @@ -1552,13 +1552,13 @@ class Simplex_tree { double f = this->filtration(sh); this->minval_ = std::min(this->minval_, f); this->maxval_ = std::max(this->maxval_, f); - maxvert = std::max(*this->simplex_vertex_range(sh).begin(), maxvert); + maxvert = std::max(sh->first, maxvert); } GUDHI_CHECK(maxvert < std::numeric_limits::max(), std::invalid_argument("Simplex_tree contains a vertex with the largest Vertex_handle")); maxvert += 1; - Simplex_tree* st_copy = new Simplex_tree(*this); + Simplex_tree st_copy = *this; // Add point for coning the simplicial complex int count = this->num_simplices(); @@ -1566,11 +1566,11 @@ class Simplex_tree { count++; // For each simplex - for (auto sh_copy : st_copy->complex_simplex_range()){ + for (auto sh_copy : st_copy.complex_simplex_range()){ // Locate simplex std::vector vr; - for (auto vh : st_copy->simplex_vertex_range(sh_copy)){ + for (auto vh : st_copy.simplex_vertex_range(sh_copy)){ vr.push_back(vh); } auto sh = this->find(vr); @@ -1592,9 +1592,6 @@ class Simplex_tree { count++; } - // Deallocate memory - delete st_copy; - // Automatically assign good 
values for simplices this->make_filtration_non_decreasing(); } -- cgit v1.2.3 From a4bf8306d3926428a7d5087d96fbf8033d3bd932 Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Tue, 17 Mar 2020 16:17:57 -0400 Subject: fix Marc's comments --- src/Simplex_tree/include/gudhi/Simplex_tree.h | 53 +++++++++++++-------------- 1 file changed, 26 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/Simplex_tree/include/gudhi/Simplex_tree.h b/src/Simplex_tree/include/gudhi/Simplex_tree.h index 5b36cc1c..6c837042 100644 --- a/src/Simplex_tree/include/gudhi/Simplex_tree.h +++ b/src/Simplex_tree/include/gudhi/Simplex_tree.h @@ -126,8 +126,8 @@ class Simplex_tree { private: typedef typename Dictionary::iterator Dictionary_it; typedef typename Dictionary_it::value_type Dit_value_t; - double minval_; - double maxval_; + Filtration_value minval_; + Filtration_value maxval_; struct return_first { Vertex_handle operator()(const Dit_value_t& p_sh) const { @@ -1484,25 +1484,25 @@ class Simplex_tree { /** \brief Retrieve good values for extended persistence, and separate the * diagrams into the ordinary, relative, extended+ and extended- subdiagrams. - * \post This function should be called only if extend_filtration has been called first! + * \pre This function should be called only if this->extend_filtration() has been called first! * \post The coordinates of the persistence diagram points might be a little different than the * original filtration values due to the internal transformation (scaling to [-2,-1]) that is * performed on these values during the computation of extended persistence. - * @param[in] dgm Persistence diagram obtained after calling this->extend_filtration, - * this->initialize_filtration, and this->compute_persistent_cohomology. + * @param[in] dgm Persistence diagram obtained after calling this->extend_filtration(), + * this->initialize_filtration(), and Gudhi::persistent_cohomology::Persistent_cohomology::compute_persistent_cohomology(). 
* @return A vector of four persistence diagrams. The first one is Ordinary, the * second one is Relative, the third one is Extended+ and the fourth one is Extended-. * See section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes. */ - std::vector>>> compute_extended_persistence_subdiagrams(const std::vector>>& dgm){ - std::vector>>> new_dgm(4); - double x, y; - double minval_ = this->minval_; - double maxval_ = this->maxval_; + std::vector>>> compute_extended_persistence_subdiagrams(const std::vector>>& dgm){ + std::vector>>> new_dgm(4); + Filtration_value x, y; + Filtration_value minval_ = this->minval_; + Filtration_value maxval_ = this->maxval_; for(unsigned int i = 0; i < dgm.size(); i++){ int h = dgm[i].first; - double px = dgm[i].second.first; - double py = dgm[i].second.second; + Filtration_value px = dgm[i].second.first; + Filtration_value py = dgm[i].second.second; if(std::isinf(py)) continue; else{ if ((px <= -1) & (py <= -1)){ @@ -1510,12 +1510,12 @@ class Simplex_tree { y = minval_ + (maxval_-minval_)*(py + 2); new_dgm[0].push_back(std::make_pair(h, std::make_pair(x,y))); } - if ((px >= 1) & (py >= 1)){ + else if ((px >= 1) & (py >= 1)){ x = minval_ - (maxval_-minval_)*(px - 2); y = minval_ - (maxval_-minval_)*(py - 2); new_dgm[1].push_back(std::make_pair(h, std::make_pair(x,y))); } - if ((px <= -1) & (py >= 1)){ + else { x = minval_ + (maxval_-minval_)*(px + 2); y = minval_ - (maxval_-minval_)*(py - 2); if (x <= y){ @@ -1539,37 +1539,36 @@ class Simplex_tree { * retrieves the original values and separates the extended persistence diagram points * w.r.t. their types (Ord, Rel, Ext+, Ext-) and should always be called after * computing the persistent homology of the extended simplicial complex. 
- * \post Note that this code creates an extra vertex internally, so you should make sure that + * \pre Note that this code creates an extra vertex internally, so you should make sure that * the Simplex tree does not contain a vertex with the largest Vertex_handle. */ void extend_filtration() { // Compute maximum and minimum of filtration values - int maxvert = std::numeric_limits::min(); - this->minval_ = std::numeric_limits::max(); - this->maxval_ = std::numeric_limits::min(); + Vertex_handle maxvert = std::numeric_limits::min(); + this->minval_ = std::numeric_limits::infinity(); + this->maxval_ = -std::numeric_limits::infinity(); for (auto sh = root_.members().begin(); sh != root_.members().end(); ++sh){ - double f = this->filtration(sh); + Filtration_value f = this->filtration(sh); this->minval_ = std::min(this->minval_, f); this->maxval_ = std::max(this->maxval_, f); maxvert = std::max(sh->first, maxvert); } - GUDHI_CHECK(maxvert < std::numeric_limits::max(), std::invalid_argument("Simplex_tree contains a vertex with the largest Vertex_handle")); + GUDHI_CHECK(maxvert < std::numeric_limits::max(), std::invalid_argument("Simplex_tree contains a vertex with the largest Vertex_handle")); maxvert += 1; Simplex_tree st_copy = *this; // Add point for coning the simplicial complex - int count = this->num_simplices(); this->insert_simplex({maxvert}, -3); - count++; // For each simplex + std::vector vr; for (auto sh_copy : st_copy.complex_simplex_range()){ // Locate simplex - std::vector vr; + vr.clear(); for (auto vh : st_copy.simplex_vertex_range(sh_copy)){ vr.push_back(vh); } @@ -1578,18 +1577,18 @@ class Simplex_tree { // Create cone on simplex vr.push_back(maxvert); if (this->dimension(sh) == 0){ + Filtration_value v = this->filtration(sh); + Filtration_value scaled_v = (v-this->minval_)/(this->maxval_-this->minval_); // Assign ascending value between -2 and -1 to vertex - double v = this->filtration(sh); - this->assign_filtration(sh, -2 + 
(v-this->minval_)/(this->maxval_-this->minval_)); + this->assign_filtration(sh, -2 + scaled_v); // Assign descending value between 1 and 2 to cone on vertex - this->insert_simplex(vr, 2 - (v-this->minval_)/(this->maxval_-this->minval_)); + this->insert_simplex(vr, 2 - scaled_v); } else{ // Assign value -3 to simplex and cone on simplex this->assign_filtration(sh, -3); this->insert_simplex(vr, -3); } - count++; } // Automatically assign good values for simplices -- cgit v1.2.3 From e1c8edc4b148331083f53c7c3d34766190bb6d99 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Tue, 17 Mar 2020 22:16:23 +0100 Subject: Another proposal to fix #248 --- src/python/doc/alpha_complex_sum.inc | 2 +- src/python/doc/bottleneck_distance_sum.inc | 2 +- src/python/doc/cubical_complex_sum.inc | 2 +- src/python/doc/nerve_gic_complex_sum.inc | 2 +- src/python/doc/persistence_graphical_tools_sum.inc | 2 +- src/python/doc/persistent_cohomology_sum.inc | 2 +- src/python/doc/point_cloud_sum.inc | 2 +- src/python/doc/representations_sum.inc | 2 +- src/python/doc/rips_complex_sum.inc | 2 +- src/python/doc/simplex_tree_sum.inc | 2 +- src/python/doc/tangential_complex_sum.inc | 2 +- src/python/doc/wasserstein_distance_sum.inc | 2 +- src/python/doc/witness_complex_sum.inc | 2 +- 13 files changed, 13 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/python/doc/alpha_complex_sum.inc b/src/python/doc/alpha_complex_sum.inc index b5af0d27..00c35155 100644 --- a/src/python/doc/alpha_complex_sum.inc +++ b/src/python/doc/alpha_complex_sum.inc @@ -1,5 +1,5 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ | .. 
figure:: | Alpha complex is a simplicial complex constructed from the finite | :Author: Vincent Rouvreau | diff --git a/src/python/doc/bottleneck_distance_sum.inc b/src/python/doc/bottleneck_distance_sum.inc index 6eb0ac19..a01e7f04 100644 --- a/src/python/doc/bottleneck_distance_sum.inc +++ b/src/python/doc/bottleneck_distance_sum.inc @@ -1,5 +1,5 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ | .. figure:: | Bottleneck distance measures the similarity between two persistence | :Author: François Godi | diff --git a/src/python/doc/cubical_complex_sum.inc b/src/python/doc/cubical_complex_sum.inc index f200e695..ab6388e5 100644 --- a/src/python/doc/cubical_complex_sum.inc +++ b/src/python/doc/cubical_complex_sum.inc @@ -1,5 +1,5 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +--------------------------------------------------------------------------+----------------------------------------------------------------------+-----------------------------+ | .. figure:: | The cubical complex is an example of a structured complex useful in | :Author: Pawel Dlotko | diff --git a/src/python/doc/nerve_gic_complex_sum.inc b/src/python/doc/nerve_gic_complex_sum.inc index d633c4ff..d5356eca 100644 --- a/src/python/doc/nerve_gic_complex_sum.inc +++ b/src/python/doc/nerve_gic_complex_sum.inc @@ -1,5 +1,5 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +----------------------------------------------------------------+------------------------------------------------------------------------+------------------------------------------------------------------+ | .. figure:: | Nerves and Graph Induced Complexes are cover complexes, i.e. 
| :Author: Mathieu Carrière | diff --git a/src/python/doc/persistence_graphical_tools_sum.inc b/src/python/doc/persistence_graphical_tools_sum.inc index ef376802..723c0f78 100644 --- a/src/python/doc/persistence_graphical_tools_sum.inc +++ b/src/python/doc/persistence_graphical_tools_sum.inc @@ -1,5 +1,5 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +-----------------------------------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------+ | .. figure:: | These graphical tools comes on top of persistence results and allows | :Author: Vincent Rouvreau, Theo Lacombe | diff --git a/src/python/doc/persistent_cohomology_sum.inc b/src/python/doc/persistent_cohomology_sum.inc index 4d7b077e..9c29bfaa 100644 --- a/src/python/doc/persistent_cohomology_sum.inc +++ b/src/python/doc/persistent_cohomology_sum.inc @@ -1,5 +1,5 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +-----------------------------------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------+ | .. figure:: | The theory of homology consists in attaching to a topological space | :Author: Clément Maria | diff --git a/src/python/doc/point_cloud_sum.inc b/src/python/doc/point_cloud_sum.inc index 85d52de7..77245e86 100644 --- a/src/python/doc/point_cloud_sum.inc +++ b/src/python/doc/point_cloud_sum.inc @@ -1,5 +1,5 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ | | :math:`(x_1, x_2, \ldots, x_d)` | Utilities to process point clouds: read from file, subsample, etc. 
| :Author: Vincent Rouvreau | diff --git a/src/python/doc/representations_sum.inc b/src/python/doc/representations_sum.inc index 700828f1..edb8a448 100644 --- a/src/python/doc/representations_sum.inc +++ b/src/python/doc/representations_sum.inc @@ -1,5 +1,5 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +------------------------------------------------------------------+----------------------------------------------------------------+-----------------------------------------------+ | .. figure:: | Vectorizations, distances and kernels that work on persistence | :Author: Mathieu Carrière | diff --git a/src/python/doc/rips_complex_sum.inc b/src/python/doc/rips_complex_sum.inc index 857c6893..a1f0e469 100644 --- a/src/python/doc/rips_complex_sum.inc +++ b/src/python/doc/rips_complex_sum.inc @@ -1,5 +1,5 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +----------------------------------------------------------------+------------------------------------------------------------------------+----------------------------------------------------------------------+ | .. figure:: | Rips complex is a simplicial complex constructed from a one skeleton | :Authors: Clément Maria, Pawel Dlotko, Vincent Rouvreau, Marc Glisse | diff --git a/src/python/doc/simplex_tree_sum.inc b/src/python/doc/simplex_tree_sum.inc index 5ba58d2b..3c637b8c 100644 --- a/src/python/doc/simplex_tree_sum.inc +++ b/src/python/doc/simplex_tree_sum.inc @@ -1,5 +1,5 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------+ | .. 
figure:: | The simplex tree is an efficient and flexible data structure for | :Author: Clément Maria | diff --git a/src/python/doc/tangential_complex_sum.inc b/src/python/doc/tangential_complex_sum.inc index d84aa433..ddc3e609 100644 --- a/src/python/doc/tangential_complex_sum.inc +++ b/src/python/doc/tangential_complex_sum.inc @@ -1,5 +1,5 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ | .. figure:: | A Tangential Delaunay complex is a simplicial complex designed to | :Author: Clément Jamin | diff --git a/src/python/doc/wasserstein_distance_sum.inc b/src/python/doc/wasserstein_distance_sum.inc index a97f428d..1632befa 100644 --- a/src/python/doc/wasserstein_distance_sum.inc +++ b/src/python/doc/wasserstein_distance_sum.inc @@ -1,5 +1,5 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ | .. figure:: | The q-Wasserstein distance measures the similarity between two | :Author: Theo Lacombe | diff --git a/src/python/doc/witness_complex_sum.inc b/src/python/doc/witness_complex_sum.inc index 71b65a71..f9c009ab 100644 --- a/src/python/doc/witness_complex_sum.inc +++ b/src/python/doc/witness_complex_sum.inc @@ -1,5 +1,5 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +-------------------------------------------------------------------+----------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ | .. 
figure:: | Witness complex :math:`Wit(W,L)` is a simplicial complex defined on | :Author: Siargey Kachanovich | -- cgit v1.2.3 From 6f445b7e2bdb8481198f8c0f0e076d4fea081d62 Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Wed, 18 Mar 2020 12:37:40 -0400 Subject: fix doc --- src/Simplex_tree/include/gudhi/Simplex_tree.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/Simplex_tree/include/gudhi/Simplex_tree.h b/src/Simplex_tree/include/gudhi/Simplex_tree.h index 6c837042..697afe26 100644 --- a/src/Simplex_tree/include/gudhi/Simplex_tree.h +++ b/src/Simplex_tree/include/gudhi/Simplex_tree.h @@ -1484,12 +1484,12 @@ class Simplex_tree { /** \brief Retrieve good values for extended persistence, and separate the * diagrams into the ordinary, relative, extended+ and extended- subdiagrams. - * \pre This function should be called only if this->extend_filtration() has been called first! + * \pre This function should be called only if `extend_filtration()` has been called first! * \post The coordinates of the persistence diagram points might be a little different than the * original filtration values due to the internal transformation (scaling to [-2,-1]) that is * performed on these values during the computation of extended persistence. - * @param[in] dgm Persistence diagram obtained after calling this->extend_filtration(), - * this->initialize_filtration(), and Gudhi::persistent_cohomology::Persistent_cohomology::compute_persistent_cohomology(). + * @param[in] dgm Persistence diagram obtained after calling `extend_filtration()`, + * `initialize_filtration()`, and `Gudhi::persistent_cohomology::Persistent_cohomology< FilteredComplex, CoefficientField >::compute_persistent_cohomology()`. * @return A vector of four persistence diagrams. The first one is Ordinary, the * second one is Relative, the third one is Extended+ and the fourth one is Extended-. 
* See section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes. @@ -1535,7 +1535,7 @@ class Simplex_tree { * and computes the extended persistence diagram induced by the lower-star filtration * computed with these values. * \post Note that after calling this function, the filtration - * values are actually modified. The function compute_extended_persistence_subdiagrams + * values are actually modified. The function `compute_extended_persistence_subdiagrams()` * retrieves the original values and separates the extended persistence diagram points * w.r.t. their types (Ord, Rel, Ext+, Ext-) and should always be called after * computing the persistent homology of the extended simplicial complex. @@ -1545,7 +1545,7 @@ class Simplex_tree { void extend_filtration() { // Compute maximum and minimum of filtration values - Vertex_handle maxvert = std::numeric_limits::min(); + Vertex_handle maxvert = std::numeric_limits::min(); this->minval_ = std::numeric_limits::infinity(); this->maxval_ = -std::numeric_limits::infinity(); for (auto sh = root_.members().begin(); sh != root_.members().end(); ++sh){ -- cgit v1.2.3 From 61691b0081cb868645335c0b1433ddcc0bcbf9e3 Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Thu, 19 Mar 2020 13:09:59 -0400 Subject: new fixes --- src/Simplex_tree/include/gudhi/Simplex_tree.h | 45 ++++++++++++++++----------- src/python/gudhi/simplex_tree.pxd | 4 +-- src/python/gudhi/simplex_tree.pyx | 32 ++++++++++++++----- src/python/include/Simplex_tree_interface.h | 13 ++++++++ src/python/test/test_simplex_tree.py | 18 ++++++----- 5 files changed, 77 insertions(+), 35 deletions(-) (limited to 'src') diff --git a/src/Simplex_tree/include/gudhi/Simplex_tree.h b/src/Simplex_tree/include/gudhi/Simplex_tree.h index 697afe26..50b8e582 100644 --- a/src/Simplex_tree/include/gudhi/Simplex_tree.h +++ b/src/Simplex_tree/include/gudhi/Simplex_tree.h @@ -100,6 +100,12 @@ class Simplex_tree { void 
assign_key(Simplex_key); Simplex_key key() const; }; + struct Extended_filtration_data { + Filtration_value minval; + Filtration_value maxval; + Extended_filtration_data(){} + Extended_filtration_data(Filtration_value vmin, Filtration_value vmax){ minval = vmin; maxval = vmax; } + }; typedef typename std::conditional::type Key_simplex_base; @@ -126,8 +132,6 @@ class Simplex_tree { private: typedef typename Dictionary::iterator Dictionary_it; typedef typename Dictionary_it::value_type Dit_value_t; - Filtration_value minval_; - Filtration_value maxval_; struct return_first { Vertex_handle operator()(const Dit_value_t& p_sh) const { @@ -1490,15 +1494,16 @@ class Simplex_tree { * performed on these values during the computation of extended persistence. * @param[in] dgm Persistence diagram obtained after calling `extend_filtration()`, * `initialize_filtration()`, and `Gudhi::persistent_cohomology::Persistent_cohomology< FilteredComplex, CoefficientField >::compute_persistent_cohomology()`. + * @param[in] efd Structure containing the minimum and maximum values of the original filtration * @return A vector of four persistence diagrams. The first one is Ordinary, the * second one is Relative, the third one is Extended+ and the fourth one is Extended-. * See section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes. 
*/ - std::vector>>> compute_extended_persistence_subdiagrams(const std::vector>>& dgm){ + std::vector>>> extended_persistence_subdiagrams(const std::vector>>& dgm, const Extended_filtration_data& efd){ std::vector>>> new_dgm(4); Filtration_value x, y; - Filtration_value minval_ = this->minval_; - Filtration_value maxval_ = this->maxval_; + Filtration_value minval = efd.minval; + Filtration_value maxval = efd.maxval; for(unsigned int i = 0; i < dgm.size(); i++){ int h = dgm[i].first; Filtration_value px = dgm[i].second.first; @@ -1506,18 +1511,18 @@ class Simplex_tree { if(std::isinf(py)) continue; else{ if ((px <= -1) & (py <= -1)){ - x = minval_ + (maxval_-minval_)*(px + 2); - y = minval_ + (maxval_-minval_)*(py + 2); + x = minval + (maxval-minval)*(px + 2); + y = minval + (maxval-minval)*(py + 2); new_dgm[0].push_back(std::make_pair(h, std::make_pair(x,y))); } else if ((px >= 1) & (py >= 1)){ - x = minval_ - (maxval_-minval_)*(px - 2); - y = minval_ - (maxval_-minval_)*(py - 2); + x = minval - (maxval-minval)*(px - 2); + y = minval - (maxval-minval)*(py - 2); new_dgm[1].push_back(std::make_pair(h, std::make_pair(x,y))); } else { - x = minval_ + (maxval_-minval_)*(px + 2); - y = minval_ - (maxval_-minval_)*(py - 2); + x = minval + (maxval-minval)*(px + 2); + y = minval - (maxval-minval)*(py - 2); if (x <= y){ new_dgm[2].push_back(std::make_pair(h, std::make_pair(x,y))); } @@ -1535,23 +1540,23 @@ class Simplex_tree { * and computes the extended persistence diagram induced by the lower-star filtration * computed with these values. * \post Note that after calling this function, the filtration - * values are actually modified. The function `compute_extended_persistence_subdiagrams()` + * values are actually modified. The function `extended_persistence_subdiagrams()` * retrieves the original values and separates the extended persistence diagram points * w.r.t. 
their types (Ord, Rel, Ext+, Ext-) and should always be called after * computing the persistent homology of the extended simplicial complex. * \pre Note that this code creates an extra vertex internally, so you should make sure that * the Simplex tree does not contain a vertex with the largest Vertex_handle. */ - void extend_filtration() { + Extended_filtration_data extend_filtration() { // Compute maximum and minimum of filtration values Vertex_handle maxvert = std::numeric_limits::min(); - this->minval_ = std::numeric_limits::infinity(); - this->maxval_ = -std::numeric_limits::infinity(); + Filtration_value minval = std::numeric_limits::infinity(); + Filtration_value maxval = -std::numeric_limits::infinity(); for (auto sh = root_.members().begin(); sh != root_.members().end(); ++sh){ Filtration_value f = this->filtration(sh); - this->minval_ = std::min(this->minval_, f); - this->maxval_ = std::max(this->maxval_, f); + minval = std::min(minval, f); + maxval = std::max(maxval, f); maxvert = std::max(sh->first, maxvert); } @@ -1578,7 +1583,7 @@ class Simplex_tree { vr.push_back(maxvert); if (this->dimension(sh) == 0){ Filtration_value v = this->filtration(sh); - Filtration_value scaled_v = (v-this->minval_)/(this->maxval_-this->minval_); + Filtration_value scaled_v = (v-minval)/(maxval-minval); // Assign ascending value between -2 and -1 to vertex this->assign_filtration(sh, -2 + scaled_v); // Assign descending value between 1 and 2 to cone on vertex @@ -1593,6 +1598,10 @@ class Simplex_tree { // Automatically assign good values for simplices this->make_filtration_non_decreasing(); + + // Return the filtration data + Extended_filtration_data efd(minval, maxval); + return efd; } /** \brief Returns a vertex of `sh` that has the same filtration value as `sh` if it exists, and `null_vertex()` otherwise. 
diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd index ae32eb82..b6284af4 100644 --- a/src/python/gudhi/simplex_tree.pxd +++ b/src/python/gudhi/simplex_tree.pxd @@ -57,8 +57,8 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi": void remove_maximal_simplex(vector[int] simplex) bool prune_above_filtration(double filtration) bool make_filtration_non_decreasing() - void extend_filtration() - vector[vector[pair[int, pair[double, double]]]] compute_extended_persistence_subdiagrams(vector[pair[int, pair[double, double]]]) + void compute_extended_filtration() + vector[vector[pair[int, pair[double, double]]]] compute_extended_persistence_subdiagrams(vector[pair[int, pair[double, double]]] dgm) # Iterators over Simplex tree pair[vector[int], double] get_simplex_and_filtration(Simplex_tree_simplex_handle f_simplex) Simplex_tree_simplices_iterator get_simplices_iterator_begin() diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 7af44683..3502000a 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -405,7 +405,7 @@ cdef class SimplexTree: Note that after calling this function, the filtration values are actually modified within the Simplex_tree. - The function :func:`compute_extended_persistence_subdiagrams()` + The function :func:`extended_persistence()` retrieves the original values. .. note:: @@ -413,21 +413,31 @@ cdef class SimplexTree: Note that this code creates an extra vertex internally, so you should make sure that the Simplex_tree does not contain a vertex with the largest Vertex_handle. 
""" - return self.get_ptr().extend_filtration() + return self.get_ptr().compute_extended_filtration() - def compute_extended_persistence_subdiagrams(self, dgm): + def extended_persistence(self, homology_coeff_field=11, min_persistence=0, persistence_dim_max = False): """This function retrieves good values for extended persistence, and separate the diagrams into the ordinary, relative, extended+ and extended- subdiagrams. - :param dgm: Persistence diagram obtained after calling :func:`extend_filtration()`, :func:`initialize_filtration()`, and :func:`persistence()`. - + :param homology_coeff_field: The homology coefficient field. Must be a + prime number. Default value is 11. + :type homology_coeff_field: int. + :param min_persistence: The minimum persistence value to take into + account (strictly greater than min_persistence). Default value is + 0.0. + Sets min_persistence to -1.0 to see all values. + :type min_persistence: float. + :param persistence_dim_max: If true, the persistent homology for the + maximal dimension in the complex is computed. If false, it is + ignored. Default is false. + :type persistence_dim_max: bool :returns: A vector of four persistence diagrams. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. See section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes. .. note:: This function should be called only if :func:`extend_filtration()`, :func:`initialize_filtration()`, - and :func:`persistence()` have been called first! + and (optionally) :func:`persistence()` have been called first! .. note:: @@ -435,7 +445,15 @@ cdef class SimplexTree: original filtration values due to the internal transformation (scaling to [-2,-1]) that is performed on these values during the computation of extended persistence. 
""" - return self.get_ptr().compute_extended_persistence_subdiagrams(dgm) + cdef vector[pair[int, pair[double, double]]] persistence_result + if self.pcohptr == NULL: + self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), persistence_dim_max) + if self.pcohptr != NULL: + self.pcohptr.get_persistence(homology_coeff_field, min_persistence) + if self.pcohptr != NULL: + pairs = self.pcohptr.persistence_pairs() + persistence_result = [(len(splx1)-1, [self.filtration(splx1), self.filtration(splx2)]) for [splx1, splx2] in pairs] + return self.get_ptr().compute_extended_persistence_subdiagrams(persistence_result) def persistence(self, homology_coeff_field=11, min_persistence=0, persistence_dim_max = False): diff --git a/src/python/include/Simplex_tree_interface.h b/src/python/include/Simplex_tree_interface.h index 4a7062d6..50ed58d0 100644 --- a/src/python/include/Simplex_tree_interface.h +++ b/src/python/include/Simplex_tree_interface.h @@ -37,8 +37,12 @@ class Simplex_tree_interface : public Simplex_tree { using Filtered_simplices = std::vector; using Skeleton_simplex_iterator = typename Base::Skeleton_simplex_iterator; using Complex_simplex_iterator = typename Base::Complex_simplex_iterator; + using Extended_filtration_data = typename Base::Extended_filtration_data; public: + + Extended_filtration_data efd; + bool find_simplex(const Simplex& vh) { return (Base::find(vh) != Base::null_simplex()); } @@ -117,6 +121,15 @@ class Simplex_tree_interface : public Simplex_tree { return cofaces; } + void compute_extended_filtration() { + this->efd = this->extend_filtration(); + return; + } + + std::vector>>> compute_extended_persistence_subdiagrams(const std::vector>>& dgm){ + return this->extended_persistence_subdiagrams(dgm, this->efd); + } + void create_persistence(Gudhi::Persistent_cohomology_interface* pcoh) { Base::initialize_filtration(); pcoh = new Gudhi::Persistent_cohomology_interface(*this); diff --git a/src/python/test/test_simplex_tree.py 
b/src/python/test/test_simplex_tree.py index 63eee9a5..20f6aabf 100755 --- a/src/python/test/test_simplex_tree.py +++ b/src/python/test/test_simplex_tree.py @@ -9,6 +9,7 @@ """ from gudhi import SimplexTree +import pytest __author__ = "Vincent Rouvreau" __copyright__ = "Copyright (C) 2016 Inria" @@ -322,15 +323,16 @@ def test_extend_filtration(): ([0, 3, 6], 2.0) ] + dgms = st.extended_persistence() - dgm = st.persistence() - L = st.compute_extended_persistence_subdiagrams(dgm) - assert L == [ - [(0, (1.9999999999999998, 2.9999999999999996))], - [(1, (5.0, 4.0))], - [(0, (1.0, 6.0))], - [(1, (6.0, 1.0))] - ] + assert dgms[0][0][1][0] == pytest.approx(2.) + assert dgms[0][0][1][1] == pytest.approx(3.) + assert dgms[1][0][1][0] == pytest.approx(5.) + assert dgms[1][0][1][1] == pytest.approx(4.) + assert dgms[2][0][1][0] == pytest.approx(1.) + assert dgms[2][0][1][1] == pytest.approx(6.) + assert dgms[3][0][1][0] == pytest.approx(6.) + assert dgms[3][0][1][1] == pytest.approx(1.) def test_simplices_iterator(): -- cgit v1.2.3 From 361abfcfa9ec18c76837f847f8e2e3a060cf7db7 Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Thu, 19 Mar 2020 17:02:55 -0400 Subject: added decoding function --- src/Simplex_tree/include/gudhi/Simplex_tree.h | 82 +++++++++++---------------- src/python/gudhi/simplex_tree.pyx | 10 +--- src/python/include/Simplex_tree_interface.h | 27 ++++++++- 3 files changed, 63 insertions(+), 56 deletions(-) (limited to 'src') diff --git a/src/Simplex_tree/include/gudhi/Simplex_tree.h b/src/Simplex_tree/include/gudhi/Simplex_tree.h index 50b8e582..9008c5f2 100644 --- a/src/Simplex_tree/include/gudhi/Simplex_tree.h +++ b/src/Simplex_tree/include/gudhi/Simplex_tree.h @@ -87,6 +87,8 @@ class Simplex_tree { /* \brief Set of nodes sharing a same parent in the simplex tree. 
*/ typedef Simplex_tree_siblings Siblings; + enum Extended_simplex_type {UP, DOWN, EXTRA}; + struct Key_simplex_base_real { Key_simplex_base_real() : key_(-1) {} void assign_key(Simplex_key k) { key_ = k; } @@ -1486,66 +1488,50 @@ class Simplex_tree { } } - /** \brief Retrieve good values for extended persistence, and separate the - * diagrams into the ordinary, relative, extended+ and extended- subdiagrams. + /** \brief Retrieve the original filtration value for a given simplex in the Simplex_tree. Since the + * computation of extended persistence requires modifying the filtration values, this function can be used + * to recover the original values. Moreover, computing extended persistence requires adding new simplices + * in the Simplex_tree. Hence, this function also outputs the type of each simplex. It can be either UP (which means + * that the simplex was present originally, and is thus part of the ascending extended filtration), DOWN (which means + * that the simplex is the cone of an original simplex, and is thus part of the descending extended filtration) or + * EXTRA (which means the simplex is the cone point). Note that if the simplex type is DOWN, the original filtration value + * is set to be the original filtration value of the corresponding (not coned) original simplex. * \pre This function should be called only if `extend_filtration()` has been called first! - * \post The coordinates of the persistence diagram points might be a little different than the - * original filtration values due to the internal transformation (scaling to [-2,-1]) that is - * performed on these values during the computation of extended persistence. - * @param[in] dgm Persistence diagram obtained after calling `extend_filtration()`, - * `initialize_filtration()`, and `Gudhi::persistent_cohomology::Persistent_cohomology< FilteredComplex, CoefficientField >::compute_persistent_cohomology()`. 
- * @param[in] efd Structure containing the minimum and maximum values of the original filtration - * @return A vector of four persistence diagrams. The first one is Ordinary, the - * second one is Relative, the third one is Extended+ and the fourth one is Extended-. - * See section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes. + * \post The output filtration value is supposed to be the same, but might be a little different, than the + * original filtration value, due to the internal transformation (scaling to [-2,-1]) that is + * performed on the original filtration values during the computation of extended persistence. + * @param[in] f Filtration value of the simplex in the extended (i.e., modified) filtration. + * @param[in] efd Structure containing the minimum and maximum values of the original filtration. This is the output of `extend_filtration()`. + * @return A pair containing the original filtration value of the simplex as well as the simplex type. 
*/ - std::vector>>> extended_persistence_subdiagrams(const std::vector>>& dgm, const Extended_filtration_data& efd){ - std::vector>>> new_dgm(4); - Filtration_value x, y; + std::pair decode_extended_filtration(Filtration_value f, const Extended_filtration_data& efd){ + std::pair p; Filtration_value minval = efd.minval; Filtration_value maxval = efd.maxval; - for(unsigned int i = 0; i < dgm.size(); i++){ - int h = dgm[i].first; - Filtration_value px = dgm[i].second.first; - Filtration_value py = dgm[i].second.second; - if(std::isinf(py)) continue; - else{ - if ((px <= -1) & (py <= -1)){ - x = minval + (maxval-minval)*(px + 2); - y = minval + (maxval-minval)*(py + 2); - new_dgm[0].push_back(std::make_pair(h, std::make_pair(x,y))); - } - else if ((px >= 1) & (py >= 1)){ - x = minval - (maxval-minval)*(px - 2); - y = minval - (maxval-minval)*(py - 2); - new_dgm[1].push_back(std::make_pair(h, std::make_pair(x,y))); - } - else { - x = minval + (maxval-minval)*(px + 2); - y = minval - (maxval-minval)*(py - 2); - if (x <= y){ - new_dgm[2].push_back(std::make_pair(h, std::make_pair(x,y))); - } - else{ - new_dgm[3].push_back(std::make_pair(h, std::make_pair(x,y))); - } - } - } + if (f >= -2 && f <= -1){ + p.first = minval + (maxval-minval)*(f + 2); p.second = UP; } - return new_dgm; - } + else if (f >= 1 && f <= 2){ + p.first = minval - (maxval-minval)*(f - 2); p.second = DOWN; + } + else{ + p.first = -3; p.second = EXTRA; + } + return p; + }; /** \brief Extend filtration for computing extended persistence. * This function only uses the filtration values at the 0-dimensional simplices, * and computes the extended persistence diagram induced by the lower-star filtration * computed with these values. * \post Note that after calling this function, the filtration - * values are actually modified. The function `extended_persistence_subdiagrams()` - * retrieves the original values and separates the extended persistence diagram points - * w.r.t. 
their types (Ord, Rel, Ext+, Ext-) and should always be called after - * computing the persistent homology of the extended simplicial complex. + * values are actually modified. The function `decode_extended_filtration()` + * retrieves the original values and outputs the extended simplex type. * \pre Note that this code creates an extra vertex internally, so you should make sure that - * the Simplex tree does not contain a vertex with the largest Vertex_handle. + * the Simplex tree does not contain a vertex with the largest Vertex_handle. + * @return A data structure containing the maximum and minimum values of the original filtration. + * It is meant to be provided as input to `decode_extended_filtration()` in order to retrieve + * the original filtration values for each simplex. */ Extended_filtration_data extend_filtration() { diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 3502000a..2cd81c14 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -415,9 +415,9 @@ cdef class SimplexTree: """ return self.get_ptr().compute_extended_filtration() - def extended_persistence(self, homology_coeff_field=11, min_persistence=0, persistence_dim_max = False): + def extended_persistence(self, homology_coeff_field=11, min_persistence=0): """This function retrieves good values for extended persistence, and separate the diagrams - into the ordinary, relative, extended+ and extended- subdiagrams. + into the Ordinary, Relative, Extended+ and Extended- subdiagrams. :param homology_coeff_field: The homology coefficient field. Must be a prime number. Default value is 11. @@ -427,10 +427,6 @@ cdef class SimplexTree: 0.0. Sets min_persistence to -1.0 to see all values. :type min_persistence: float. - :param persistence_dim_max: If true, the persistent homology for the - maximal dimension in the complex is computed. If false, it is - ignored. Default is false. 
- :type persistence_dim_max: bool :returns: A vector of four persistence diagrams. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. See section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes. .. note:: @@ -447,7 +443,7 @@ cdef class SimplexTree: """ cdef vector[pair[int, pair[double, double]]] persistence_result if self.pcohptr == NULL: - self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), persistence_dim_max) + self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), True) if self.pcohptr != NULL: self.pcohptr.get_persistence(homology_coeff_field, min_persistence) if self.pcohptr != NULL: diff --git a/src/python/include/Simplex_tree_interface.h b/src/python/include/Simplex_tree_interface.h index 50ed58d0..a6b1a06e 100644 --- a/src/python/include/Simplex_tree_interface.h +++ b/src/python/include/Simplex_tree_interface.h @@ -38,6 +38,7 @@ class Simplex_tree_interface : public Simplex_tree { using Skeleton_simplex_iterator = typename Base::Skeleton_simplex_iterator; using Complex_simplex_iterator = typename Base::Complex_simplex_iterator; using Extended_filtration_data = typename Base::Extended_filtration_data; + using Extended_simplex_type = typename Base::Extended_simplex_type; public: @@ -127,7 +128,31 @@ class Simplex_tree_interface : public Simplex_tree { } std::vector>>> compute_extended_persistence_subdiagrams(const std::vector>>& dgm){ - return this->extended_persistence_subdiagrams(dgm, this->efd); + std::vector>>> new_dgm(4); + for (unsigned int i = 0; i < dgm.size(); i++){ + std::pair px = this->decode_extended_filtration(dgm[i].second.first, this->efd); + std::pair py = this->decode_extended_filtration(dgm[i].second.second, this->efd); + std::pair> pd_point = std::make_pair(dgm[i].first, std::make_pair(px.first, py.first)); + //Ordinary + if (px.second == Base::UP && py.second == Base::UP){ + 
new_dgm[0].push_back(pd_point); + } + // Relative + else if (px.second == Base::DOWN && py.second == Base::DOWN){ + new_dgm[1].push_back(pd_point); + } + else{ + // Extended+ + if (px.first < py.first){ + new_dgm[2].push_back(pd_point); + } + //Extended- + else{ + new_dgm[3].push_back(pd_point); + } + } + } + return new_dgm; } void create_persistence(Gudhi::Persistent_cohomology_interface* pcoh) { -- cgit v1.2.3 From 1e0e378ab442672ef569e93c4114b0e99ea70f6e Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Fri, 20 Mar 2020 12:47:13 -0400 Subject: small fix --- src/python/gudhi/simplex_tree.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 2cd81c14..5b850462 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -443,7 +443,7 @@ cdef class SimplexTree: """ cdef vector[pair[int, pair[double, double]]] persistence_result if self.pcohptr == NULL: - self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), True) + self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), False) if self.pcohptr != NULL: self.pcohptr.get_persistence(homology_coeff_field, min_persistence) if self.pcohptr != NULL: -- cgit v1.2.3 From cf29f4a485d06469d17c6d12d306901fa3c5ab36 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Mon, 23 Mar 2020 18:11:15 +0100 Subject: Shorter headers in sphinx: Introduced in -> Since and Copyright -> License --- src/python/doc/alpha_complex_sum.inc | 4 ++-- src/python/doc/bottleneck_distance_sum.inc | 4 ++-- src/python/doc/cubical_complex_sum.inc | 4 ++-- src/python/doc/cubical_complex_user.rst | 2 +- src/python/doc/nerve_gic_complex_sum.inc | 4 ++-- src/python/doc/persistence_graphical_tools_sum.inc | 4 ++-- src/python/doc/persistent_cohomology_sum.inc | 4 ++-- src/python/doc/persistent_cohomology_user.rst | 2 +- src/python/doc/point_cloud_sum.inc | 4 ++-- 
src/python/doc/representations_sum.inc | 4 ++-- src/python/doc/rips_complex_sum.inc | 4 ++-- src/python/doc/rips_complex_user.rst | 2 +- src/python/doc/simplex_tree_sum.inc | 4 ++-- src/python/doc/tangential_complex_sum.inc | 4 ++-- src/python/doc/wasserstein_distance_sum.inc | 4 ++-- src/python/doc/witness_complex_sum.inc | 4 ++-- 16 files changed, 29 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/python/doc/alpha_complex_sum.inc b/src/python/doc/alpha_complex_sum.inc index 00c35155..9e6414d0 100644 --- a/src/python/doc/alpha_complex_sum.inc +++ b/src/python/doc/alpha_complex_sum.inc @@ -4,9 +4,9 @@ +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ | .. figure:: | Alpha complex is a simplicial complex constructed from the finite | :Author: Vincent Rouvreau | | ../../doc/Alpha_complex/alpha_complex_representation.png | cells of a Delaunay Triangulation. | | - | :alt: Alpha complex representation | | :Introduced in: GUDHI 2.0.0 | + | :alt: Alpha complex representation | | :Since: GUDHI 2.0.0 | | :figclass: align-center | The filtration value of each simplex is computed as the **square** of | | - | | the circumradius of the simplex if the circumsphere is empty (the | :Copyright: MIT (`GPL v3 `_) | + | | the circumradius of the simplex if the circumsphere is empty (the | :License: MIT (`GPL v3 `_) | | | simplex is then said to be Gabriel), and as the minimum of the | | | | filtration values of the codimension 1 cofaces that make it not | :Requires: `Eigen `__ :math:`\geq` 3.1.0 and `CGAL `__ :math:`\geq` 4.11.0 | | | Gabriel otherwise. 
| | diff --git a/src/python/doc/bottleneck_distance_sum.inc b/src/python/doc/bottleneck_distance_sum.inc index a01e7f04..0de4625c 100644 --- a/src/python/doc/bottleneck_distance_sum.inc +++ b/src/python/doc/bottleneck_distance_sum.inc @@ -4,9 +4,9 @@ +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ | .. figure:: | Bottleneck distance measures the similarity between two persistence | :Author: François Godi | | ../../doc/Bottleneck_distance/perturb_pd.png | diagrams. It's the shortest distance b for which there exists a | | - | :figclass: align-center | perfect matching between the points of the two diagrams (+ all the | :Introduced in: GUDHI 2.0.0 | + | :figclass: align-center | perfect matching between the points of the two diagrams (+ all the | :Since: GUDHI 2.0.0 | | | diagonal points) such that any couple of matched points are at | | - | Bottleneck distance is the length of | distance at most b, where the distance between points is the sup | :Copyright: MIT (`GPL v3 `_) | + | Bottleneck distance is the length of | distance at most b, where the distance between points is the sup | :License: MIT (`GPL v3 `_) | | the longest edge | norm in :math:`\mathbb{R}^2`. 
| | | | | :Requires: `CGAL `__ :math:`\geq` 4.11.0 | +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ diff --git a/src/python/doc/cubical_complex_sum.inc b/src/python/doc/cubical_complex_sum.inc index ab6388e5..28bf8e94 100644 --- a/src/python/doc/cubical_complex_sum.inc +++ b/src/python/doc/cubical_complex_sum.inc @@ -4,9 +4,9 @@ +--------------------------------------------------------------------------+----------------------------------------------------------------------+-----------------------------+ | .. figure:: | The cubical complex is an example of a structured complex useful in | :Author: Pawel Dlotko | | ../../doc/Bitmap_cubical_complex/Cubical_complex_representation.png | computational mathematics (specially rigorous numerics) and image | | - | :alt: Cubical complex representation | analysis. | :Introduced in: GUDHI 2.0.0 | + | :alt: Cubical complex representation | analysis. 
| :Since: GUDHI 2.0.0 | | :figclass: align-center | | | - | | | :Copyright: MIT | + | | | :License: MIT | | | | | +--------------------------------------------------------------------------+----------------------------------------------------------------------+-----------------------------+ | * :doc:`cubical_complex_user` | * :doc:`cubical_complex_ref` | diff --git a/src/python/doc/cubical_complex_user.rst b/src/python/doc/cubical_complex_user.rst index 56cf0170..93ca6b24 100644 --- a/src/python/doc/cubical_complex_user.rst +++ b/src/python/doc/cubical_complex_user.rst @@ -8,7 +8,7 @@ Definition ---------- ===================================== ===================================== ===================================== -:Author: Pawel Dlotko :Introduced in: GUDHI PYTHON 2.0.0 :Copyright: GPL v3 +:Author: Pawel Dlotko :Since: GUDHI PYTHON 2.0.0 :License: GPL v3 ===================================== ===================================== ===================================== +---------------------------------------------+----------------------------------------------------------------------+ diff --git a/src/python/doc/nerve_gic_complex_sum.inc b/src/python/doc/nerve_gic_complex_sum.inc index d5356eca..7fe55aff 100644 --- a/src/python/doc/nerve_gic_complex_sum.inc +++ b/src/python/doc/nerve_gic_complex_sum.inc @@ -4,9 +4,9 @@ +----------------------------------------------------------------+------------------------------------------------------------------------+------------------------------------------------------------------+ | .. figure:: | Nerves and Graph Induced Complexes are cover complexes, i.e. | :Author: Mathieu Carrière | | ../../doc/Nerve_GIC/gicvisu.jpg | simplicial complexes that provably contain topological information | | - | :alt: Graph Induced Complex of a point cloud. | about the input data. They can be computed with a cover of the data, | :Introduced in: GUDHI 2.3.0 | + | :alt: Graph Induced Complex of a point cloud. | about the input data. 
They can be computed with a cover of the data, | :Since: GUDHI 2.3.0 | | :figclass: align-center | that comes i.e. from the preimage of a family of intervals covering | | - | | the image of a scalar-valued function defined on the data. | :Copyright: MIT (`GPL v3 `_) | + | | the image of a scalar-valued function defined on the data. | :License: MIT (`GPL v3 `_) | | | | | | | | :Requires: `CGAL `__ :math:`\geq` 4.11.0 | | | | | diff --git a/src/python/doc/persistence_graphical_tools_sum.inc b/src/python/doc/persistence_graphical_tools_sum.inc index 723c0f78..b68d3d7e 100644 --- a/src/python/doc/persistence_graphical_tools_sum.inc +++ b/src/python/doc/persistence_graphical_tools_sum.inc @@ -4,9 +4,9 @@ +-----------------------------------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------+ | .. figure:: | These graphical tools comes on top of persistence results and allows | :Author: Vincent Rouvreau, Theo Lacombe | | img/graphical_tools_representation.png | the user to display easily persistence barcode, diagram or density. | | - | | | :Introduced in: GUDHI 2.0.0 | + | | | :Since: GUDHI 2.0.0 | | | Note that these functions return the matplotlib axis, allowing | | - | | for further modifications (title, aspect, etc.) | :Copyright: MIT | + | | for further modifications (title, aspect, etc.) 
| :License: MIT | | | | | | | | :Requires: matplotlib, numpy and scipy | +-----------------------------------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------+ diff --git a/src/python/doc/persistent_cohomology_sum.inc b/src/python/doc/persistent_cohomology_sum.inc index 9c29bfaa..0effb50f 100644 --- a/src/python/doc/persistent_cohomology_sum.inc +++ b/src/python/doc/persistent_cohomology_sum.inc @@ -4,9 +4,9 @@ +-----------------------------------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------+ | .. figure:: | The theory of homology consists in attaching to a topological space | :Author: Clément Maria | | ../../doc/Persistent_cohomology/3DTorus_poch.png | a sequence of (homology) groups, capturing global topological | | - | :figclass: align-center | features like connected components, holes, cavities, etc. Persistent | :Introduced in: GUDHI 2.0.0 | + | :figclass: align-center | features like connected components, holes, cavities, etc. Persistent | :Since: GUDHI 2.0.0 | | | homology studies the evolution -- birth, life and death -- of these | | - | Rips Persistent Cohomology on a 3D | features when the topological space is changing. Consequently, the | :Copyright: MIT | + | Rips Persistent Cohomology on a 3D | features when the topological space is changing. Consequently, the | :License: MIT | | Torus | theory is essentially composed of three elements: topological spaces, | | | | their homology groups and an evolution scheme. 
| | | | | | diff --git a/src/python/doc/persistent_cohomology_user.rst b/src/python/doc/persistent_cohomology_user.rst index de83cda1..5f931b3a 100644 --- a/src/python/doc/persistent_cohomology_user.rst +++ b/src/python/doc/persistent_cohomology_user.rst @@ -7,7 +7,7 @@ Persistent cohomology user manual Definition ---------- ===================================== ===================================== ===================================== -:Author: Clément Maria :Introduced in: GUDHI PYTHON 2.0.0 :Copyright: GPL v3 +:Author: Clément Maria :Since: GUDHI PYTHON 2.0.0 :License: GPL v3 ===================================== ===================================== ===================================== +-----------------------------------------------------------------+-----------------------------------------------------------------------+ diff --git a/src/python/doc/point_cloud_sum.inc b/src/python/doc/point_cloud_sum.inc index 77245e86..0a159680 100644 --- a/src/python/doc/point_cloud_sum.inc +++ b/src/python/doc/point_cloud_sum.inc @@ -4,9 +4,9 @@ +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ | | :math:`(x_1, x_2, \ldots, x_d)` | Utilities to process point clouds: read from file, subsample, etc. | :Author: Vincent Rouvreau | | | :math:`(y_1, y_2, \ldots, y_d)` | | | - | | | :Introduced in: GUDHI 2.0.0 | + | | | :Since: GUDHI 2.0.0 | | | | | - | | | :Copyright: MIT (`GPL v3 `_) | + | | | :License: MIT (`GPL v3 `_) | | | Parts of this package require CGAL. 
| | | | | :Requires: `Eigen `__ :math:`\geq` 3.1.0 and `CGAL `__ :math:`\geq` 4.11.0 | | | | | diff --git a/src/python/doc/representations_sum.inc b/src/python/doc/representations_sum.inc index edb8a448..eac89b9d 100644 --- a/src/python/doc/representations_sum.inc +++ b/src/python/doc/representations_sum.inc @@ -4,9 +4,9 @@ +------------------------------------------------------------------+----------------------------------------------------------------+-----------------------------------------------+ | .. figure:: | Vectorizations, distances and kernels that work on persistence | :Author: Mathieu Carrière | | img/sklearn-tda.png | diagrams, compatible with scikit-learn. | | - | | | :Introduced in: GUDHI 3.1.0 | + | | | :Since: GUDHI 3.1.0 | | | | | - | | | :Copyright: MIT | + | | | :License: MIT | | | | | | | | :Requires: scikit-learn | +------------------------------------------------------------------+----------------------------------------------------------------+-----------------------------------------------+ diff --git a/src/python/doc/rips_complex_sum.inc b/src/python/doc/rips_complex_sum.inc index a1f0e469..6feb74cd 100644 --- a/src/python/doc/rips_complex_sum.inc +++ b/src/python/doc/rips_complex_sum.inc @@ -4,9 +4,9 @@ +----------------------------------------------------------------+------------------------------------------------------------------------+----------------------------------------------------------------------+ | .. figure:: | Rips complex is a simplicial complex constructed from a one skeleton | :Authors: Clément Maria, Pawel Dlotko, Vincent Rouvreau, Marc Glisse | | ../../doc/Rips_complex/rips_complex_representation.png | graph. 
| | - | :figclass: align-center | | :Introduced in: GUDHI 2.0.0 | + | :figclass: align-center | | :Since: GUDHI 2.0.0 | | | The filtration value of each edge is computed from a user-given | | - | | distance function and is inserted until a user-given threshold | :Copyright: MIT | + | | distance function and is inserted until a user-given threshold | :License: MIT | | | value. | | | | | | | | This complex can be built from a point cloud and a distance function, | | diff --git a/src/python/doc/rips_complex_user.rst b/src/python/doc/rips_complex_user.rst index a27573e8..8efb12e6 100644 --- a/src/python/doc/rips_complex_user.rst +++ b/src/python/doc/rips_complex_user.rst @@ -8,7 +8,7 @@ Definition ---------- ==================================================================== ================================ ====================== -:Authors: Clément Maria, Pawel Dlotko, Vincent Rouvreau, Marc Glisse :Introduced in: GUDHI 2.0.0 :Copyright: GPL v3 +:Authors: Clément Maria, Pawel Dlotko, Vincent Rouvreau, Marc Glisse :Since: GUDHI 2.0.0 :License: GPL v3 ==================================================================== ================================ ====================== +-------------------------------------------+----------------------------------------------------------------------+ diff --git a/src/python/doc/simplex_tree_sum.inc b/src/python/doc/simplex_tree_sum.inc index 3c637b8c..a8858f16 100644 --- a/src/python/doc/simplex_tree_sum.inc +++ b/src/python/doc/simplex_tree_sum.inc @@ -4,9 +4,9 @@ +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------+ | .. figure:: | The simplex tree is an efficient and flexible data structure for | :Author: Clément Maria | | ../../doc/Simplex_tree/Simplex_tree_representation.png | representing general (filtered) simplicial complexes. 
| | - | :alt: Simplex tree representation | | :Introduced in: GUDHI 2.0.0 | + | :alt: Simplex tree representation | | :Since: GUDHI 2.0.0 | | :figclass: align-center | The data structure is described in | | - | | :cite:`boissonnatmariasimplextreealgorithmica` | :Copyright: MIT | + | | :cite:`boissonnatmariasimplextreealgorithmica` | :License: MIT | | | | | +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------+ | * :doc:`simplex_tree_user` | * :doc:`simplex_tree_ref` | diff --git a/src/python/doc/tangential_complex_sum.inc b/src/python/doc/tangential_complex_sum.inc index ddc3e609..45ce2a66 100644 --- a/src/python/doc/tangential_complex_sum.inc +++ b/src/python/doc/tangential_complex_sum.inc @@ -4,9 +4,9 @@ +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ | .. figure:: | A Tangential Delaunay complex is a simplicial complex designed to | :Author: Clément Jamin | | ../../doc/Tangential_complex/tc_examples.png | reconstruct a :math:`k`-dimensional manifold embedded in :math:`d`- | | - | :figclass: align-center | dimensional Euclidean space. The input is a point sample coming from | :Introduced in: GUDHI 2.0.0 | + | :figclass: align-center | dimensional Euclidean space. The input is a point sample coming from | :Since: GUDHI 2.0.0 | | | an unknown manifold. The running time depends only linearly on the | | - | | extrinsic dimension :math:`d` and exponentially on the intrinsic | :Copyright: MIT (`GPL v3 `_) | + | | extrinsic dimension :math:`d` and exponentially on the intrinsic | :License: MIT (`GPL v3 `_) | | | dimension :math:`k`. 
| | | | | :Requires: `Eigen `__ :math:`\geq` 3.1.0 and `CGAL `__ :math:`\geq` 4.11.0 | +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/wasserstein_distance_sum.inc b/src/python/doc/wasserstein_distance_sum.inc index 1632befa..0ff22035 100644 --- a/src/python/doc/wasserstein_distance_sum.inc +++ b/src/python/doc/wasserstein_distance_sum.inc @@ -4,9 +4,9 @@ +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ | .. figure:: | The q-Wasserstein distance measures the similarity between two | :Author: Theo Lacombe | | ../../doc/Bottleneck_distance/perturb_pd.png | persistence diagrams. It's the minimum value c that can be achieved | | - | :figclass: align-center | by a perfect matching between the points of the two diagrams (+ all | :Introduced in: GUDHI 3.1.0 | + | :figclass: align-center | by a perfect matching between the points of the two diagrams (+ all | :Since: GUDHI 3.1.0 | | | diagonal points), where the value of a matching is defined as the | | - | Wasserstein distance is the q-th root of the sum of the | q-th root of the sum of all edge lengths to the power q. Edge lengths| :Copyright: MIT | + | Wasserstein distance is the q-th root of the sum of the | q-th root of the sum of all edge lengths to the power q. Edge lengths| :License: MIT | | edge lengths to the power q. | are measured in norm p, for :math:`1 \leq p \leq \infty`. 
| | | | | :Requires: Python Optimal Transport (POT) :math:`\geq` 0.5.1 | +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ diff --git a/src/python/doc/witness_complex_sum.inc b/src/python/doc/witness_complex_sum.inc index f9c009ab..34d4df4a 100644 --- a/src/python/doc/witness_complex_sum.inc +++ b/src/python/doc/witness_complex_sum.inc @@ -4,9 +4,9 @@ +-------------------------------------------------------------------+----------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ | .. figure:: | Witness complex :math:`Wit(W,L)` is a simplicial complex defined on | :Author: Siargey Kachanovich | | ../../doc/Witness_complex/Witness_complex_representation.png | two sets of points in :math:`\mathbb{R}^D`. | | - | :alt: Witness complex representation | | :Introduced in: GUDHI 2.0.0 | + | :alt: Witness complex representation | | :Since: GUDHI 2.0.0 | | :figclass: align-center | The data structure is described in | | - | | :cite:`boissonnatmariasimplextreealgorithmica`. | :Copyright: MIT (`GPL v3 `_ for Euclidean versions only) | + | | :cite:`boissonnatmariasimplextreealgorithmica`. 
| :License: MIT (`GPL v3 `_ for Euclidean versions only) | | | | | | | | :Requires: `Eigen `__ :math:`\geq` 3.1.0 and `CGAL `__ :math:`\geq` 4.11.0 for Euclidean versions only | +-------------------------------------------------------------------+----------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ -- cgit v1.2.3 From bc223c3cc7cb9e9c0bb3573af720fce9c5380b94 Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Mon, 23 Mar 2020 21:22:16 -0400 Subject: new fixes --- src/Simplex_tree/include/gudhi/Simplex_tree.h | 25 +++++++++++++++----- src/python/gudhi/simplex_tree.pxd | 2 +- src/python/gudhi/simplex_tree.pyx | 21 +++++++---------- src/python/include/Simplex_tree_interface.h | 34 ++++++++++++++------------- src/python/test/test_simplex_tree.py | 7 ++---- 5 files changed, 48 insertions(+), 41 deletions(-) (limited to 'src') diff --git a/src/Simplex_tree/include/gudhi/Simplex_tree.h b/src/Simplex_tree/include/gudhi/Simplex_tree.h index 9008c5f2..de97d6f2 100644 --- a/src/Simplex_tree/include/gudhi/Simplex_tree.h +++ b/src/Simplex_tree/include/gudhi/Simplex_tree.h @@ -42,6 +42,20 @@ namespace Gudhi { +/** + * \class Extended_simplex_type Simplex_tree.h gudhi/Simplex_tree.h + * \brief Extended simplex type data structure for representing the type of simplices in an extended filtration. + * + * \details The extended simplex type can be either UP (which means + * that the simplex was present originally, and is thus part of the ascending extended filtration), DOWN (which means + * that the simplex is the cone of an original simplex, and is thus part of the descending extended filtration) or + * EXTRA (which means the simplex is the cone point). + * + * Details may be found in section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z. 
+ * + */ +enum class Extended_simplex_type {UP, DOWN, EXTRA}; + struct Simplex_tree_options_full_featured; /** @@ -87,7 +101,7 @@ class Simplex_tree { /* \brief Set of nodes sharing a same parent in the simplex tree. */ typedef Simplex_tree_siblings Siblings; - enum Extended_simplex_type {UP, DOWN, EXTRA}; + struct Key_simplex_base_real { Key_simplex_base_real() : key_(-1) {} @@ -106,7 +120,7 @@ class Simplex_tree { Filtration_value minval; Filtration_value maxval; Extended_filtration_data(){} - Extended_filtration_data(Filtration_value vmin, Filtration_value vmax){ minval = vmin; maxval = vmax; } + Extended_filtration_data(Filtration_value vmin, Filtration_value vmax): minval(vmin), maxval(vmax) {} }; typedef typename std::conditional::type Key_simplex_base; @@ -1370,7 +1384,6 @@ class Simplex_tree { // Replacing if(f=max)) would mean that if f is NaN, we replace it with the max of the children. // That seems more useful than keeping NaN. if (!(simplex.second.filtration() >= max_filt_border_value)) { - // Store the filtration modification information modified = true; simplex.second.assign_filtration(max_filt_border_value); @@ -1509,13 +1522,13 @@ class Simplex_tree { Filtration_value minval = efd.minval; Filtration_value maxval = efd.maxval; if (f >= -2 && f <= -1){ - p.first = minval + (maxval-minval)*(f + 2); p.second = UP; + p.first = minval + (maxval-minval)*(f + 2); p.second = Extended_simplex_type::UP; } else if (f >= 1 && f <= 2){ - p.first = minval - (maxval-minval)*(f - 2); p.second = DOWN; + p.first = minval - (maxval-minval)*(f - 2); p.second = Extended_simplex_type::DOWN; } else{ - p.first = -3; p.second = EXTRA; + p.first = std::numeric_limits::quiet_NaN(); p.second = Extended_simplex_type::EXTRA; } return p; }; diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd index b6284af4..595f22bb 100644 --- a/src/python/gudhi/simplex_tree.pxd +++ b/src/python/gudhi/simplex_tree.pxd @@ -58,7 +58,7 @@ cdef extern from 
"Simplex_tree_interface.h" namespace "Gudhi": bool prune_above_filtration(double filtration) bool make_filtration_non_decreasing() void compute_extended_filtration() - vector[vector[pair[int, pair[double, double]]]] compute_extended_persistence_subdiagrams(vector[pair[int, pair[double, double]]] dgm) + vector[vector[pair[int, pair[double, double]]]] compute_extended_persistence_subdiagrams(vector[pair[int, pair[double, double]]] dgm, double min_persistence) # Iterators over Simplex tree pair[vector[int], double] get_simplex_and_filtration(Simplex_tree_simplex_handle f_simplex) Simplex_tree_simplices_iterator get_simplices_iterator_begin() diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 5b850462..bcb1578d 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -411,7 +411,7 @@ cdef class SimplexTree: .. note:: Note that this code creates an extra vertex internally, so you should make sure that - the Simplex_tree does not contain a vertex with the largest Vertex_handle. + the Simplex_tree does not contain a vertex with the largest possible value (i.e., 4294967295). """ return self.get_ptr().compute_extended_filtration() @@ -422,18 +422,16 @@ cdef class SimplexTree: :param homology_coeff_field: The homology coefficient field. Must be a prime number. Default value is 11. :type homology_coeff_field: int. - :param min_persistence: The minimum persistence value to take into + :param min_persistence: The minimum persistence value (i.e., the absolute value of the difference between the persistence diagram point coordinates) to take into account (strictly greater than min_persistence). Default value is 0.0. Sets min_persistence to -1.0 to see all values. :type min_persistence: float. - :returns: A vector of four persistence diagrams. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. 
See section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes. + :returns: A list of four persistence diagrams in the format described in :func:`persistence()`. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. See section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes. .. note:: - This function should be called only if :func:`extend_filtration()`, - :func:`initialize_filtration()`, - and (optionally) :func:`persistence()` have been called first! + This function should be called only if :func:`extend_filtration()` has been called first! .. note:: @@ -442,14 +440,11 @@ cdef class SimplexTree: performed on these values during the computation of extended persistence. """ cdef vector[pair[int, pair[double, double]]] persistence_result - if self.pcohptr == NULL: - self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), False) - if self.pcohptr != NULL: - self.pcohptr.get_persistence(homology_coeff_field, min_persistence) if self.pcohptr != NULL: - pairs = self.pcohptr.persistence_pairs() - persistence_result = [(len(splx1)-1, [self.filtration(splx1), self.filtration(splx2)]) for [splx1, splx2] in pairs] - return self.get_ptr().compute_extended_persistence_subdiagrams(persistence_result) + del self.pcohptr + self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), False) + persistence_result = self.pcohptr.get_persistence(homology_coeff_field, -1.) 
+ return self.get_ptr().compute_extended_persistence_subdiagrams(persistence_result, min_persistence) def persistence(self, homology_coeff_field=11, min_persistence=0, persistence_dim_max = False): diff --git a/src/python/include/Simplex_tree_interface.h b/src/python/include/Simplex_tree_interface.h index a6b1a06e..1a18aed6 100644 --- a/src/python/include/Simplex_tree_interface.h +++ b/src/python/include/Simplex_tree_interface.h @@ -38,7 +38,6 @@ class Simplex_tree_interface : public Simplex_tree { using Skeleton_simplex_iterator = typename Base::Skeleton_simplex_iterator; using Complex_simplex_iterator = typename Base::Complex_simplex_iterator; using Extended_filtration_data = typename Base::Extended_filtration_data; - using Extended_simplex_type = typename Base::Extended_simplex_type; public: @@ -124,31 +123,34 @@ class Simplex_tree_interface : public Simplex_tree { void compute_extended_filtration() { this->efd = this->extend_filtration(); + this->initialize_filtration(); return; } - std::vector>>> compute_extended_persistence_subdiagrams(const std::vector>>& dgm){ + std::vector>>> compute_extended_persistence_subdiagrams(const std::vector>>& dgm, Filtration_value min_persistence){ std::vector>>> new_dgm(4); for (unsigned int i = 0; i < dgm.size(); i++){ std::pair px = this->decode_extended_filtration(dgm[i].second.first, this->efd); std::pair py = this->decode_extended_filtration(dgm[i].second.second, this->efd); std::pair> pd_point = std::make_pair(dgm[i].first, std::make_pair(px.first, py.first)); - //Ordinary - if (px.second == Base::UP && py.second == Base::UP){ - new_dgm[0].push_back(pd_point); - } - // Relative - else if (px.second == Base::DOWN && py.second == Base::DOWN){ - new_dgm[1].push_back(pd_point); - } - else{ - // Extended+ - if (px.first < py.first){ - new_dgm[2].push_back(pd_point); + if(std::abs(px.first - py.first) > min_persistence){ + //Ordinary + if (px.second == Extended_simplex_type::UP && py.second == Extended_simplex_type::UP){ + 
new_dgm[0].push_back(pd_point); + } + // Relative + else if (px.second == Extended_simplex_type::DOWN && py.second == Extended_simplex_type::DOWN){ + new_dgm[1].push_back(pd_point); } - //Extended- else{ - new_dgm[3].push_back(pd_point); + // Extended+ + if (px.first < py.first){ + new_dgm[2].push_back(pd_point); + } + //Extended- + else{ + new_dgm[3].push_back(pd_point); + } } } } diff --git a/src/python/test/test_simplex_tree.py b/src/python/test/test_simplex_tree.py index 20f6aabf..70b26e97 100755 --- a/src/python/test/test_simplex_tree.py +++ b/src/python/test/test_simplex_tree.py @@ -291,10 +291,8 @@ def test_extend_filtration(): ([5], 6.0) ] - st.extend_filtration() - st.initialize_filtration() - + assert list(st.get_filtration()) == [ ([6], -3.0), ([0], -2.0), @@ -323,7 +321,7 @@ def test_extend_filtration(): ([0, 3, 6], 2.0) ] - dgms = st.extended_persistence() + dgms = st.extended_persistence(min_persistence=-1.) assert dgms[0][0][1][0] == pytest.approx(2.) assert dgms[0][0][1][1] == pytest.approx(3.) @@ -334,7 +332,6 @@ def test_extend_filtration(): assert dgms[3][0][1][0] == pytest.approx(6.) assert dgms[3][0][1][1] == pytest.approx(1.) 
- def test_simplices_iterator(): st = SimplexTree() -- cgit v1.2.3 From cb838b2ea4a4db9c54f71103001bdafb90766306 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Tue, 24 Mar 2020 06:37:00 +0100 Subject: merge https://github.com/mglisse/gudhi-devel/tree/alpha-cache and fix conflicts --- src/Alpha_complex/include/gudhi/Alpha_complex.h | 89 ++++++++++--------------- 1 file changed, 37 insertions(+), 52 deletions(-) (limited to 'src') diff --git a/src/Alpha_complex/include/gudhi/Alpha_complex.h b/src/Alpha_complex/include/gudhi/Alpha_complex.h index 1b5d6997..eb4ef427 100644 --- a/src/Alpha_complex/include/gudhi/Alpha_complex.h +++ b/src/Alpha_complex/include/gudhi/Alpha_complex.h @@ -132,6 +132,8 @@ class Alpha_complex { Delaunay_triangulation* triangulation_; /** \brief Kernel for triangulation_ functions access.*/ Kernel kernel_; + /** \brief Cache for geometric constructions: circumcenter and squared radius of a simplex.*/ + std::vector> cache_; public: /** \brief Alpha_complex constructor from an OFF file name. @@ -246,6 +248,24 @@ class Alpha_complex { } } + template + auto& get_cache(SimplicialComplexForAlpha& cplx, typename SimplicialComplexForAlpha::Simplex_handle s) { + auto k = cplx.key(s); + if(k==cplx.null_key()){ + k = cache_.size(); + cplx.assign_key(s, k); + // Use a transform_range? Check the impact on perf. + thread_local std::vector v; + v.clear(); + for (auto vertex : cplx.simplex_vertex_range(s)) + v.push_back(get_point(vertex)); + Point_d c = kernel_.construct_circumcenter_d_object()(v.cbegin(), v.cend()); + typename Kernel::FT r = kernel_.squared_distance_d_object()(c, v[0]); + cache_.emplace_back(std::move(c), std::move(r)); + } + return cache_[k]; + } + public: /** \brief Inserts all Delaunay triangulation into the simplicial complex. 
* It also computes the filtration values accordingly to the \ref createcomplexalgorithm if default_filtration_value @@ -324,46 +344,28 @@ class Alpha_complex { if (!default_filtration_value) { // -------------------------------------------------------------------------------------------- - // Will be re-used many times - Vector_of_CGAL_points pointVector; // ### For i : d -> 0 for (int decr_dim = triangulation_->maximal_dimension(); decr_dim >= 0; decr_dim--) { // ### Foreach Sigma of dim i for (Simplex_handle f_simplex : complex.skeleton_simplex_range(decr_dim)) { int f_simplex_dim = complex.dimension(f_simplex); if (decr_dim == f_simplex_dim) { - pointVector.clear(); - #ifdef DEBUG_TRACES - std::clog << "Sigma of dim " << decr_dim << " is"; - #endif // DEBUG_TRACES - for (auto vertex : complex.simplex_vertex_range(f_simplex)) { - pointVector.push_back(get_point(vertex)); - #ifdef DEBUG_TRACES - std::clog << " " << vertex; - #endif // DEBUG_TRACES - } - #ifdef DEBUG_TRACES - std::clog << std::endl; - #endif // DEBUG_TRACES // ### If filt(Sigma) is NaN : filt(Sigma) = alpha(Sigma) if (std::isnan(complex.filtration(f_simplex))) { Filtration_value alpha_complex_filtration = 0.0; // No need to compute squared_radius on a single point - alpha is 0.0 if (f_simplex_dim > 0) { - // squared_radius function initialization - Squared_Radius squared_radius = kernel_.compute_squared_radius_d_object(); - - CGAL::NT_converter cv; - auto sqrad = squared_radius(pointVector.begin(), pointVector.end()); - #if CGAL_VERSION_NR >= 1050000000 + auto const& sqrad = get_cache(complex, f_simplex).second; +#if CGAL_VERSION_NR >= 1050000000 if(exact) CGAL::exact(sqrad); - #endif +#endif + CGAL::NT_converter cv; alpha_complex_filtration = cv(sqrad); } complex.assign_filtration(f_simplex, alpha_complex_filtration); - #ifdef DEBUG_TRACES +#ifdef DEBUG_TRACES std::clog << "filt(Sigma) is NaN : filt(Sigma) =" << complex.filtration(f_simplex) << std::endl; - #endif // DEBUG_TRACES +#endif // 
DEBUG_TRACES } // No need to propagate further, unweighted points all have value 0 if (decr_dim > 1) @@ -388,9 +390,7 @@ class Alpha_complex { void propagate_alpha_filtration(SimplicialComplexForAlpha& complex, Simplex_handle f_simplex) { // From SimplicialComplexForAlpha type required to assign filtration values. typedef typename SimplicialComplexForAlpha::Filtration_value Filtration_value; -#ifdef DEBUG_TRACES typedef typename SimplicialComplexForAlpha::Vertex_handle Vertex_handle; -#endif // DEBUG_TRACES // ### Foreach Tau face of Sigma for (auto f_boundary : complex.boundary_simplex_range(f_simplex)) { @@ -414,33 +414,18 @@ class Alpha_complex { #endif // DEBUG_TRACES // ### Else } else { - // insert the Tau points in a vector for is_gabriel function - Vector_of_CGAL_points pointVector; -#ifdef DEBUG_TRACES - Vertex_handle vertexForGabriel = Vertex_handle(); -#endif // DEBUG_TRACES - for (auto vertex : complex.simplex_vertex_range(f_boundary)) { - pointVector.push_back(get_point(vertex)); - } - // Retrieve the Sigma point that is not part of Tau - parameter for is_gabriel function - Point_d point_for_gabriel; - for (auto vertex : complex.simplex_vertex_range(f_simplex)) { - point_for_gabriel = get_point(vertex); - if (std::find(pointVector.begin(), pointVector.end(), point_for_gabriel) == pointVector.end()) { -#ifdef DEBUG_TRACES - // vertex is not found in Tau - vertexForGabriel = vertex; -#endif // DEBUG_TRACES - // No need to continue loop - break; - } - } - // is_gabriel function initialization - Is_Gabriel is_gabriel = kernel_.side_of_bounded_sphere_d_object(); - bool is_gab = is_gabriel(pointVector.begin(), pointVector.end(), point_for_gabriel) - != CGAL::ON_BOUNDED_SIDE; + // Find which vertex of f_simplex is missing in f_boundary. We could actually write a variant of boundary_simplex_range that gives pairs (f_boundary, vertex). We rely on the fact that simplex_vertex_range is sorted. 
+ auto longlist = complex.simplex_vertex_range(f_simplex); + auto shortlist = complex.simplex_vertex_range(f_boundary); + auto longiter = std::begin(longlist); + auto shortiter = std::begin(shortlist); + auto enditer = std::end(shortlist); + while(shortiter != enditer && *longiter == *shortiter) { ++longiter; ++shortiter; } + Vertex_handle extra = *longiter; + auto const& cache=get_cache(complex, f_boundary); + bool is_gab = kernel_.squared_distance_d_object()(cache.first, get_point(extra)) >= cache.second; #ifdef DEBUG_TRACES - std::clog << " | Tau is_gabriel(Sigma)=" << is_gab << " - vertexForGabriel=" << vertexForGabriel << std::endl; + std::clog << " | Tau is_gabriel(Sigma)=" << is_gab << " - vertexForGabriel=" << extra << std::endl; #endif // DEBUG_TRACES // ### If Tau is not Gabriel of Sigma if (false == is_gab) { -- cgit v1.2.3 From 20ba972d2a7fd14e564ce4adb3921f3f8190fc71 Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Wed, 25 Mar 2020 13:00:58 -0400 Subject: update biblio --- biblio/bibliography.bib | 36 +++++++++++++++++++-------- src/Simplex_tree/include/gudhi/Simplex_tree.h | 4 +-- src/python/gudhi/simplex_tree.pyx | 2 +- 3 files changed, 29 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/biblio/bibliography.bib b/biblio/bibliography.bib index 3bbe7960..b017a07e 100644 --- a/biblio/bibliography.bib +++ b/biblio/bibliography.bib @@ -7,11 +7,13 @@ } @article{Carriere17c, - author = {Carri\`ere, Mathieu and Michel, Bertrand and Oudot, Steve}, - title = {{Statistical Analysis and Parameter Selection for Mapper}}, - journal = {CoRR}, - volume = {abs/1706.00204}, - year = {2017} +author = {Carri{\`{e}}re, Mathieu and Michel, Bertrand and Oudot, Steve}, +journal = {Journal of Machine Learning Research}, +pages = {1--39}, +publisher = {JMLR.org}, +title = {{Statistical analysis and parameter selection for Mapper}}, +volume = {19}, +year = {2018} } @inproceedings{Dey13, @@ -23,11 +25,14 @@ } @article{Carriere16, - title={{Structure and 
Stability of the 1-Dimensional Mapper}}, - author={Carri\`ere, Mathieu and Oudot, Steve}, - journal={CoRR}, - volume= {abs/1511.05823}, - year={2015} +author = {Carri{\`{e}}re, Mathieu and Oudot, Steve}, +journal = {Foundations of Computational Mathematics}, +number = {6}, +pages = {1333--1396}, +publisher = {Springer-Verlag}, +title = {{Structure and stability of the one-dimensional Mapper}}, +volume = {18}, +year = {2017} } @inproceedings{zigzag_reflection, @@ -36,6 +41,17 @@ year = {2014 $\ \ \ \ \ \ \ \ \ \ \ $ \emph{In Preparation}}, } +@article{Cohen-Steiner2009, +author = {Cohen-Steiner, David and Edelsbrunner, Herbert and Harer, John}, +journal = {Foundations of Computational Mathematics}, +number = {1}, +pages = {79--103}, +publisher = {Springer-Verlag}, +title = {{Extending persistence using Poincar{\'{e}} and Lefschetz duality}}, +volume = {9}, +year = {2009} +} + @misc{gudhi_stpcoh, author = {Cl\'ement Maria}, title = "\textsc{Gudhi}, Simplex Tree and Persistent Cohomology Packages", diff --git a/src/Simplex_tree/include/gudhi/Simplex_tree.h b/src/Simplex_tree/include/gudhi/Simplex_tree.h index de97d6f2..60720567 100644 --- a/src/Simplex_tree/include/gudhi/Simplex_tree.h +++ b/src/Simplex_tree/include/gudhi/Simplex_tree.h @@ -51,7 +51,7 @@ namespace Gudhi { * that the simplex is the cone of an original simplex, and is thus part of the descending extended filtration) or * EXTRA (which means the simplex is the cone point). * - * Details may be found in section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z. + * Details may be found in \cite Cohen-Steiner2009 and section 2.2 in \cite Carriere16. * */ enum class Extended_simplex_type {UP, DOWN, EXTRA}; @@ -1507,7 +1507,7 @@ class Simplex_tree { * in the Simplex_tree. Hence, this function also outputs the type of each simplex. 
It can be either UP (which means * that the simplex was present originally, and is thus part of the ascending extended filtration), DOWN (which means * that the simplex is the cone of an original simplex, and is thus part of the descending extended filtration) or - * EXTRA (which means the simplex is the cone point). Note that if the simplex type is DOWN, the original filtration value + * EXTRA (which means the simplex is the cone point). See the definition of Extended_simplex_type. Note that if the simplex type is DOWN, the original filtration value * is set to be the original filtration value of the corresponding (not coned) original simplex. * \pre This function should be called only if `extend_filtration()` has been called first! * \post The output filtration value is supposed to be the same, but might be a little different, than the diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index bcb1578d..6bb22171 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -427,7 +427,7 @@ cdef class SimplexTree: 0.0. Sets min_persistence to -1.0 to see all values. :type min_persistence: float. - :returns: A list of four persistence diagrams in the format described in :func:`persistence()`. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. See section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes. + :returns: A list of four persistence diagrams in the format described in :func:`persistence()`. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. See and https://link.springer.com/article/10.1007/s10208-008-9027-z and section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes. .. 
note:: -- cgit v1.2.3 From b2a549c055c2796fe4eb1e4e4265cdd718753416 Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Wed, 25 Mar 2020 15:10:35 -0400 Subject: fix biblio --- src/python/gudhi/simplex_tree.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 6bb22171..cc3753e1 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -427,7 +427,7 @@ cdef class SimplexTree: 0.0. Sets min_persistence to -1.0 to see all values. :type min_persistence: float. - :returns: A list of four persistence diagrams in the format described in :func:`persistence()`. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. See and https://link.springer.com/article/10.1007/s10208-008-9027-z and section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes. + :returns: A list of four persistence diagrams in the format described in :func:`persistence()`. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. See https://link.springer.com/article/10.1007/s10208-008-9027-z and/or section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes. .. 
note:: -- cgit v1.2.3 From c8c942c43643131a7ef9899826a7095e497150fe Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 26 Mar 2020 22:10:26 +0100 Subject: cmake --- .../modules/GUDHI_third_party_libraries.cmake | 3 + src/python/CMakeLists.txt | 14 ++ src/python/gudhi/point_cloud/dtm.py | 40 +++++ src/python/gudhi/point_cloud/knn.py | 193 +++++++++++++++++++++ src/python/test/test_dtm.py | 32 ++++ 5 files changed, 282 insertions(+) create mode 100644 src/python/gudhi/point_cloud/dtm.py create mode 100644 src/python/gudhi/point_cloud/knn.py create mode 100755 src/python/test/test_dtm.py (limited to 'src') diff --git a/src/cmake/modules/GUDHI_third_party_libraries.cmake b/src/cmake/modules/GUDHI_third_party_libraries.cmake index 2d010483..c2039674 100644 --- a/src/cmake/modules/GUDHI_third_party_libraries.cmake +++ b/src/cmake/modules/GUDHI_third_party_libraries.cmake @@ -160,6 +160,9 @@ if( PYTHONINTERP_FOUND ) find_python_module("sklearn") find_python_module("ot") find_python_module("pybind11") + find_python_module("torch") + find_python_module("hnswlib") + find_python_module("pykeops") endif() if(NOT GUDHI_PYTHON_PATH) diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index f00966a5..d26d3e6e 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -78,6 +78,15 @@ if(PYTHONINTERP_FOUND) if(OT_FOUND) add_gudhi_debug_info("POT version ${OT_VERSION}") endif() + if(HNSWLIB_FOUND) + add_gudhi_debug_info("HNSWlib version ${OT_VERSION}") + endif() + if(TORCH_FOUND) + add_gudhi_debug_info("PyTorch version ${OT_VERSION}") + endif() + if(PYKEOPS_FOUND) + add_gudhi_debug_info("PyKeOps version ${OT_VERSION}") + endif() set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DBOOST_RESULT_OF_USE_DECLTYPE', ") set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DBOOST_ALL_NO_LIB', ") @@ -399,6 +408,11 @@ if(PYTHONINTERP_FOUND) # Time Delay add_gudhi_py_test(test_time_delay) + # DTM + if(SCIPY_FOUND AND 
SKLEARN_FOUND AND TORCH_FOUND AND HNSWLIB_FOUND AND PYKEOPS_FOUND) + add_gudhi_py_test(test_dtm) + endif() + # Documentation generation is available through sphinx - requires all modules if(SPHINX_PATH) if(MATPLOTLIB_FOUND) diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py new file mode 100644 index 00000000..08f9ea60 --- /dev/null +++ b/src/python/gudhi/point_cloud/dtm.py @@ -0,0 +1,40 @@ +from .knn import KNN + + +class DTM: + def __init__(self, k, q=2, **kwargs): + """ + Args: + q (float): order used to compute the distance to measure. Defaults to the dimension, or 2 if input_type is 'distance_matrix'. + kwargs: Same parameters as KNN, except that metric="neighbors" means that transform() expects an array with the distances to the k nearest neighbors. + """ + self.k = k + self.q = q + self.params = kwargs + + def fit_transform(self, X, y=None): + return self.fit(X).transform(X) + + def fit(self, X, y=None): + """ + Args: + X (numpy.array): coordinates for mass points + """ + if self.params.setdefault("metric", "euclidean") != "neighbors": + self.knn = KNN(self.k, return_index=False, return_distance=True, **self.params) + self.knn.fit(X) + return self + + def transform(self, X): + """ + Args: + X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed", or distances to the k nearest neighbors if metric is "neighbors" (if the array has more than k columns, the remaining ones are ignored). 
+ """ + if self.params["metric"] == "neighbors": + distances = X[:, : self.k] + else: + distances = self.knn.transform(X) + distances = distances ** self.q + dtm = distances.sum(-1) / self.k + dtm = dtm ** (1.0 / self.q) + return dtm diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py new file mode 100644 index 00000000..57078f1e --- /dev/null +++ b/src/python/gudhi/point_cloud/knn.py @@ -0,0 +1,193 @@ +import numpy + + +class KNN: + def __init__(self, k, return_index=True, return_distance=False, metric="euclidean", **kwargs): + """ + Args: + k (int): number of neighbors (including the point itself). + return_index (bool): if True, return the index of each neighbor. + return_distance (bool): if True, return the distance to each neighbor. + implementation (str): Choice of the library that does the real work. + + * 'keops' for a brute-force, CUDA implementation through pykeops. Useful when the dimension becomes + large (10+) but the number of points remains low (less than a million). + Only "minkowski" and its aliases are supported. + * 'ckdtree' for scipy's cKDTree. Only "minkowski" and its aliases are supported. + * 'sklearn' for scikit-learn's NearestNeighbors. + Note that this provides in particular an option algorithm="brute". + * 'hnsw' for hnswlib.Index. It is very fast but does not provide guarantees. + Only supports "euclidean" for now. + * None will try to select a sensible one (scipy if possible, scikit-learn otherwise). + metric (str): see `sklearn.neighbors.NearestNeighbors`. + eps (float): relative error when computing nearest neighbors with the cKDTree. + p (float): norm L^p on input points (including numpy.inf) if metric is "minkowski". Defaults to 2. + n_jobs (int): Number of jobs to schedule for parallel processing of nearest neighbors on the CPU. + If -1 is given all processors are used. Default: 1. + + Additional parameters are forwarded to the backends. 
+ """ + self.k = k + self.return_index = return_index + self.return_distance = return_distance + self.metric = metric + self.params = kwargs + # canonicalize + if metric == "euclidean": + self.params["p"] = 2 + self.metric = "minkowski" + elif metric == "manhattan": + self.params["p"] = 1 + self.metric = "minkowski" + elif metric == "chebyshev": + self.params["p"] = numpy.inf + self.metric = "minkowski" + elif metric == "minkowski": + self.params["p"] = kwargs.get("p", 2) + if self.params.get("implementation") in {"keops", "ckdtree"}: + assert self.metric == "minkowski" + if self.params.get("implementation") == "hnsw": + assert self.metric == "minkowski" and self.params["p"] == 2 + if not self.params.get("implementation"): + if self.metric == "minkowski": + self.params["implementation"] = "ckdtree" + else: + self.params["implementation"] = "sklearn" + + def fit_transform(self, X, y=None): + return self.fit(X).transform(X) + + def fit(self, X, y=None): + """ + Args: + X (numpy.array): coordinates for reference points + """ + self.ref_points = X + if self.params.get("implementation") == "ckdtree": + # sklearn could handle this, but it is much slower + from scipy.spatial import cKDTree + self.kdtree = cKDTree(X) + + if self.params.get("implementation") == "sklearn" and self.metric != "precomputed": + # FIXME: sklearn badly handles "precomputed" + from sklearn.neighbors import NearestNeighbors + + nargs = {k: v for k, v in self.params.items() if k in {"p", "n_jobs", "metric_params", "algorithm", "leaf_size"}} + self.nn = NearestNeighbors(self.k, metric=self.metric, **nargs) + self.nn.fit(X) + + if self.params.get("implementation") == "hnsw": + import hnswlib + self.graph = hnswlib.Index("l2", len(X[0])) # Actually returns squared distances + self.graph.init_index(len(X), **{k:v for k,v in self.params.items() if k in {"ef_construction", "M", "random_seed"}}) + n = self.params.get("num_threads") + if n is None: + n = self.params.get("n_jobs", 1) + 
self.params["num_threads"] = n + self.graph.add_items(X, num_threads=n) + + return self + + def transform(self, X): + """ + Args: + X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed" + """ + metric = self.metric + k = self.k + + if metric == "precomputed": + # scikit-learn could handle that, but they insist on calling fit() with an unused square array, which is too unnatural. + X = numpy.array(X) + if self.return_index: + neighbors = numpy.argpartition(X, k - 1)[:, 0:k] + distances = numpy.take_along_axis(X, neighbors, axis=-1) + ngb_order = numpy.argsort(distances, axis=-1) + neighbors = numpy.take_along_axis(neighbors, ngb_order, axis=-1) + if self.return_distance: + distances = numpy.take_along_axis(distances, ngb_order, axis=-1) + return neighbors, distances + else: + return neighbors + if self.return_distance: + distances = numpy.partition(X, k - 1)[:, 0:k] + # partition is not guaranteed to sort the lower half, although it often does + distances.sort(axis=-1) + return distances + return None + + if self.params.get("implementation") == "hnsw": + ef = self.params.get("ef") + if ef is not None: + self.graph.set_ef(ef) + neighbors, distances = self.graph.knn_query(X, k, num_threads=self.params["num_threads"]) + # The k nearest neighbors are always sorted. I couldn't find it in the doc, but the code calls searchKnn, + # which returns a priority_queue, and then fills the return array backwards with top/pop on the queue. + if self.return_index: + if self.return_distance: + return neighbors, numpy.sqrt(distances) + else: + return neighbors + if self.return_distance: + return numpy.sqrt(distances) + return None + + if self.params.get("implementation") == "keops": + import torch + from pykeops.torch import LazyTensor + + # 'float64' is slow except on super expensive GPUs. Allow it with some param? 
+ XX = torch.tensor(X, dtype=torch.float32) + if X is self.ref_points: + YY = XX + else: + YY = torch.tensor(self.ref_points, dtype=torch.float32) + + p = self.params["p"] + if p == numpy.inf: + # Requires a version of pykeops strictly more recent than 1.3 + mat = (LazyTensor(XX[:, None, :]) - LazyTensor(YY[None, :, :])).abs().max(-1) + elif p == 2: # Any even integer? + mat = ((LazyTensor(XX[:, None, :]) - LazyTensor(YY[None, :, :])) ** p).sum(-1) + else: + mat = ((LazyTensor(XX[:, None, :]) - LazyTensor(YY[None, :, :])).abs() ** p).sum(-1) + + if self.return_index: + if self.return_distance: + distances, neighbors = mat.Kmin_argKmin(k, dim=1) + if p != numpy.inf: + distances = distances ** (1.0 / p) + return neighbors, distances + else: + neighbors = mat.argKmin(k, dim=1) + return neighbors + if self.return_distance: + distances = mat.Kmin(k, dim=1) + if p != numpy.inf: + distances = distances ** (1.0 / p) + return distances + return None + # FIXME: convert everything back to numpy arrays or not? + + if hasattr(self, "kdtree"): + qargs = {key: val for key, val in self.params.items() if key in {"p", "eps", "n_jobs"}} + distances, neighbors = self.kdtree.query(X, k=self.k, **qargs) + if self.return_index: + if self.return_distance: + return neighbors, distances + else: + return neighbors + if self.return_distance: + return distances + return None + + if self.return_distance: + distances, neighbors = self.nn.kneighbors(X, return_distance=True) + if self.return_index: + return neighbors, distances + else: + return distances + if self.return_index: + neighbors = self.nn.kneighbors(X, return_distance=False) + return neighbors + return None diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py new file mode 100755 index 00000000..57fdd131 --- /dev/null +++ b/src/python/test/test_dtm.py @@ -0,0 +1,32 @@ +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. 
+ See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. + Author(s): Marc Glisse + + Copyright (C) 2020 Inria + + Modification(s): + - YYYY/MM Author: Description of the modification +""" + +from gudhi.point_cloud.dtm import DTM +import numpy + + +def test_dtm_euclidean(): + pts = numpy.random.rand(1000,4) + k = 3 + dtm = DTM(k,implementation="ckdtree") + print(dtm.fit_transform(pts)) + dtm = DTM(k,implementation="sklearn") + print(dtm.fit_transform(pts)) + dtm = DTM(k,implementation="sklearn",algorithm="brute") + print(dtm.fit_transform(pts)) + dtm = DTM(k,implementation="hnsw") + print(dtm.fit_transform(pts)) + from scipy.spatial.distance import cdist + d = cdist(pts,pts) + dtm = DTM(k,metric="precomputed") + print(dtm.fit_transform(d)) + dtm = DTM(k,implementation="keops") + print(dtm.fit_transform(pts)) + -- cgit v1.2.3 From 5c4c398b99fe1b157d64cd43a4977ce1504ca795 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 26 Mar 2020 22:25:28 +0100 Subject: HNSWlib doesn't define __version__ --- src/cmake/modules/GUDHI_third_party_libraries.cmake | 21 ++++++++++++++++++++- src/python/CMakeLists.txt | 7 ++++--- 2 files changed, 24 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/cmake/modules/GUDHI_third_party_libraries.cmake b/src/cmake/modules/GUDHI_third_party_libraries.cmake index c2039674..a931b3a1 100644 --- a/src/cmake/modules/GUDHI_third_party_libraries.cmake +++ b/src/cmake/modules/GUDHI_third_party_libraries.cmake @@ -150,6 +150,25 @@ function( find_python_module PYTHON_MODULE_NAME ) endif() endfunction( find_python_module ) +# For modules that do not define module.__version__ +function( find_python_module_no_version PYTHON_MODULE_NAME ) + string(TOUPPER ${PYTHON_MODULE_NAME} PYTHON_MODULE_NAME_UP) + execute_process( + COMMAND ${PYTHON_EXECUTABLE} -c "import ${PYTHON_MODULE_NAME}" + RESULT_VARIABLE PYTHON_MODULE_RESULT + ERROR_VARIABLE PYTHON_MODULE_ERROR) + if(PYTHON_MODULE_RESULT EQUAL 0) + # Remove 
carriage return + message ("++ Python module ${PYTHON_MODULE_NAME} found") + set(${PYTHON_MODULE_NAME_UP}_FOUND TRUE PARENT_SCOPE) + else() + message ("PYTHON_MODULE_NAME = ${PYTHON_MODULE_NAME} + - PYTHON_MODULE_RESULT = ${PYTHON_MODULE_RESULT} + - PYTHON_MODULE_ERROR = ${PYTHON_MODULE_ERROR}") + set(${PYTHON_MODULE_NAME_UP}_FOUND FALSE PARENT_SCOPE) + endif() +endfunction( find_python_module_no_version ) + if( PYTHONINTERP_FOUND ) find_python_module("cython") find_python_module("pytest") @@ -161,8 +180,8 @@ if( PYTHONINTERP_FOUND ) find_python_module("ot") find_python_module("pybind11") find_python_module("torch") - find_python_module("hnswlib") find_python_module("pykeops") + find_python_module_no_version("hnswlib") endif() if(NOT GUDHI_PYTHON_PATH) diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index d26d3e6e..ec0ab1ca 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -79,13 +79,14 @@ if(PYTHONINTERP_FOUND) add_gudhi_debug_info("POT version ${OT_VERSION}") endif() if(HNSWLIB_FOUND) - add_gudhi_debug_info("HNSWlib version ${OT_VERSION}") + # Does not have a version number... 
+ add_gudhi_debug_info("HNSWlib found") endif() if(TORCH_FOUND) - add_gudhi_debug_info("PyTorch version ${OT_VERSION}") + add_gudhi_debug_info("PyTorch version ${TORCH_VERSION}") endif() if(PYKEOPS_FOUND) - add_gudhi_debug_info("PyKeOps version ${OT_VERSION}") + add_gudhi_debug_info("PyKeOps version ${PYKEOPS_VERSION}") endif() set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DBOOST_RESULT_OF_USE_DECLTYPE', ") -- cgit v1.2.3 From 7ddad8220fdd34fd3ed91e16882feaa3961b2d67 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 26 Mar 2020 22:59:20 +0100 Subject: license --- src/python/gudhi/point_cloud/dtm.py | 9 +++++++++ src/python/gudhi/point_cloud/knn.py | 9 +++++++++ 2 files changed, 18 insertions(+) (limited to 'src') diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 08f9ea60..839e7452 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -1,3 +1,12 @@ +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +# Author(s): Marc Glisse +# +# Copyright (C) 2020 Inria +# +# Modification(s): +# - YYYY/MM Author: Description of the modification + from .knn import KNN diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 57078f1e..943d4e9f 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -1,3 +1,12 @@ +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. 
+# Author(s): Marc Glisse +# +# Copyright (C) 2020 Inria +# +# Modification(s): +# - YYYY/MM Author: Description of the modification + import numpy -- cgit v1.2.3 From 7120b186471828a9570fdeef37900bd8b98d0d31 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 26 Mar 2020 23:06:06 +0100 Subject: license --- src/python/doc/point_cloud_sum.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/python/doc/point_cloud_sum.inc b/src/python/doc/point_cloud_sum.inc index 0a159680..ecc18951 100644 --- a/src/python/doc/point_cloud_sum.inc +++ b/src/python/doc/point_cloud_sum.inc @@ -6,7 +6,7 @@ | | :math:`(y_1, y_2, \ldots, y_d)` | | | | | | :Since: GUDHI 2.0.0 | | | | | - | | | :License: MIT (`GPL v3 `_) | + | | | :License: MIT (`GPL v3 `_, BSD-3-Clause, Apache-2.0) | | | Parts of this package require CGAL. | | | | | :Requires: `Eigen `__ :math:`\geq` 3.1.0 and `CGAL `__ :math:`\geq` 4.11.0 | | | | | -- cgit v1.2.3 From af35ea5b4ce631ae826f1db1940798f254aba658 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 26 Mar 2020 23:39:59 +0100 Subject: clean-up use of "implementation" --- src/python/gudhi/point_cloud/knn.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 943d4e9f..a4ea3acd 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -72,12 +72,12 @@ class KNN: X (numpy.array): coordinates for reference points """ self.ref_points = X - if self.params.get("implementation") == "ckdtree": + if self.params["implementation"] == "ckdtree": # sklearn could handle this, but it is much slower from scipy.spatial import cKDTree self.kdtree = cKDTree(X) - if self.params.get("implementation") == "sklearn" and self.metric != "precomputed": + if self.params["implementation"] == "sklearn" and self.metric != "precomputed": # FIXME: sklearn badly handles "precomputed" from 
sklearn.neighbors import NearestNeighbors @@ -85,7 +85,7 @@ class KNN: self.nn = NearestNeighbors(self.k, metric=self.metric, **nargs) self.nn.fit(X) - if self.params.get("implementation") == "hnsw": + if self.params["implementation"] == "hnsw": import hnswlib self.graph = hnswlib.Index("l2", len(X[0])) # Actually returns squared distances self.graph.init_index(len(X), **{k:v for k,v in self.params.items() if k in {"ef_construction", "M", "random_seed"}}) @@ -125,7 +125,7 @@ class KNN: return distances return None - if self.params.get("implementation") == "hnsw": + if self.params["implementation"] == "hnsw": ef = self.params.get("ef") if ef is not None: self.graph.set_ef(ef) @@ -141,7 +141,7 @@ class KNN: return numpy.sqrt(distances) return None - if self.params.get("implementation") == "keops": + if self.params["implementation"] == "keops": import torch from pykeops.torch import LazyTensor @@ -178,7 +178,7 @@ class KNN: return None # FIXME: convert everything back to numpy arrays or not? 
- if hasattr(self, "kdtree"): + if self.params["implementation"] == "ckdtree": qargs = {key: val for key, val in self.params.items() if key in {"p", "eps", "n_jobs"}} distances, neighbors = self.kdtree.query(X, k=self.k, **qargs) if self.return_index: @@ -190,6 +190,7 @@ class KNN: return distances return None + assert self.params["implementation"] == "sklearn" if self.return_distance: distances, neighbors = self.nn.kneighbors(X, return_distance=True) if self.return_index: -- cgit v1.2.3 From f74c71ca8e474ff927cae029ea63329d30293582 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 27 Mar 2020 13:43:58 +0100 Subject: Improve coverage --- src/python/gudhi/point_cloud/dtm.py | 2 ++ src/python/test/test_dtm.py | 48 +++++++++++++++++++++++++------------ 2 files changed, 35 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 839e7452..541b74a6 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -30,6 +30,8 @@ class DTM: X (numpy.array): coordinates for mass points """ if self.params.setdefault("metric", "euclidean") != "neighbors": + # KNN gives sorted distances, which is unnecessary here. + # Maybe add a parameter to say we don't need sorting? 
self.knn = KNN(self.k, return_index=False, return_distance=True, **self.params) self.knn.fit(X) return self diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index 57fdd131..841f8c3c 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -10,23 +10,41 @@ from gudhi.point_cloud.dtm import DTM import numpy +import pytest -def test_dtm_euclidean(): - pts = numpy.random.rand(1000,4) +def test_dtm_compare_euclidean(): + pts = numpy.random.rand(1000, 4) k = 3 - dtm = DTM(k,implementation="ckdtree") - print(dtm.fit_transform(pts)) - dtm = DTM(k,implementation="sklearn") - print(dtm.fit_transform(pts)) - dtm = DTM(k,implementation="sklearn",algorithm="brute") - print(dtm.fit_transform(pts)) - dtm = DTM(k,implementation="hnsw") - print(dtm.fit_transform(pts)) + dtm = DTM(k, implementation="ckdtree") + r0 = dtm.fit_transform(pts) + dtm = DTM(k, implementation="sklearn") + r1 = dtm.fit_transform(pts) + assert r1 == pytest.approx(r0) + dtm = DTM(k, implementation="sklearn", algorithm="brute") + r2 = dtm.fit_transform(pts) + assert r2 == pytest.approx(r0) + dtm = DTM(k, implementation="hnsw") + r3 = dtm.fit_transform(pts) + assert r3 == pytest.approx(r0) from scipy.spatial.distance import cdist - d = cdist(pts,pts) - dtm = DTM(k,metric="precomputed") - print(dtm.fit_transform(d)) - dtm = DTM(k,implementation="keops") - print(dtm.fit_transform(pts)) + d = cdist(pts, pts) + dtm = DTM(k, metric="precomputed") + r4 = dtm.fit_transform(d) + assert r4 == pytest.approx(r0) + dtm = DTM(k, implementation="keops") + r5 = dtm.fit_transform(pts) + assert r5 == pytest.approx(r0) + + +def test_dtm_precomputed(): + dist = numpy.array([[1.0, 3, 8], [1, 5, 5], [0, 2, 3]]) + dtm = DTM(2, q=1, metric="neighbors") + r = dtm.fit_transform(dist) + assert r == pytest.approx([2.0, 3, 1]) + + dist = numpy.array([[2.0, 2], [0, 1], [3, 4]]) + dtm = DTM(2, q=2, metric="neighbors") + r = dtm.fit_transform(dist) + assert r == pytest.approx([2.0, .707, 3.5355], 
rel=.01) -- cgit v1.2.3 From 03376ffe0f6060864ee8908893297f8800b7b8d1 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 27 Mar 2020 20:27:10 +0100 Subject: doc --- src/python/doc/point_cloud.rst | 17 +++++++++++++++-- src/python/gudhi/point_cloud/dtm.py | 6 +++++- src/python/gudhi/point_cloud/knn.py | 31 ++++++++++++++++++------------- src/python/test/test_dtm.py | 2 +- 4 files changed, 39 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/python/doc/point_cloud.rst b/src/python/doc/point_cloud.rst index c0d4b303..351b0786 100644 --- a/src/python/doc/point_cloud.rst +++ b/src/python/doc/point_cloud.rst @@ -21,10 +21,23 @@ Subsampling :special-members: :show-inheritance: -TimeDelayEmbedding ------------------- +Time Delay Embedding +-------------------- .. autoclass:: gudhi.point_cloud.timedelay.TimeDelayEmbedding :members: :special-members: __call__ +Nearest neighbors +----------------- + +.. automodule:: gudhi.point_cloud.knn + :members: + :special-members: __init__ + +Distance to measure +------------------- + +.. automodule:: gudhi.point_cloud.dtm + :members: + :special-members: __init__ diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 541b74a6..e4096c5e 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -11,11 +11,15 @@ from .knn import KNN class DTM: + """ + Class to compute the distance to the empirical measure defined by a point set. + """ + def __init__(self, k, q=2, **kwargs): """ Args: q (float): order used to compute the distance to measure. Defaults to the dimension, or 2 if input_type is 'distance_matrix'. - kwargs: Same parameters as KNN, except that metric="neighbors" means that transform() expects an array with the distances to the k nearest neighbors. + kwargs: Same parameters as :class:`~gudhi.point_cloud.knn.KNN`, except that metric="neighbors" means that :func:`transform` expects an array with the distances to the k nearest neighbors. 
""" self.k = k self.q = q diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index a4ea3acd..02448530 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -11,6 +11,10 @@ import numpy class KNN: + """ + Class wrapping several implementations for computing the k nearest neighbors in a point set. + """ + def __init__(self, k, return_index=True, return_distance=False, metric="euclidean", **kwargs): """ Args: @@ -19,22 +23,17 @@ class KNN: return_distance (bool): if True, return the distance to each neighbor. implementation (str): Choice of the library that does the real work. - * 'keops' for a brute-force, CUDA implementation through pykeops. Useful when the dimension becomes - large (10+) but the number of points remains low (less than a million). - Only "minkowski" and its aliases are supported. + * 'keops' for a brute-force, CUDA implementation through pykeops. Useful when the dimension becomes large (10+) but the number of points remains low (less than a million). Only "minkowski" and its aliases are supported. * 'ckdtree' for scipy's cKDTree. Only "minkowski" and its aliases are supported. - * 'sklearn' for scikit-learn's NearestNeighbors. - Note that this provides in particular an option algorithm="brute". - * 'hnsw' for hnswlib.Index. It is very fast but does not provide guarantees. - Only supports "euclidean" for now. + * 'sklearn' for scikit-learn's NearestNeighbors. Note that this provides in particular an option algorithm="brute". + * 'hnsw' for hnswlib.Index. It can be very fast but does not provide guarantees. Only supports "euclidean" for now. * None will try to select a sensible one (scipy if possible, scikit-learn otherwise). metric (str): see `sklearn.neighbors.NearestNeighbors`. eps (float): relative error when computing nearest neighbors with the cKDTree. p (float): norm L^p on input points (including numpy.inf) if metric is "minkowski". Defaults to 2. 
n_jobs (int): Number of jobs to schedule for parallel processing of nearest neighbors on the CPU. If -1 is given all processors are used. Default: 1. - - Additional parameters are forwarded to the backends. + kwargs: additional parameters are forwarded to the backends. """ self.k = k self.return_index = return_index @@ -75,20 +74,26 @@ class KNN: if self.params["implementation"] == "ckdtree": # sklearn could handle this, but it is much slower from scipy.spatial import cKDTree + self.kdtree = cKDTree(X) if self.params["implementation"] == "sklearn" and self.metric != "precomputed": # FIXME: sklearn badly handles "precomputed" from sklearn.neighbors import NearestNeighbors - nargs = {k: v for k, v in self.params.items() if k in {"p", "n_jobs", "metric_params", "algorithm", "leaf_size"}} + nargs = { + k: v for k, v in self.params.items() if k in {"p", "n_jobs", "metric_params", "algorithm", "leaf_size"} + } self.nn = NearestNeighbors(self.k, metric=self.metric, **nargs) self.nn.fit(X) if self.params["implementation"] == "hnsw": import hnswlib - self.graph = hnswlib.Index("l2", len(X[0])) # Actually returns squared distances - self.graph.init_index(len(X), **{k:v for k,v in self.params.items() if k in {"ef_construction", "M", "random_seed"}}) + + self.graph = hnswlib.Index("l2", len(X[0])) # Actually returns squared distances + self.graph.init_index( + len(X), **{k: v for k, v in self.params.items() if k in {"ef_construction", "M", "random_seed"}} + ) n = self.params.get("num_threads") if n is None: n = self.params.get("n_jobs", 1) @@ -154,7 +159,7 @@ class KNN: p = self.params["p"] if p == numpy.inf: - # Requires a version of pykeops strictly more recent than 1.3 + # Requires pykeops 1.4 or later mat = (LazyTensor(XX[:, None, :]) - LazyTensor(YY[None, :, :])).abs().max(-1) elif p == 2: # Any even integer? 
mat = ((LazyTensor(XX[:, None, :]) - LazyTensor(YY[None, :, :])) ** p).sum(-1) diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index 841f8c3c..93b13e1a 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -47,4 +47,4 @@ def test_dtm_precomputed(): dist = numpy.array([[2.0, 2], [0, 1], [3, 4]]) dtm = DTM(2, q=2, metric="neighbors") r = dtm.fit_transform(dist) - assert r == pytest.approx([2.0, .707, 3.5355], rel=.01) + assert r == pytest.approx([2.0, 0.707, 3.5355], rel=0.01) -- cgit v1.2.3 From 68839b95e7751afd04155cd2565cc53362f01fa2 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 28 Mar 2020 10:41:50 +0100 Subject: Missing test --- src/python/CMakeLists.txt | 1 + src/python/test/test_knn.py | 82 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100755 src/python/test/test_knn.py (limited to 'src') diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index ec0ab1ca..d7a6a4db 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -411,6 +411,7 @@ if(PYTHONINTERP_FOUND) # DTM if(SCIPY_FOUND AND SKLEARN_FOUND AND TORCH_FOUND AND HNSWLIB_FOUND AND PYKEOPS_FOUND) + add_gudhi_py_test(test_knn) add_gudhi_py_test(test_dtm) endif() diff --git a/src/python/test/test_knn.py b/src/python/test/test_knn.py new file mode 100755 index 00000000..e455fb48 --- /dev/null +++ b/src/python/test/test_knn.py @@ -0,0 +1,82 @@ +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. 
+ Author(s): Marc Glisse + + Copyright (C) 2020 Inria + + Modification(s): + - YYYY/MM Author: Description of the modification +""" + +from gudhi.point_cloud.knn import KNN +import numpy as np +import pytest + + +def test_knn_explicit(): + base = np.array([[1.0, 1], [1, 2], [4, 2], [4, 3]]) + query = np.array([[1.0, 1], [2, 2], [4, 4]]) + knn = KNN(2, metric="manhattan", return_distance=True, return_index=True) + knn.fit(base) + r = knn.transform(query) + assert r[0] == pytest.approx(np.array([[0, 1], [1, 0], [3, 2]])) + assert r[1] == pytest.approx(np.array([[0.0, 1], [1, 2], [1, 2]])) + + knn = KNN(2, metric="chebyshev", return_distance=True, return_index=False) + knn.fit(base) + r = knn.transform(query) + assert r == pytest.approx(np.array([[0.0, 1], [1, 1], [1, 2]])) + r = ( + KNN(2, metric="chebyshev", return_distance=True, return_index=False, implementation="keops") + .fit(base) + .transform(query) + ) + assert r == pytest.approx(np.array([[0.0, 1], [1, 1], [1, 2]])) + + knn = KNN(2, metric="minkowski", p=3, return_distance=False, return_index=True) + knn.fit(base) + r = knn.transform(query) + assert np.array_equal(r, [[0, 1], [1, 0], [3, 2]]) + r = ( + KNN(2, metric="minkowski", p=3, return_distance=False, return_index=True, implementation="keops") + .fit(base) + .transform(query) + ) + assert np.array_equal(r, [[0, 1], [1, 0], [3, 2]]) + + dist = np.array([[0.0, 3, 8], [1, 0, 5], [1, 2, 0]]) + knn = KNN(2, metric="precomputed", return_index=True, return_distance=False) + r = knn.fit_transform(dist) + assert np.array_equal(r, [[0, 1], [1, 0], [2, 0]]) + knn = KNN(2, metric="precomputed", return_index=True, return_distance=True) + r = knn.fit_transform(dist) + assert np.array_equal(r[0], [[0, 1], [1, 0], [2, 0]]) + assert np.array_equal(r[1], [[0, 3], [0, 1], [0, 1]]) + + +def test_knn_compare(): + base = np.array([[1.0, 1], [1, 2], [4, 2], [4, 3]]) + query = np.array([[1.0, 1], [2, 2], [4, 4]]) + r0 = KNN(2, implementation="ckdtree", return_index=True, 
return_distance=False).fit(base).transform(query) + r1 = KNN(2, implementation="sklearn", return_index=True, return_distance=False).fit(base).transform(query) + r2 = KNN(2, implementation="hnsw", return_index=True, return_distance=False).fit(base).transform(query) + r3 = KNN(2, implementation="keops", return_index=True, return_distance=False).fit(base).transform(query) + assert np.array_equal(r0, r1) and np.array_equal(r0, r2) and np.array_equal(r0, r3) + + r0 = KNN(2, implementation="ckdtree", return_index=True, return_distance=True).fit(base).transform(query) + r1 = KNN(2, implementation="sklearn", return_index=True, return_distance=True).fit(base).transform(query) + r2 = KNN(2, implementation="hnsw", return_index=True, return_distance=True).fit(base).transform(query) + r3 = KNN(2, implementation="keops", return_index=True, return_distance=True).fit(base).transform(query) + assert np.array_equal(r0[0], r1[0]) and np.array_equal(r0[0], r2[0]) and np.array_equal(r0[0], r3[0]) + d0 = pytest.approx(r0[1]) + assert r1[1] == d0 and r2[1] == d0 and r3[1] == d0 + + +def test_knn_nop(): + # This doesn't look super useful... 
+ p = np.array([[0.0]]) + assert None is KNN(k=1, return_index=False, return_distance=False, implementation="sklearn").fit_transform(p) + assert None is KNN(k=1, return_index=False, return_distance=False, implementation="ckdtree").fit_transform(p) + assert None is KNN(k=1, return_index=False, return_distance=False, implementation="hnsw", ef=5).fit_transform(p) + assert None is KNN(k=1, return_index=False, return_distance=False, implementation="keops").fit_transform(p) + assert None is KNN(k=1, return_index=False, return_distance=False, metric="precomputed").fit_transform(p) -- cgit v1.2.3 From 35a12b553c85af8ce31629b90a27a7071b0cc379 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 28 Mar 2020 11:48:43 +0100 Subject: Doc tweaks, default DTM exponent --- src/python/doc/point_cloud.rst | 6 ++++-- src/python/doc/point_cloud_sum.inc | 4 ++-- src/python/gudhi/point_cloud/dtm.py | 17 ++++++++++++----- src/python/gudhi/point_cloud/knn.py | 6 +++--- 4 files changed, 21 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/python/doc/point_cloud.rst b/src/python/doc/point_cloud.rst index 351b0786..192f70db 100644 --- a/src/python/doc/point_cloud.rst +++ b/src/python/doc/point_cloud.rst @@ -28,11 +28,12 @@ Time Delay Embedding :members: :special-members: __call__ -Nearest neighbors ------------------ +K nearest neighbors +------------------- .. automodule:: gudhi.point_cloud.knn :members: + :undoc-members: :special-members: __init__ Distance to measure @@ -40,4 +41,5 @@ Distance to measure .. 
automodule:: gudhi.point_cloud.dtm :members: + :undoc-members: :special-members: __init__ diff --git a/src/python/doc/point_cloud_sum.inc b/src/python/doc/point_cloud_sum.inc index ecc18951..d4761aba 100644 --- a/src/python/doc/point_cloud_sum.inc +++ b/src/python/doc/point_cloud_sum.inc @@ -2,8 +2,8 @@ :widths: 30 40 30 +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ - | | :math:`(x_1, x_2, \ldots, x_d)` | Utilities to process point clouds: read from file, subsample, etc. | :Author: Vincent Rouvreau | - | | :math:`(y_1, y_2, \ldots, y_d)` | | | + | | :math:`(x_1, x_2, \ldots, x_d)` | Utilities to process point clouds: read from file, subsample, | :Authors: Vincent Rouvreau, Marc Glisse, Masatoshi Takenouchi | + | | :math:`(y_1, y_2, \ldots, y_d)` | find neighbors, embed time series in higher dimension, etc. | | | | | :Since: GUDHI 2.0.0 | | | | | | | | :License: MIT (`GPL v3 `_, BSD-3-Clause, Apache-2.0) | diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index e4096c5e..520cbea8 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -15,10 +15,11 @@ class DTM: Class to compute the distance to the empirical measure defined by a point set. """ - def __init__(self, k, q=2, **kwargs): + def __init__(self, k, q=None, **kwargs): """ Args: - q (float): order used to compute the distance to measure. Defaults to the dimension, or 2 if input_type is 'distance_matrix'. + k (int): number of neighbors (possibly including the point itself). + q (float): order used to compute the distance to measure. Defaults to the dimension, or 2 if metric is "neighbors" or "distance_matrix". 
kwargs: Same parameters as :class:`~gudhi.point_cloud.knn.KNN`, except that metric="neighbors" means that :func:`transform` expects an array with the distances to the k nearest neighbors. """ self.k = k @@ -31,7 +32,7 @@ class DTM: def fit(self, X, y=None): """ Args: - X (numpy.array): coordinates for mass points + X (numpy.array): coordinates for mass points. """ if self.params.setdefault("metric", "euclidean") != "neighbors": # KNN gives sorted distances, which is unnecessary here. @@ -45,11 +46,17 @@ class DTM: Args: X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed", or distances to the k nearest neighbors if metric is "neighbors" (if the array has more than k columns, the remaining ones are ignored). """ + q = self.q + if q is None: + if self.params["metric"] in {"neighbors", "precomputed"}: + q = 2 + else: + q = len(X[0]) if self.params["metric"] == "neighbors": distances = X[:, : self.k] else: distances = self.knn.transform(X) - distances = distances ** self.q + distances = distances ** q dtm = distances.sum(-1) / self.k - dtm = dtm ** (1.0 / self.q) + dtm = dtm ** (1.0 / q) return dtm diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 02448530..31e4fc9f 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -18,7 +18,7 @@ class KNN: def __init__(self, k, return_index=True, return_distance=False, metric="euclidean", **kwargs): """ Args: - k (int): number of neighbors (including the point itself). + k (int): number of neighbors (possibly including the point itself). return_index (bool): if True, return the index of each neighbor. return_distance (bool): if True, return the distance to each neighbor. implementation (str): Choice of the library that does the real work. @@ -68,7 +68,7 @@ class KNN: def fit(self, X, y=None): """ Args: - X (numpy.array): coordinates for reference points + X (numpy.array): coordinates for reference points. 
""" self.ref_points = X if self.params["implementation"] == "ckdtree": @@ -105,7 +105,7 @@ class KNN: def transform(self, X): """ Args: - X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed" + X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed". """ metric = self.metric k = self.k -- cgit v1.2.3 From a911f9707d44259a38ae3dbb6fbcec75779fc639 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 28 Mar 2020 12:17:29 +0100 Subject: doc --- src/python/gudhi/point_cloud/dtm.py | 2 +- src/python/gudhi/point_cloud/knn.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 520cbea8..3ac69f31 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -20,7 +20,7 @@ class DTM: Args: k (int): number of neighbors (possibly including the point itself). q (float): order used to compute the distance to measure. Defaults to the dimension, or 2 if metric is "neighbors" or "distance_matrix". - kwargs: Same parameters as :class:`~gudhi.point_cloud.knn.KNN`, except that metric="neighbors" means that :func:`transform` expects an array with the distances to the k nearest neighbors. + kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNN`, except that metric="neighbors" means that :func:`transform` expects an array with the distances to the k nearest neighbors. """ self.k = k self.q = q diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 31e4fc9f..bb7757f2 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -21,7 +21,7 @@ class KNN: k (int): number of neighbors (possibly including the point itself). return_index (bool): if True, return the index of each neighbor. return_distance (bool): if True, return the distance to each neighbor. 
- implementation (str): Choice of the library that does the real work. + implementation (str): choice of the library that does the real work. * 'keops' for a brute-force, CUDA implementation through pykeops. Useful when the dimension becomes large (10+) but the number of points remains low (less than a million). Only "minkowski" and its aliases are supported. * 'ckdtree' for scipy's cKDTree. Only "minkowski" and its aliases are supported. @@ -31,7 +31,7 @@ class KNN: metric (str): see `sklearn.neighbors.NearestNeighbors`. eps (float): relative error when computing nearest neighbors with the cKDTree. p (float): norm L^p on input points (including numpy.inf) if metric is "minkowski". Defaults to 2. - n_jobs (int): Number of jobs to schedule for parallel processing of nearest neighbors on the CPU. + n_jobs (int): number of jobs to schedule for parallel processing of nearest neighbors on the CPU. If -1 is given all processors are used. Default: 1. kwargs: additional parameters are forwarded to the backends. 
""" -- cgit v1.2.3 From 990d54f2f13e116f97c1d0f35cbb751015d863fe Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 28 Mar 2020 12:20:57 +0100 Subject: Fix test --- src/python/test/test_dtm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index 93b13e1a..1d080ab4 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -30,7 +30,7 @@ def test_dtm_compare_euclidean(): from scipy.spatial.distance import cdist d = cdist(pts, pts) - dtm = DTM(k, metric="precomputed") + dtm = DTM(k, q=2, metric="precomputed") r4 = dtm.fit_transform(d) assert r4 == pytest.approx(r0) dtm = DTM(k, implementation="keops") -- cgit v1.2.3 From 40f4b6fb1fe20c3843b1fd80f99996e6d25c9426 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 28 Mar 2020 12:26:36 +0100 Subject: Comment --- src/python/gudhi/point_cloud/dtm.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 3ac69f31..ba011eaf 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -59,4 +59,6 @@ class DTM: distances = distances ** q dtm = distances.sum(-1) / self.k dtm = dtm ** (1.0 / q) + # We compute too many powers, 1/p in knn then q in dtm, 1/q in dtm then q or some log in the caller. + # Add option to skip the final root? 
return dtm -- cgit v1.2.3 From 7f323484acdeafca93efdd9bdd20ed428f8fb95b Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 28 Mar 2020 12:45:00 +0100 Subject: Optional sort_results --- src/python/gudhi/point_cloud/dtm.py | 4 +--- src/python/gudhi/point_cloud/knn.py | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index ba011eaf..678524f2 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -35,9 +35,7 @@ class DTM: X (numpy.array): coordinates for mass points. """ if self.params.setdefault("metric", "euclidean") != "neighbors": - # KNN gives sorted distances, which is unnecessary here. - # Maybe add a parameter to say we don't need sorting? - self.knn = KNN(self.k, return_index=False, return_distance=True, **self.params) + self.knn = KNN(self.k, return_index=False, return_distance=True, sort_results=False, **self.params) self.knn.fit(X) return self diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index bb7757f2..8369f1f8 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -33,6 +33,9 @@ class KNN: p (float): norm L^p on input points (including numpy.inf) if metric is "minkowski". Defaults to 2. n_jobs (int): number of jobs to schedule for parallel processing of nearest neighbors on the CPU. If -1 is given all processors are used. Default: 1. + sort_results (bool): if True, then distances and indices of each point are + sorted on return, so that the first column contains the closest points. + Otherwise, neighbors are returned in an arbitrary order. Defaults to True. kwargs: additional parameters are forwarded to the backends. 
""" self.k = k @@ -115,18 +118,22 @@ class KNN: X = numpy.array(X) if self.return_index: neighbors = numpy.argpartition(X, k - 1)[:, 0:k] - distances = numpy.take_along_axis(X, neighbors, axis=-1) - ngb_order = numpy.argsort(distances, axis=-1) - neighbors = numpy.take_along_axis(neighbors, ngb_order, axis=-1) + if self.params.get("sort_results", True): + X = numpy.take_along_axis(X, neighbors, axis=-1) + ngb_order = numpy.argsort(X, axis=-1) + neighbors = numpy.take_along_axis(neighbors, ngb_order, axis=-1) + else: + ngb_order = neighbors if self.return_distance: - distances = numpy.take_along_axis(distances, ngb_order, axis=-1) + distances = numpy.take_along_axis(X, ngb_order, axis=-1) return neighbors, distances else: return neighbors if self.return_distance: distances = numpy.partition(X, k - 1)[:, 0:k] - # partition is not guaranteed to sort the lower half, although it often does - distances.sort(axis=-1) + if self.params.get("sort_results"): + # partition is not guaranteed to sort the lower half, although it often does + distances.sort(axis=-1) return distances return None -- cgit v1.2.3 From 75286efcf311f0c7c46a7039970d663f60953e14 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 28 Mar 2020 12:59:01 +0100 Subject: Fix test --- src/python/test/test_dtm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index 1d080ab4..33b2f3a2 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -30,7 +30,7 @@ def test_dtm_compare_euclidean(): from scipy.spatial.distance import cdist d = cdist(pts, pts) - dtm = DTM(k, q=2, metric="precomputed") + dtm = DTM(k, q=4, metric="precomputed") r4 = dtm.fit_transform(d) assert r4 == pytest.approx(r0) dtm = DTM(k, implementation="keops") -- cgit v1.2.3 From dd9457649d0d197bbed6402200e0f2f55655680e Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 28 Mar 2020 15:39:15 +0100 Subject: Default param of 2 for DTM 
--- src/python/gudhi/point_cloud/dtm.py | 14 ++++---------- src/python/test/test_dtm.py | 2 +- 2 files changed, 5 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 678524f2..c26ba844 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -15,11 +15,11 @@ class DTM: Class to compute the distance to the empirical measure defined by a point set. """ - def __init__(self, k, q=None, **kwargs): + def __init__(self, k, q=2, **kwargs): """ Args: k (int): number of neighbors (possibly including the point itself). - q (float): order used to compute the distance to measure. Defaults to the dimension, or 2 if metric is "neighbors" or "distance_matrix". + q (float): order used to compute the distance to measure. Defaults to 2. kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNN`, except that metric="neighbors" means that :func:`transform` expects an array with the distances to the k nearest neighbors. """ self.k = k @@ -44,19 +44,13 @@ class DTM: Args: X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed", or distances to the k nearest neighbors if metric is "neighbors" (if the array has more than k columns, the remaining ones are ignored). """ - q = self.q - if q is None: - if self.params["metric"] in {"neighbors", "precomputed"}: - q = 2 - else: - q = len(X[0]) if self.params["metric"] == "neighbors": distances = X[:, : self.k] else: distances = self.knn.transform(X) - distances = distances ** q + distances = distances ** self.q dtm = distances.sum(-1) / self.k - dtm = dtm ** (1.0 / q) + dtm = dtm ** (1.0 / self.q) # We compute too many powers, 1/p in knn then q in dtm, 1/q in dtm then q or some log in the caller. # Add option to skip the final root? 
return dtm diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index 33b2f3a2..93b13e1a 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -30,7 +30,7 @@ def test_dtm_compare_euclidean(): from scipy.spatial.distance import cdist d = cdist(pts, pts) - dtm = DTM(k, q=4, metric="precomputed") + dtm = DTM(k, metric="precomputed") r4 = dtm.fit_transform(d) assert r4 == pytest.approx(r0) dtm = DTM(k, implementation="keops") -- cgit v1.2.3 From 8d06fbeae596a0372bf9a921de7d04cc734eaa3b Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 30 Mar 2020 08:14:46 +0200 Subject: Biblio --- biblio/bibliography.bib | 15 +++++++++++++++ src/python/gudhi/point_cloud/dtm.py | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/biblio/bibliography.bib b/biblio/bibliography.bib index 3bbe7960..f9d43638 100644 --- a/biblio/bibliography.bib +++ b/biblio/bibliography.bib @@ -1192,3 +1192,18 @@ numpages = {11}, location = {Montr\'{e}al, Canada}, series = {NIPS’18} } +@Article{dtm, +author={Chazal, Fr{\'e}d{\'e}ric +and Cohen-Steiner, David +and M{\'e}rigot, Quentin}, +title={Geometric Inference for Probability Measures}, +journal={Foundations of Computational Mathematics}, +year={2011}, +volume={11}, +number={6}, +pages={733-751}, +abstract={Data often comes in the form of a point cloud sampled from an unknown compact subset of Euclidean space. The general goal of geometric inference is then to recover geometric and topological features (e.g., Betti numbers, normals) of this subset from the approximating point cloud data. It appears that the study of distance functions allows one to address many of these questions successfully. However, one of the main limitations of this framework is that it does not cope well with outliers or with background noise. In this paper, we show how to extend the framework of distance functions to overcome this problem. 
Replacing compact subsets by measures, we introduce a notion of distance function to a probability distribution in Rd. These functions share many properties with classical distance functions, which make them suitable for inference purposes. In particular, by considering appropriate level sets of these distance functions, we show that it is possible to reconstruct offsets of sampled shapes with topological guarantees even in the presence of outliers. Moreover, in settings where empirical measures are considered, these functions can be easily evaluated, making them of particular practical interest.}, +issn={1615-3383}, +doi={10.1007/s10208-011-9098-0}, +url={https://doi.org/10.1007/s10208-011-9098-0} +} diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index c26ba844..23c36b88 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -12,7 +12,7 @@ from .knn import KNN class DTM: """ - Class to compute the distance to the empirical measure defined by a point set. + Class to compute the distance to the empirical measure defined by a point set, as introduced in :cite:`dtm`. 
""" def __init__(self, k, q=2, **kwargs): -- cgit v1.2.3 From c5c565dfd92ce1ad5b318dca40edf9429d6334c2 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 30 Mar 2020 20:46:56 +0200 Subject: Streamline initialize_filtration --- src/Alpha_complex/test/Alpha_complex_unit_test.cpp | 3 -- .../utilities/alpha_complex_3d_persistence.cpp | 3 -- .../utilities/alpha_complex_persistence.cpp | 3 -- .../alpha_rips_persistence_bottleneck_distance.cpp | 6 --- .../example/custom_persistence_sort.cpp | 3 -- .../example/persistence_from_file.cpp | 3 -- .../example/plain_homology.cpp | 3 -- .../example/rips_multifield_persistence.cpp | 3 -- .../example/rips_persistence_step_by_step.cpp | 3 -- .../include/gudhi/Persistent_cohomology.h | 2 - .../rips_correlation_matrix_persistence.cpp | 3 -- .../utilities/rips_distance_matrix_persistence.cpp | 3 -- src/Rips_complex/utilities/rips_persistence.cpp | 3 -- .../utilities/sparse_rips_persistence.cpp | 3 -- src/Simplex_tree/include/gudhi/Simplex_tree.h | 56 ++++++++++++++-------- src/python/doc/simplex_tree_ref.rst | 1 - .../example/alpha_complex_from_points_example.py | 3 -- src/python/example/simplex_tree_example.py | 1 - src/python/gudhi/simplex_tree.pxd | 3 +- src/python/gudhi/simplex_tree.pyx | 50 ++----------------- src/python/include/Alpha_complex_interface.h | 1 - .../Euclidean_strong_witness_complex_interface.h | 2 - .../include/Euclidean_witness_complex_interface.h | 2 - src/python/include/Nerve_gic_interface.h | 1 - src/python/include/Rips_complex_interface.h | 1 - src/python/include/Simplex_tree_interface.h | 15 +++--- .../include/Strong_witness_complex_interface.h | 2 - src/python/include/Tangential_complex_interface.h | 1 - src/python/include/Witness_complex_interface.h | 2 - src/python/test/test_simplex_tree.py | 3 -- 30 files changed, 48 insertions(+), 140 deletions(-) (limited to 'src') diff --git a/src/Alpha_complex/test/Alpha_complex_unit_test.cpp b/src/Alpha_complex/test/Alpha_complex_unit_test.cpp index 
da1d8004..4b37e4bd 100644 --- a/src/Alpha_complex/test/Alpha_complex_unit_test.cpp +++ b/src/Alpha_complex/test/Alpha_complex_unit_test.cpp @@ -188,9 +188,6 @@ BOOST_AUTO_TEST_CASE(Alpha_complex_from_points) { // Test after prune_above_filtration bool modified = simplex_tree.prune_above_filtration(0.6); - if (modified) { - simplex_tree.initialize_filtration(); - } BOOST_CHECK(modified); // Another way to check num_simplices diff --git a/src/Alpha_complex/utilities/alpha_complex_3d_persistence.cpp b/src/Alpha_complex/utilities/alpha_complex_3d_persistence.cpp index e93c412e..91899040 100644 --- a/src/Alpha_complex/utilities/alpha_complex_3d_persistence.cpp +++ b/src/Alpha_complex/utilities/alpha_complex_3d_persistence.cpp @@ -222,9 +222,6 @@ int main(int argc, char **argv) { break; } - // Sort the simplices in the order of the filtration - simplex_tree.initialize_filtration(); - std::clog << "Simplex_tree dim: " << simplex_tree.dimension() << std::endl; // Compute the persistence diagram of the complex Persistent_cohomology pcoh(simplex_tree, true); diff --git a/src/Alpha_complex/utilities/alpha_complex_persistence.cpp b/src/Alpha_complex/utilities/alpha_complex_persistence.cpp index be60ff78..7c898dfd 100644 --- a/src/Alpha_complex/utilities/alpha_complex_persistence.cpp +++ b/src/Alpha_complex/utilities/alpha_complex_persistence.cpp @@ -75,9 +75,6 @@ int main(int argc, char **argv) { std::clog << "Simplicial complex is of dimension " << simplex.dimension() << " - " << simplex.num_simplices() << " simplices - " << simplex.num_vertices() << " vertices." 
<< std::endl; - // Sort the simplices in the order of the filtration - simplex.initialize_filtration(); - std::clog << "Simplex_tree dim: " << simplex.dimension() << std::endl; // Compute the persistence diagram of the complex Gudhi::persistent_cohomology::Persistent_cohomology pcoh( diff --git a/src/Bottleneck_distance/example/alpha_rips_persistence_bottleneck_distance.cpp b/src/Bottleneck_distance/example/alpha_rips_persistence_bottleneck_distance.cpp index 4769eca3..ceb9e226 100644 --- a/src/Bottleneck_distance/example/alpha_rips_persistence_bottleneck_distance.cpp +++ b/src/Bottleneck_distance/example/alpha_rips_persistence_bottleneck_distance.cpp @@ -71,9 +71,6 @@ int main(int argc, char * argv[]) { std::clog << "The Rips complex contains " << rips_stree.num_simplices() << " simplices and has dimension " << rips_stree.dimension() << " \n"; - // Sort the simplices in the order of the filtration - rips_stree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology rips_pcoh(rips_stree); // initializes the coefficient field for homology @@ -92,9 +89,6 @@ int main(int argc, char * argv[]) { std::clog << "The Alpha complex contains " << alpha_stree.num_simplices() << " simplices and has dimension " << alpha_stree.dimension() << " \n"; - // Sort the simplices in the order of the filtration - alpha_stree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology alpha_pcoh(alpha_stree); // initializes the coefficient field for homology diff --git a/src/Persistent_cohomology/example/custom_persistence_sort.cpp b/src/Persistent_cohomology/example/custom_persistence_sort.cpp index 87e9c207..410cd987 100644 --- a/src/Persistent_cohomology/example/custom_persistence_sort.cpp +++ b/src/Persistent_cohomology/example/custom_persistence_sort.cpp @@ -86,9 +86,6 @@ int main(int argc, char **argv) { " - " << simplex.num_simplices() << " simplices - " << simplex.num_vertices() << " vertices." 
<< std::endl; - // Sort the simplices in the order of the filtration - simplex.initialize_filtration(); - std::clog << "Simplex_tree dim: " << simplex.dimension() << std::endl; Persistent_cohomology pcoh(simplex); diff --git a/src/Persistent_cohomology/example/persistence_from_file.cpp b/src/Persistent_cohomology/example/persistence_from_file.cpp index 79108730..38c44514 100644 --- a/src/Persistent_cohomology/example/persistence_from_file.cpp +++ b/src/Persistent_cohomology/example/persistence_from_file.cpp @@ -59,9 +59,6 @@ int main(int argc, char * argv[]) { std::clog << std::endl; }*/ - // Sort the simplices in the order of the filtration - simplex_tree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology< Simplex_tree<>, Field_Zp > pcoh(simplex_tree); // initializes the coefficient field for homology diff --git a/src/Persistent_cohomology/example/plain_homology.cpp b/src/Persistent_cohomology/example/plain_homology.cpp index 4d329020..236b67de 100644 --- a/src/Persistent_cohomology/example/plain_homology.cpp +++ b/src/Persistent_cohomology/example/plain_homology.cpp @@ -59,9 +59,6 @@ int main() { st.insert_simplex_and_subfaces(edge35); st.insert_simplex(vertex4); - // Sort the simplices in the order of the filtration - st.initialize_filtration(); - // Class for homology computation // By default, since the complex has dimension 1, only 0-dimensional homology would be computed. 
// Here we also want persistent homology to be computed for the maximal dimension in the complex (persistence_dim_max = true) diff --git a/src/Persistent_cohomology/example/rips_multifield_persistence.cpp b/src/Persistent_cohomology/example/rips_multifield_persistence.cpp index e2e2c0a5..2edf5bc4 100644 --- a/src/Persistent_cohomology/example/rips_multifield_persistence.cpp +++ b/src/Persistent_cohomology/example/rips_multifield_persistence.cpp @@ -59,9 +59,6 @@ int main(int argc, char * argv[]) { std::clog << "The complex contains " << simplex_tree.num_simplices() << " simplices \n"; std::clog << " and has dimension " << simplex_tree.dimension() << " \n"; - // Sort the simplices in the order of the filtration - simplex_tree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology pcoh(simplex_tree); // initializes the coefficient field for homology diff --git a/src/Persistent_cohomology/example/rips_persistence_step_by_step.cpp b/src/Persistent_cohomology/example/rips_persistence_step_by_step.cpp index 7da9f15d..a503d983 100644 --- a/src/Persistent_cohomology/example/rips_persistence_step_by_step.cpp +++ b/src/Persistent_cohomology/example/rips_persistence_step_by_step.cpp @@ -76,9 +76,6 @@ int main(int argc, char * argv[]) { std::clog << "The complex contains " << st.num_simplices() << " simplices \n"; std::clog << " and has dimension " << st.dimension() << " \n"; - // Sort the simplices in the order of the filtration - st.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology pcoh(st); // initializes the coefficient field for homology diff --git a/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h b/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h index ca4bc10d..bc111f94 100644 --- a/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h +++ b/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h @@ -561,7 +561,6 @@ class 
Persistent_cohomology { void output_diagram(std::ostream& ostream = std::cout) { cmp_intervals_by_length cmp(cpx_); std::sort(std::begin(persistent_pairs_), std::end(persistent_pairs_), cmp); - bool has_infinity = std::numeric_limits::has_infinity; for (auto pair : persistent_pairs_) { ostream << get<2>(pair) << " " << cpx_->dimension(get<0>(pair)) << " " << cpx_->filtration(get<0>(pair)) << " " @@ -573,7 +572,6 @@ class Persistent_cohomology { std::ofstream diagram_out(diagram_name.c_str()); cmp_intervals_by_length cmp(cpx_); std::sort(std::begin(persistent_pairs_), std::end(persistent_pairs_), cmp); - bool has_infinity = std::numeric_limits::has_infinity; for (auto pair : persistent_pairs_) { diagram_out << cpx_->dimension(get<0>(pair)) << " " << cpx_->filtration(get<0>(pair)) << " " diff --git a/src/Rips_complex/utilities/rips_correlation_matrix_persistence.cpp b/src/Rips_complex/utilities/rips_correlation_matrix_persistence.cpp index 67f921a6..b473738e 100644 --- a/src/Rips_complex/utilities/rips_correlation_matrix_persistence.cpp +++ b/src/Rips_complex/utilities/rips_correlation_matrix_persistence.cpp @@ -71,9 +71,6 @@ int main(int argc, char* argv[]) { std::clog << "The complex contains " << simplex_tree.num_simplices() << " simplices \n"; std::clog << " and has dimension " << simplex_tree.dimension() << " \n"; - // Sort the simplices in the order of the filtration - simplex_tree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology pcoh(simplex_tree); // initializes the coefficient field for homology diff --git a/src/Rips_complex/utilities/rips_distance_matrix_persistence.cpp b/src/Rips_complex/utilities/rips_distance_matrix_persistence.cpp index 4ad19675..6306755d 100644 --- a/src/Rips_complex/utilities/rips_distance_matrix_persistence.cpp +++ b/src/Rips_complex/utilities/rips_distance_matrix_persistence.cpp @@ -50,9 +50,6 @@ int main(int argc, char* argv[]) { std::clog << "The complex contains " << 
simplex_tree.num_simplices() << " simplices \n"; std::clog << " and has dimension " << simplex_tree.dimension() << " \n"; - // Sort the simplices in the order of the filtration - simplex_tree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology pcoh(simplex_tree); // initializes the coefficient field for homology diff --git a/src/Rips_complex/utilities/rips_persistence.cpp b/src/Rips_complex/utilities/rips_persistence.cpp index 4cc63d3c..9d7490b3 100644 --- a/src/Rips_complex/utilities/rips_persistence.cpp +++ b/src/Rips_complex/utilities/rips_persistence.cpp @@ -52,9 +52,6 @@ int main(int argc, char* argv[]) { std::clog << "The complex contains " << simplex_tree.num_simplices() << " simplices \n"; std::clog << " and has dimension " << simplex_tree.dimension() << " \n"; - // Sort the simplices in the order of the filtration - simplex_tree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology pcoh(simplex_tree); // initializes the coefficient field for homology diff --git a/src/Rips_complex/utilities/sparse_rips_persistence.cpp b/src/Rips_complex/utilities/sparse_rips_persistence.cpp index 40606158..ac935b41 100644 --- a/src/Rips_complex/utilities/sparse_rips_persistence.cpp +++ b/src/Rips_complex/utilities/sparse_rips_persistence.cpp @@ -54,9 +54,6 @@ int main(int argc, char* argv[]) { std::clog << "The complex contains " << simplex_tree.num_simplices() << " simplices \n"; std::clog << " and has dimension " << simplex_tree.dimension() << " \n"; - // Sort the simplices in the order of the filtration - simplex_tree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology pcoh(simplex_tree); // initializes the coefficient field for homology diff --git a/src/Simplex_tree/include/gudhi/Simplex_tree.h b/src/Simplex_tree/include/gudhi/Simplex_tree.h index b455ae31..43250795 100644 --- a/src/Simplex_tree/include/gudhi/Simplex_tree.h +++ 
b/src/Simplex_tree/include/gudhi/Simplex_tree.h @@ -142,7 +142,10 @@ class Simplex_tree { public: /** \brief Handle type to a simplex contained in the simplicial complex represented - * by the simplex tree. */ + * by the simplex tree. + * + * They are essentially pointers into internal vectors, and any insertion or removal + * of a simplex may invalidate any other Simplex_handle in the complex. */ typedef typename Dictionary::iterator Simplex_handle; private: @@ -255,11 +258,9 @@ class Simplex_tree { * * The filtration must be valid. If the filtration has not been initialized yet, the * method initializes it (i.e. order the simplices). If the complex has changed since the last time the filtration - * was initialized, please call `initialize_filtration()` to recompute it. */ + * was initialized, please call `clear_filtration()` or `initialize_filtration()` to recompute it. */ Filtration_simplex_range const& filtration_simplex_range(Indexing_tag = Indexing_tag()) { - if (filtration_vect_.empty()) { - initialize_filtration(); - } + maybe_initialize_filtration(); return filtration_vect_; } @@ -877,15 +878,13 @@ class Simplex_tree { } public: - /** \brief Initializes the filtrations, i.e. sort the - * simplices according to their order in the filtration and initializes all Simplex_keys. + /** \brief Initializes the filtration cache, i.e. sorts the + * simplices according to their order in the filtration. * - * After calling this method, filtration_simplex_range() becomes valid, and each simplex is - * assigned a Simplex_key corresponding to its order in the filtration (from 0 to m-1 for a - * simplicial complex with m simplices). + * It always recomputes the cache, even if one already exists. * - * Will be automatically called when calling filtration_simplex_range() - * if the filtration has never been initialized yet. */ + * Any insertion, deletion or change of filtration value invalidates this cache, + * which can be cleared with clear_filtration(). 
*/ void initialize_filtration() { filtration_vect_.clear(); filtration_vect_.reserve(num_simplices()); @@ -907,6 +906,21 @@ class Simplex_tree { std::stable_sort(filtration_vect_.begin(), filtration_vect_.end(), is_before_in_filtration(this)); #endif } + /** \brief Initializes the filtration cache if it isn't initialized yet. + * + * Automatically called by filtration_simplex_range(). */ + void maybe_initialize_filtration() { + if (filtration_vect_.empty()) { + initialize_filtration(); + } + } + /** \brief Clears the filtration cache produced by initialize_filtration(). + * + * Useful when initialize_filtration() has already been called and we perform an operation + * (say an insertion) that invalidates the cache. */ + void clear_filtration() { + filtration_vect_.clear(); + } private: /** Recursive search of cofaces @@ -1128,6 +1142,7 @@ class Simplex_tree { * 1 when calling the method. */ void expansion(int max_dim) { if (max_dim <= 1) return; + clear_filtration(); // Drop the cache. dimension_ = max_dim; for (Dictionary_it root_it = root_.members_.begin(); root_it != root_.members_.end(); ++root_it) { @@ -1338,9 +1353,6 @@ class Simplex_tree { /** \brief This function ensures that each simplex has a higher filtration value than its faces by increasing the * filtration values. * @return True if any filtration value was modified, false if the filtration was already non-decreasing. - * \post Some simplex tree functions require the filtration to be valid. `make_filtration_non_decreasing()` - * function is not launching `initialize_filtration()` but returns the filtration modification information. If the - * complex has changed , please call `initialize_filtration()` to recompute it. * * If a simplex has a `NaN` filtration value, it is considered lower than any other defined filtration value. 
*/ @@ -1352,6 +1364,8 @@ class Simplex_tree { modified |= rec_make_filtration_non_decreasing(simplex.second.children()); } } + if(modified) + clear_filtration(); // Drop the cache. return modified; } @@ -1391,16 +1405,16 @@ class Simplex_tree { public: /** \brief Prune above filtration value given as parameter. * @param[in] filtration Maximum threshold value. - * @return The filtration modification information. - * \post Some simplex tree functions require the filtration to be valid. `prune_above_filtration()` - * function is not launching `initialize_filtration()` but returns the filtration modification information. If the - * complex has changed , please call `initialize_filtration()` to recompute it. + * @return True if any simplex was removed, false if all simplices already had a value below the threshold. * \post Note that the dimension of the simplicial complex may be lower after calling `prune_above_filtration()` * than it was before. However, `upper_bound_dimension()` will return the old value, which remains a valid upper * bound. If you care, you can call `dimension()` to recompute the exact dimension. */ bool prune_above_filtration(Filtration_value filtration) { - return rec_prune_above_filtration(root(), filtration); + bool modified = rec_prune_above_filtration(root(), filtration); + if(modified) + clear_filtration(); // Drop the cache. + return modified; } private: @@ -1467,7 +1481,6 @@ class Simplex_tree { * @param[in] sh Simplex handle on the maximal simplex to remove. * \pre Please check the simplex has no coface before removing it. * \exception std::invalid_argument In debug mode, if sh has children. - * \post Be aware that removing is shifting data in a flat_map (initialize_filtration to be done). * \post Note that the dimension of the simplicial complex may be lower after calling `remove_maximal_simplex()` * than it was before. However, `upper_bound_dimension()` will return the old value, which remains a valid upper * bound. 
If you care, you can call `dimension()` to recompute the exact dimension. @@ -1539,6 +1552,7 @@ class Simplex_tree { * the original filtration values for each simplex. */ Extended_filtration_data extend_filtration() { + clear_filtration(); // Drop the cache. // Compute maximum and minimum of filtration values Vertex_handle maxvert = std::numeric_limits::min(); diff --git a/src/python/doc/simplex_tree_ref.rst b/src/python/doc/simplex_tree_ref.rst index 9eb8c199..46b2c1e5 100644 --- a/src/python/doc/simplex_tree_ref.rst +++ b/src/python/doc/simplex_tree_ref.rst @@ -8,7 +8,6 @@ Simplex tree reference manual .. autoclass:: gudhi.SimplexTree :members: - :undoc-members: :show-inheritance: .. automethod:: gudhi.SimplexTree.__init__ diff --git a/src/python/example/alpha_complex_from_points_example.py b/src/python/example/alpha_complex_from_points_example.py index 73faf17c..465632eb 100755 --- a/src/python/example/alpha_complex_from_points_example.py +++ b/src/python/example/alpha_complex_from_points_example.py @@ -46,9 +46,6 @@ if simplex_tree.find([4]): else: print("[4] Not found...") -# Some insertions, simplex_tree needs to initialize filtrations -simplex_tree.initialize_filtration() - print("dimension=", simplex_tree.dimension()) print("filtrations=") for simplex_with_filtration in simplex_tree.get_filtration(): diff --git a/src/python/example/simplex_tree_example.py b/src/python/example/simplex_tree_example.py index 34833899..c4635dc5 100755 --- a/src/python/example/simplex_tree_example.py +++ b/src/python/example/simplex_tree_example.py @@ -42,7 +42,6 @@ print("simplices=") for simplex_with_filtration in st.get_simplices(): print("(%s, %.2f)" % tuple(simplex_with_filtration)) -st.initialize_filtration() print("filtration=") for simplex_with_filtration in st.get_filtration(): print("(%s, %.2f)" % tuple(simplex_with_filtration)) diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd index 595f22bb..7e3bba2b 100644 --- 
a/src/python/gudhi/simplex_tree.pxd +++ b/src/python/gudhi/simplex_tree.pxd @@ -48,8 +48,7 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi": int dimension() int upper_bound_dimension() bool find_simplex(vector[int] simplex) - bool insert_simplex_and_subfaces(vector[int] simplex, - double filtration) + bool insert(vector[int] simplex, double filtration) vector[pair[vector[int], double]] get_star(vector[int] simplex) vector[pair[vector[int], double]] get_cofaces(vector[int] simplex, int dimension) diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index cc3753e1..a709980f 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -90,7 +90,7 @@ cdef class SimplexTree: (with more :meth:`assign_filtration` or :meth:`make_filtration_non_decreasing` for instance) before calling any function that relies on the filtration property, like - :meth:`initialize_filtration`. + :meth:`persistence`. """ self.get_ptr().assign_simplex_filtration(simplex, filtration) @@ -98,16 +98,7 @@ cdef class SimplexTree: """This function initializes and sorts the simplicial complex filtration vector. - .. note:: - - This function must be launched before - :func:`persistence()`, - :func:`betti_numbers()`, - :func:`persistent_betti_numbers()`, - or :func:`get_filtration()` - after :func:`inserting` or - :func:`removing` - simplices. + .. deprecated:: 3.2.0 """ self.get_ptr().initialize_filtration() @@ -182,10 +173,7 @@ cdef class SimplexTree: :returns: true if the simplex was found, false otherwise. :rtype: bool """ - cdef vector[int] csimplex - for i in simplex: - csimplex.push_back(i) - return self.get_ptr().find_simplex(csimplex) + return self.get_ptr().find_simplex(simplex) def insert(self, simplex, filtration=0.0): """This function inserts the given N-simplex and its subfaces with the @@ -202,11 +190,7 @@ cdef class SimplexTree: otherwise (whatever its original filtration value). 
:rtype: bool """ - cdef vector[int] csimplex - for i in simplex: - csimplex.push_back(i) - return self.get_ptr().insert_simplex_and_subfaces(csimplex, - filtration) + return self.get_ptr().insert(simplex, filtration) def get_simplices(self): """This function returns a generator with simplices and their given @@ -306,11 +290,6 @@ cdef class SimplexTree: :param simplex: The N-simplex, represented by a list of vertex. :type simplex: list of int. - .. note:: - - Be aware that removing is shifting data in a flat_map - (:func:`initialize_filtration()` to be done). - .. note:: The dimension of the simplicial complex may be lower after calling @@ -332,16 +311,6 @@ cdef class SimplexTree: :rtype: bool - .. note:: - - Some simplex tree functions require the filtration to be valid. - prune_above_filtration function is not launching - :func:`initialize_filtration()` - but returns the filtration modification - information. If the complex has changed , please call - :func:`initialize_filtration()` - to recompute it. - .. note:: Note that the dimension of the simplicial complex may be lower @@ -382,17 +351,6 @@ cdef class SimplexTree: :returns: True if any filtration value was modified, False if the filtration was already non-decreasing. :rtype: bool - - - .. note:: - - Some simplex tree functions require the filtration to be valid. - make_filtration_non_decreasing function is not launching - :func:`initialize_filtration()` - but returns the filtration modification - information. If the complex has changed , please call - :func:`initialize_filtration()` - to recompute it. 
""" return self.get_ptr().make_filtration_non_decreasing() diff --git a/src/python/include/Alpha_complex_interface.h b/src/python/include/Alpha_complex_interface.h index 8614eee3..40de88f3 100644 --- a/src/python/include/Alpha_complex_interface.h +++ b/src/python/include/Alpha_complex_interface.h @@ -58,7 +58,6 @@ class Alpha_complex_interface { void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square) { alpha_complex_->create_complex(*simplex_tree, max_alpha_square); - simplex_tree->initialize_filtration(); } private: diff --git a/src/python/include/Euclidean_strong_witness_complex_interface.h b/src/python/include/Euclidean_strong_witness_complex_interface.h index c1c72737..f94c51ef 100644 --- a/src/python/include/Euclidean_strong_witness_complex_interface.h +++ b/src/python/include/Euclidean_strong_witness_complex_interface.h @@ -50,12 +50,10 @@ class Euclidean_strong_witness_complex_interface { void create_simplex_tree(Gudhi::Simplex_tree<>* simplex_tree, double max_alpha_square, std::size_t limit_dimension) { witness_complex_->create_complex(*simplex_tree, max_alpha_square, limit_dimension); - simplex_tree->initialize_filtration(); } void create_simplex_tree(Gudhi::Simplex_tree<>* simplex_tree, double max_alpha_square) { witness_complex_->create_complex(*simplex_tree, max_alpha_square); - simplex_tree->initialize_filtration(); } std::vector get_point(unsigned vh) { diff --git a/src/python/include/Euclidean_witness_complex_interface.h b/src/python/include/Euclidean_witness_complex_interface.h index 5d7dbdc2..4411ae79 100644 --- a/src/python/include/Euclidean_witness_complex_interface.h +++ b/src/python/include/Euclidean_witness_complex_interface.h @@ -49,12 +49,10 @@ class Euclidean_witness_complex_interface { void create_simplex_tree(Gudhi::Simplex_tree<>* simplex_tree, double max_alpha_square, std::size_t limit_dimension) { witness_complex_->create_complex(*simplex_tree, max_alpha_square, limit_dimension); - 
simplex_tree->initialize_filtration(); } void create_simplex_tree(Gudhi::Simplex_tree<>* simplex_tree, double max_alpha_square) { witness_complex_->create_complex(*simplex_tree, max_alpha_square); - simplex_tree->initialize_filtration(); } std::vector get_point(unsigned vh) { diff --git a/src/python/include/Nerve_gic_interface.h b/src/python/include/Nerve_gic_interface.h index 5e7f8ae6..ab14c318 100644 --- a/src/python/include/Nerve_gic_interface.h +++ b/src/python/include/Nerve_gic_interface.h @@ -29,7 +29,6 @@ class Nerve_gic_interface : public Cover_complex> { public: void create_simplex_tree(Simplex_tree_interface<>* simplex_tree) { create_complex(*simplex_tree); - simplex_tree->initialize_filtration(); } void set_cover_from_Euclidean_Voronoi(int m) { set_cover_from_Voronoi(Gudhi::Euclidean_distance(), m); diff --git a/src/python/include/Rips_complex_interface.h b/src/python/include/Rips_complex_interface.h index a66b0e5b..d98b0226 100644 --- a/src/python/include/Rips_complex_interface.h +++ b/src/python/include/Rips_complex_interface.h @@ -53,7 +53,6 @@ class Rips_complex_interface { rips_complex_->create_complex(*simplex_tree, dim_max); else sparse_rips_complex_->create_complex(*simplex_tree, dim_max); - simplex_tree->initialize_filtration(); } private: diff --git a/src/python/include/Simplex_tree_interface.h b/src/python/include/Simplex_tree_interface.h index 1a18aed6..5b456baa 100644 --- a/src/python/include/Simplex_tree_interface.h +++ b/src/python/include/Simplex_tree_interface.h @@ -43,16 +43,19 @@ class Simplex_tree_interface : public Simplex_tree { Extended_filtration_data efd; - bool find_simplex(const Simplex& vh) { - return (Base::find(vh) != Base::null_simplex()); + bool find_simplex(const Simplex& simplex) { + return (Base::find(simplex) != Base::null_simplex()); } - void assign_simplex_filtration(const Simplex& vh, Filtration_value filtration) { - Base::assign_filtration(Base::find(vh), filtration); + void assign_simplex_filtration(const Simplex& 
simplex, Filtration_value filtration) { + Base::assign_filtration(Base::find(simplex), filtration); + Base::clear_filtration(); } bool insert(const Simplex& simplex, Filtration_value filtration = 0) { Insertion_result result = Base::insert_simplex_and_subfaces(simplex, filtration); + if (result.first != Base::null_simplex()) + Base::clear_filtration(); return (result.second); } @@ -86,7 +89,7 @@ class Simplex_tree_interface : public Simplex_tree { void remove_maximal_simplex(const Simplex& simplex) { Base::remove_maximal_simplex(Base::find(simplex)); - Base::initialize_filtration(); + Base::clear_filtration(); } Simplex_and_filtration get_simplex_and_filtration(Simplex_handle f_simplex) { @@ -123,7 +126,6 @@ class Simplex_tree_interface : public Simplex_tree { void compute_extended_filtration() { this->efd = this->extend_filtration(); - this->initialize_filtration(); return; } @@ -158,7 +160,6 @@ class Simplex_tree_interface : public Simplex_tree { } void create_persistence(Gudhi::Persistent_cohomology_interface* pcoh) { - Base::initialize_filtration(); pcoh = new Gudhi::Persistent_cohomology_interface(*this); } diff --git a/src/python/include/Strong_witness_complex_interface.h b/src/python/include/Strong_witness_complex_interface.h index cda5b514..e9ab0c7b 100644 --- a/src/python/include/Strong_witness_complex_interface.h +++ b/src/python/include/Strong_witness_complex_interface.h @@ -41,13 +41,11 @@ class Strong_witness_complex_interface { void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square, std::size_t limit_dimension) { witness_complex_->create_complex(*simplex_tree, max_alpha_square, limit_dimension); - simplex_tree->initialize_filtration(); } void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square) { witness_complex_->create_complex(*simplex_tree, max_alpha_square); - simplex_tree->initialize_filtration(); } private: diff --git a/src/python/include/Tangential_complex_interface.h 
b/src/python/include/Tangential_complex_interface.h index 698226cc..b1afce94 100644 --- a/src/python/include/Tangential_complex_interface.h +++ b/src/python/include/Tangential_complex_interface.h @@ -90,7 +90,6 @@ class Tangential_complex_interface { void create_simplex_tree(Simplex_tree<>* simplex_tree) { tangential_complex_->create_complex>(*simplex_tree); - simplex_tree->initialize_filtration(); } void set_max_squared_edge_length(double max_squared_edge_length) { diff --git a/src/python/include/Witness_complex_interface.h b/src/python/include/Witness_complex_interface.h index 45e14253..76947e53 100644 --- a/src/python/include/Witness_complex_interface.h +++ b/src/python/include/Witness_complex_interface.h @@ -41,13 +41,11 @@ class Witness_complex_interface { void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square, std::size_t limit_dimension) { witness_complex_->create_complex(*simplex_tree, max_alpha_square, limit_dimension); - simplex_tree->initialize_filtration(); } void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square) { witness_complex_->create_complex(*simplex_tree, max_alpha_square); - simplex_tree->initialize_filtration(); } private: diff --git a/src/python/test/test_simplex_tree.py b/src/python/test/test_simplex_tree.py index 70b26e97..2137d822 100755 --- a/src/python/test/test_simplex_tree.py +++ b/src/python/test/test_simplex_tree.py @@ -46,7 +46,6 @@ def test_insertion(): assert st.find([2, 3]) == False # filtration test - st.initialize_filtration() assert st.filtration([0, 1, 2]) == 4.0 assert st.filtration([0, 2]) == 4.0 assert st.filtration([1, 2]) == 4.0 @@ -93,7 +92,6 @@ def test_insertion(): assert st.find([1]) == True assert st.find([2]) == True - st.initialize_filtration() assert st.persistence(persistence_dim_max=True) == [ (1, (4.0, float("inf"))), (0, (0.0, float("inf"))), @@ -151,7 +149,6 @@ def test_expansion(): st.expansion(3) assert st.num_vertices() == 7 assert 
st.num_simplices() == 22 - st.initialize_filtration() assert list(st.get_filtration()) == [ ([2], 0.1), -- cgit v1.2.3 From 4cdc7f03fb5917134ba8886b026c8990f56bcfeb Mon Sep 17 00:00:00 2001 From: tlacombe Date: Tue, 31 Mar 2020 11:21:27 +0200 Subject: merged doc from barycenters to wasserstein distance --- src/python/doc/wasserstein_distance_sum.inc | 10 +-- src/python/doc/wasserstein_distance_user.rst | 91 ++++++++++++++++++++++++++-- 2 files changed, 92 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/python/doc/wasserstein_distance_sum.inc b/src/python/doc/wasserstein_distance_sum.inc index a97f428d..09424de2 100644 --- a/src/python/doc/wasserstein_distance_sum.inc +++ b/src/python/doc/wasserstein_distance_sum.inc @@ -3,11 +3,11 @@ +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ | .. figure:: | The q-Wasserstein distance measures the similarity between two | :Author: Theo Lacombe | - | ../../doc/Bottleneck_distance/perturb_pd.png | persistence diagrams. It's the minimum value c that can be achieved | | - | :figclass: align-center | by a perfect matching between the points of the two diagrams (+ all | :Introduced in: GUDHI 3.1.0 | - | | diagonal points), where the value of a matching is defined as the | | - | Wasserstein distance is the q-th root of the sum of the | q-th root of the sum of all edge lengths to the power q. Edge lengths| :Copyright: MIT | - | edge lengths to the power q. | are measured in norm p, for :math:`1 \leq p \leq \infty`. | | + | ../../doc/Bottleneck_distance/perturb_pd.png | persistence diagrams using the sum of all edges lengths (instead of | | + | :figclass: align-center | the maximum). It allows to define sophisticated objects such as | :Introduced in: GUDHI 3.1.0 | + | | barycenters of a family of persistence diagrams. 
| | + | Wasserstein distance is the q-th root of the sum of the | | :Copyright: MIT | + | edge lengths to the power q. | | | | | | :Requires: Python Optimal Transport (POT) :math:`\geq` 0.5.1 | +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ | * :doc:`wasserstein_distance_user` | | diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst index a9b21fa5..6de05afc 100644 --- a/src/python/doc/wasserstein_distance_user.rst +++ b/src/python/doc/wasserstein_distance_user.rst @@ -9,10 +9,16 @@ Definition .. include:: wasserstein_distance_sum.inc -Functions ---------- -This implementation uses the Python Optimal Transport library and is based on -ideas from "Large Scale Computation of Means and Cluster for Persistence +The q-Wasserstein distance is defined as the minimal value achieved +by a perfect matching between the points of the two diagrams (+ all +diagonal points), where the value of a matching is defined as the +q-th root of the sum of all edge lengths to the power q. Edge lengths +are measured in norm p, for :math:`1 \leq p \leq \infty`. + +Distance Functions +------------------ +This first implementation uses the Python Optimal Transport library and is based +on ideas from "Large Scale Computation of Means and Cluster for Persistence Diagrams via Optimal Transport" :cite:`10.5555/3327546.3327645`. .. autofunction:: gudhi.wasserstein.wasserstein_distance @@ -84,3 +90,80 @@ The output is: point 1 in dgm1 is matched to point 2 in dgm2 point 2 in dgm1 is matched to the diagonal point 1 in dgm2 is matched to the diagonal + + +Barycenters +----------- + +A Frechet mean (or barycenter) is a generalization of the arithmetic +mean in a non linear space such as the one of persistence diagrams.
+Given a set of persistence diagrams :math:`\mu_1 \dots \mu_n`, it is +defined as a minimizer of the variance functional, that is of +:math:`\mu \mapsto \sum_{i=1}^n d_2(\mu,\mu_i)^2`, +where :math:`d_2` denotes the Wasserstein-2 distance between +persistence diagrams. +It is known to exist and is generically unique. However, an exact +computation is in general intractable. Current implementation +available is based on (Turner et al., 2014), +:cite:`turner2014frechet` +and uses an EM-scheme to +provide a local minimum of the variance functional (somewhat similar +to the Lloyd algorithm to estimate a solution to the k-means +problem). The local minimum returned depends on the initialization of +the barycenter. +The combinatorial structure of the algorithm limits its +scaling on large scale problems (thousands of diagrams and of points +per diagram). + +.. figure:: + ./img/barycenter.png + :figclass: align-center + + Illustration of Frechet mean between persistence + diagrams. + + +.. autofunction:: gudhi.barycenter.lagrangian_barycenter + +Basic example +------------- + +This example computes the Frechet mean (aka Wasserstein barycenter) between +four persistence diagrams. +It is initialized on the 4th diagram. +As the algorithm is not convex, its output depends on the initialization and +is only a local minimum of the objective function. +Initialization can be either given as an integer (in which case the i-th +diagram of the list is used as initial estimate) or as a diagram. +If None, it will randomly select one of the diagrams of the list +as initial estimate. +Note that persistence diagrams must be submitted as +(n x 2) numpy arrays and must not contain inf values. + + +..
testcode:: + + import gudhi.barycenter + import numpy as np + + dg1 = np.array([[0.2, 0.5]]) + dg2 = np.array([[0.2, 0.7]]) + dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]]) + dg4 = np.array([]) + pdiagset = [dg1, dg2, dg3, dg4] + bary = gudhi.wasserstein.barycenter.lagrangian_barycenter(pdiagset=pdiagset,init=3) + + message = "Wasserstein barycenter estimated:" + print(message) + print(bary) + +The output is: + +.. testoutput:: + + Wasserstein barycenter estimated: + [[0.27916667 0.55416667] + [0.7375 0.7625 ] + [0.2375 0.2625 ]] + + -- cgit v1.2.3 From 4adbdcf16f311b0b5151311f77cfead5bf065bf4 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Tue, 31 Mar 2020 11:22:50 +0200 Subject: removed barycenters specific doc files as those are included in wasserstein distance now --- src/python/doc/barycenter_sum.inc | 24 --------------- src/python/doc/barycenter_user.rst | 60 -------------------------------------- 2 files changed, 84 deletions(-) delete mode 100644 src/python/doc/barycenter_sum.inc delete mode 100644 src/python/doc/barycenter_user.rst (limited to 'src') diff --git a/src/python/doc/barycenter_sum.inc b/src/python/doc/barycenter_sum.inc deleted file mode 100644 index da2bdd84..00000000 --- a/src/python/doc/barycenter_sum.inc +++ /dev/null @@ -1,24 +0,0 @@ -.. table:: - :widths: 30 50 20 - - +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ - | .. figure:: | A Frechet mean (or barycenter) is a generalization of the arithmetic | :Author: Theo Lacombe | - | ./img/barycenter.png | mean in a non linear space such as the one of persistence diagrams. 
| | - | :figclass: align-center | Given a set of persistence diagrams :math:`\mu_1 \dots \mu_n`, it is | :Introduced in: GUDHI 3.1.0 | - | | defined as a minimizer of the variance functional, that is of | | - | Illustration of Frechet mean between persistence | :math:`\mu \mapsto \sum_{i=1}^n d_2(\mu,\mu_i)^2`. | :Copyright: MIT | - | diagrams. | where :math:`d_2` denotes the Wasserstein-2 distance between | | - | | persistence diagrams. | | - | | It is known to exist and is generically unique. However, an exact | | - | | computation is in general untractable. Current implementation | :Requires: Python Optimal Transport (POT) :math:`\geq` 0.5.1 | - | | available is based on [Turner et al, 2014], and uses an EM-scheme to | | - | | provide a local minimum of the variance functional (somewhat similar | | - | | to the Lloyd algorithm to estimate a solution to the k-means | | - | | problem). The local minimum returned depends on the initialization of| | - | | the barycenter. | | - | | The combinatorial structure of the algorithm limits its | | - | | scaling on large scale problems (thousands of diagrams and of points | | - | | per diagram). | | - +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ - | * :doc:`barycenter_user` | | - +-----------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/barycenter_user.rst b/src/python/doc/barycenter_user.rst deleted file mode 100644 index 83e9bebb..00000000 --- a/src/python/doc/barycenter_user.rst +++ /dev/null @@ -1,60 +0,0 @@ -:orphan: - -.. To get rid of WARNING: document isn't included in any toctree - -Barycenter user manual -================================ -Definition ----------- - -.. 
include:: barycenter_sum.inc - -This implementation is based on ideas from "Frechet means for distribution of -persistence diagrams", Turner et al. 2014. - -Function --------- -.. autofunction:: gudhi.barycenter.lagrangian_barycenter - - -Basic example -------------- - -This example computes the Frechet mean (aka Wasserstein barycenter) between -four persistence diagrams. -It is initialized on the 4th diagram. -As the algorithm is not convex, its output depends on the initialization and -is only a local minimum of the objective function. -Initialization can be either given as an integer (in which case the i-th -diagram of the list is used as initial estimate) or as a diagram. -If None, it will randomly select one of the diagram of the list -as initial estimate. -Note that persistence diagrams must be submitted as -(n x 2) numpy arrays and must not contain inf values. - -.. testcode:: - - import gudhi.barycenter - import numpy as np - - dg1 = np.array([[0.2, 0.5]]) - dg2 = np.array([[0.2, 0.7]]) - dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]]) - dg4 = np.array([]) - pdiagset = [dg1, dg2, dg3, dg4] - bary = gudhi.barycenter.lagrangian_barycenter(pdiagset=pdiagset,init=3) - - message = "Wasserstein barycenter estimated:" - print(message) - print(bary) - -The output is: - -.. 
testoutput:: - - Wasserstein barycenter estimated: - [[0.27916667 0.55416667] - [0.7375 0.7625 ] - [0.2375 0.2625 ]] - - -- cgit v1.2.3 From 9f55afbb17494c67709d9be58bf8bb876f704524 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Tue, 31 Mar 2020 11:24:21 +0200 Subject: added import barycenter on top of the file so that we can call for gudhi.wasserstein.barycenter --- src/python/gudhi/wasserstein.py | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein.py index 3dd993f9..8f03039b 100644 --- a/src/python/gudhi/wasserstein.py +++ b/src/python/gudhi/wasserstein.py @@ -9,6 +9,7 @@ import numpy as np import scipy.spatial.distance as sc +import barycenter try: import ot except ImportError: -- cgit v1.2.3 From 7721ac6181fc394ae0136ee176d63210e727f06f Mon Sep 17 00:00:00 2001 From: tlacombe Date: Tue, 31 Mar 2020 11:40:46 +0200 Subject: modified import in test to get consistent with gudhi.wasserstein.barycenter --- src/python/test/test_wasserstein_barycenter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py index 4d18616b..f686aef5 100755 --- a/src/python/test/test_wasserstein_barycenter.py +++ b/src/python/test/test_wasserstein_barycenter.py @@ -1,4 +1,4 @@ -from gudhi.barycenter import lagrangian_barycenter +from gudhi.wasserstein.barycenter import lagrangian_barycenter import numpy as np """ This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. 
-- cgit v1.2.3 From eeeac06a05ee99ae5780b3f37f107680a680985a Mon Sep 17 00:00:00 2001 From: tlacombe Date: Tue, 31 Mar 2020 11:54:06 +0200 Subject: removed unused import --- src/python/gudhi/barycenter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py index 0490fdd1..079bcc57 100644 --- a/src/python/gudhi/barycenter.py +++ b/src/python/gudhi/barycenter.py @@ -12,7 +12,7 @@ import ot import numpy as np import scipy.spatial.distance as sc -from gudhi.wasserstein import wasserstein_distance, _perstot +from gudhi.wasserstein import wasserstein_distance def _mean(x, m): -- cgit v1.2.3 From dae83f0907a5bd94cb483ad0f54755da2d49fb75 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Tue, 31 Mar 2020 12:49:22 +0200 Subject: changed into import .barycenter for local import in wasserstein, and modified index to remove barycenter doc --- src/python/doc/index.rst | 4 ---- src/python/gudhi/wasserstein.py | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'src') diff --git a/src/python/doc/index.rst b/src/python/doc/index.rst index 96cd3513..0e484483 100644 --- a/src/python/doc/index.rst +++ b/src/python/doc/index.rst @@ -71,10 +71,6 @@ Wasserstein distance .. include:: wasserstein_distance_sum.inc -Barycenter -============ - -.. 
include:: barycenter_sum.inc Persistence representations =========================== diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein.py index 8f03039b..760eea8c 100644 --- a/src/python/gudhi/wasserstein.py +++ b/src/python/gudhi/wasserstein.py @@ -9,7 +9,7 @@ import numpy as np import scipy.spatial.distance as sc -import barycenter +import .barycenter try: import ot except ImportError: -- cgit v1.2.3 From a924e71d2f1a649ca389cfeceb678cc45aaf9fa7 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Tue, 31 Mar 2020 12:55:51 +0200 Subject: micro modif changed a word to avoid repetition --- src/python/doc/wasserstein_distance_user.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst index 6de05afc..a077f9a4 100644 --- a/src/python/doc/wasserstein_distance_user.rst +++ b/src/python/doc/wasserstein_distance_user.rst @@ -112,7 +112,7 @@ to the Lloyd algorithm to estimate a solution to the k-means problem). The local minimum returned depends on the initialization of the barycenter. The combinatorial structure of the algorithm limits its -scaling on large scale problems (thousands of diagrams and of points +performances on large scale problems (thousands of diagrams and of points per diagram). .. 
figure:: -- cgit v1.2.3 From 1aaffd2e1fab45988d92f5e51a9d294696ff5492 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Tue, 31 Mar 2020 13:18:42 +0200 Subject: changed import to import gudhi.barycenter as barycenter --- src/python/gudhi/wasserstein.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein.py index 760eea8c..51d1d83c 100644 --- a/src/python/gudhi/wasserstein.py +++ b/src/python/gudhi/wasserstein.py @@ -9,7 +9,7 @@ import numpy as np import scipy.spatial.distance as sc -import .barycenter +import gudhi.barycenter as barycenter try: import ot except ImportError: -- cgit v1.2.3 From 842475615841f864b4ce41a2a4b69f1e189a2946 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Tue, 31 Mar 2020 15:02:32 +0200 Subject: created wasserstein repo --- src/python/gudhi/barycenter.py | 158 ---------------------------- src/python/gudhi/wasserstein.py | 125 ---------------------- src/python/gudhi/wasserstein/__init__.py | 1 + src/python/gudhi/wasserstein/barycenter.py | 158 ++++++++++++++++++++++++++++ src/python/gudhi/wasserstein/wasserstein.py | 125 ++++++++++++++++++++++ 5 files changed, 284 insertions(+), 283 deletions(-) delete mode 100644 src/python/gudhi/barycenter.py delete mode 100644 src/python/gudhi/wasserstein.py create mode 100644 src/python/gudhi/wasserstein/__init__.py create mode 100644 src/python/gudhi/wasserstein/barycenter.py create mode 100644 src/python/gudhi/wasserstein/wasserstein.py (limited to 'src') diff --git a/src/python/gudhi/barycenter.py b/src/python/gudhi/barycenter.py deleted file mode 100644 index 079bcc57..00000000 --- a/src/python/gudhi/barycenter.py +++ /dev/null @@ -1,158 +0,0 @@ -# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. -# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. 
-# Author(s): Theo Lacombe -# -# Copyright (C) 2019 Inria -# -# Modification(s): -# - YYYY/MM Author: Description of the modification - - -import ot -import numpy as np -import scipy.spatial.distance as sc - -from gudhi.wasserstein import wasserstein_distance - - -def _mean(x, m): - ''' - :param x: a list of 2D-points, off diagonal, x_0... x_{k-1} - :param m: total amount of points taken into account, - that is we have (m-k) copies of diagonal - :returns: the weighted mean of x with (m-k) copies of the diagonal - ''' - k = len(x) - if k > 0: - w = np.mean(x, axis=0) - w_delta = (w[0] + w[1]) / 2 * np.ones(2) - return (k * w + (m-k) * w_delta) / m - else: - return np.array([0, 0]) - - -def lagrangian_barycenter(pdiagset, init=None, verbose=False): - ''' - :param pdiagset: a list of size m containing numpy.array of shape (n x 2) - (n can variate), encoding a set of - persistence diagrams with only finite coordinates. - :param init: The initial value for barycenter estimate. - If None, init is made on a random diagram from the dataset. - Otherwise, it must be an int - (then we init with diagset[init]) - or a (n x 2) numpy.array enconding - a persistence diagram with n points. - :param verbose: if True, returns additional information about the - barycenter. - :returns: If not verbose (default), a numpy.array encoding - the barycenter estimate of pdiagset - (local minima of the energy function). - If pdiagset is empty, returns None. - If verbose, returns a couple (Y, log) - where Y is the barycenter estimate, - and log is a dict that contains additional informations: - - groupings, a list of list of pairs (i,j), - That is, G[k] = [(i, j) ...], where (i,j) indicates - that X[i] is matched to Y[j] - if i = -1 or j = -1, it means they - represent the diagonal. - - energy, a float representing the Frechet - energy value obtained, - that is the mean of squared distances - of observations to the output. 
- - nb_iter, integer representing the number of iterations - performed before convergence of the algorithm. - ''' - X = pdiagset # to shorten notations, not a copy - m = len(X) # number of diagrams we are averaging - if m == 0: - print("Warning: computing barycenter of empty diag set. Returns None") - return None - - # store the number of off-diagonal point for each of the X_i - nb_off_diag = np.array([len(X_i) for X_i in X]) - # Initialisation of barycenter - if init is None: - i0 = np.random.randint(m) # Index of first state for the barycenter - Y = X[i0].copy() - else: - if type(init)==int: - Y = X[init].copy() - else: - Y = init.copy() - - nb_iter = 0 - - converged = False # stoping criterion - while not converged: - nb_iter += 1 - K = len(Y) # current nb of points in Y (some might be on diagonal) - G = np.full((K, m), -1, dtype=int) # will store for each j, the (index) - # point matched in each other diagram - #(might be the diagonal). - # that is G[j, i] = k <=> y_j is matched to - # x_k in the diagram i-th diagram X[i] - updated_points = np.zeros((K, 2)) # will store the new positions of - # the points of Y. - # If points disappear, there thrown - # on [0,0] by default. - new_created_points = [] # will store potential new points. - - # Step 1 : compute optimal matching (Y, X_i) for each X_i - # and create new points in Y if needed - for i in range(m): - _, indices = wasserstein_distance(Y, X[i], matching=True, order=2., internal_p=2.) - for y_j, x_i_j in indices: - if y_j >= 0: # we matched an off diagonal point to x_i_j... - if x_i_j >= 0: # ...which is also an off-diagonal point. - G[y_j, i] = x_i_j - else: # ...which is a diagonal point - G[y_j, i] = -1 # -1 stands for the diagonal (mask) - else: # We matched a diagonal point to x_i_j... - if x_i_j >= 0: # which is a off-diag point ! 
- # need to create new point in Y - new_y = _mean(np.array([X[i][x_i_j]]), m) - # Average this point with (m-1) copies of Delta - new_created_points.append(new_y) - - # Step 2 : Update current point position thanks to groupings computed - to_delete = [] - for j in range(K): - matched_points = [X[i][G[j, i]] for i in range(m) if G[j, i] > -1] - new_y_j = _mean(matched_points, m) - if not np.array_equal(new_y_j, np.array([0,0])): - updated_points[j] = new_y_j - else: # this points is no longer of any use. - to_delete.append(j) - # we remove the point to be deleted now. - updated_points = np.delete(updated_points, to_delete, axis=0) - - # we cannot converge if there have been new created points. - if new_created_points: - Y = np.concatenate((updated_points, new_created_points)) - else: - # Step 3 : we check convergence - if np.array_equal(updated_points, Y): - converged = True - Y = updated_points - - - if verbose: - groupings = [] - energy = 0 - log = {} - n_y = len(Y) - for i in range(m): - cost, edges = wasserstein_distance(Y, X[i], matching=True, order=2., internal_p=2.) - groupings.append(edges) - energy += cost - log["groupings"] = groupings - energy = energy/m - print(energy) - log["energy"] = energy - log["nb_iter"] = nb_iter - - return Y, log - else: - return Y - diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein.py deleted file mode 100644 index 51d1d83c..00000000 --- a/src/python/gudhi/wasserstein.py +++ /dev/null @@ -1,125 +0,0 @@ -# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. -# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. 
-# Author(s): Theo Lacombe -# -# Copyright (C) 2019 Inria -# -# Modification(s): -# - YYYY/MM Author: Description of the modification - -import numpy as np -import scipy.spatial.distance as sc -import gudhi.barycenter as barycenter -try: - import ot -except ImportError: - print("POT (Python Optimal Transport) package is not installed. Try to run $ conda install -c conda-forge pot ; or $ pip install POT") - -def _proj_on_diag(X): - ''' - :param X: (n x 2) array encoding the points of a persistent diagram. - :returns: (n x 2) array encoding the (respective orthogonal) projections of the points onto the diagonal - ''' - Z = (X[:,0] + X[:,1]) / 2. - return np.array([Z , Z]).T - - -def _build_dist_matrix(X, Y, order=2., internal_p=2.): - ''' - :param X: (n x 2) numpy.array encoding the (points of the) first diagram. - :param Y: (m x 2) numpy.array encoding the second diagram. - :param order: exponent for the Wasserstein metric. - :param internal_p: Ground metric (i.e. norm L^p). - :returns: (n+1) x (m+1) np.array encoding the cost matrix C. - For 0 <= i < n, 0 <= j < m, C[i,j] encodes the distance between X[i] and Y[j], - while C[i, m] (resp. C[n, j]) encodes the distance (to the p) between X[i] (resp Y[j]) - and its orthogonal projection onto the diagonal. - note also that C[n, m] = 0 (it costs nothing to move from the diagonal to the diagonal). 
- ''' - Xdiag = _proj_on_diag(X) - Ydiag = _proj_on_diag(Y) - if np.isinf(internal_p): - C = sc.cdist(X,Y, metric='chebyshev')**order - Cxd = np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order - Cdy = np.linalg.norm(Y - Ydiag, ord=internal_p, axis=1)**order - else: - C = sc.cdist(X,Y, metric='minkowski', p=internal_p)**order - Cxd = np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order - Cdy = np.linalg.norm(Y - Ydiag, ord=internal_p, axis=1)**order - Cf = np.hstack((C, Cxd[:,None])) - Cdy = np.append(Cdy, 0) - - Cf = np.vstack((Cf, Cdy[None,:])) - - return Cf - - -def _perstot(X, order, internal_p): - ''' - :param X: (n x 2) numpy.array (points of a given diagram). - :param order: exponent for Wasserstein. Default value is 2. - :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm). - :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram). - ''' - Xdiag = _proj_on_diag(X) - return (np.sum(np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order))**(1./order) - - -def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2.): - ''' - :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points - (i.e. with infinite coordinate). - :param Y: (m x 2) numpy.array encoding the second diagram. - :param matching: if True, computes and returns the optimal matching between X and Y, encoded as - a (n x 2) np.array [...[i,j]...], meaning the i-th point in X is matched to - the j-th point in Y, with the convention (-1) represents the diagonal. - :param order: exponent for Wasserstein; Default value is 2. - :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); - Default value is 2 (Euclidean norm). - :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with - respect to the internal_p-norm as ground metric. 
- If matching is set to True, also returns the optimal matching between X and Y. - ''' - n = len(X) - m = len(Y) - - # handle empty diagrams - if X.size == 0: - if Y.size == 0: - if not matching: - return 0. - else: - return 0., np.array([]) - else: - if not matching: - return _perstot(Y, order, internal_p) - else: - return _perstot(Y, order, internal_p), np.array([[-1, j] for j in range(m)]) - elif Y.size == 0: - if not matching: - return _perstot(X, order, internal_p) - else: - return _perstot(X, order, internal_p), np.array([[i, -1] for i in range(n)]) - - M = _build_dist_matrix(X, Y, order=order, internal_p=internal_p) - a = np.ones(n+1) # weight vector of the input diagram. Uniform here. - a[-1] = m - b = np.ones(m+1) # weight vector of the input diagram. Uniform here. - b[-1] = n - - if matching: - P = ot.emd(a=a,b=b,M=M, numItermax=2000000) - ot_cost = np.sum(np.multiply(P,M)) - P[-1, -1] = 0 # Remove matching corresponding to the diagonal - match = np.argwhere(P) - # Now we turn to -1 points encoding the diagonal - match[:,0][match[:,0] >= n] = -1 - match[:,1][match[:,1] >= m] = -1 - return ot_cost ** (1./order) , match - - # Comptuation of the otcost using the ot.emd2 library. - # Note: it is the Wasserstein distance to the power q. - # The default numItermax=100000 is not sufficient for some examples with 5000 points, what is a good value? 
- ot_cost = ot.emd2(a, b, M, numItermax=2000000) - - return ot_cost ** (1./order) diff --git a/src/python/gudhi/wasserstein/__init__.py b/src/python/gudhi/wasserstein/__init__.py new file mode 100644 index 00000000..ed225ba4 --- /dev/null +++ b/src/python/gudhi/wasserstein/__init__.py @@ -0,0 +1 @@ +from .wasserstein import wasserstein_distance diff --git a/src/python/gudhi/wasserstein/barycenter.py b/src/python/gudhi/wasserstein/barycenter.py new file mode 100644 index 00000000..079bcc57 --- /dev/null +++ b/src/python/gudhi/wasserstein/barycenter.py @@ -0,0 +1,158 @@ +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +# Author(s): Theo Lacombe +# +# Copyright (C) 2019 Inria +# +# Modification(s): +# - YYYY/MM Author: Description of the modification + + +import ot +import numpy as np +import scipy.spatial.distance as sc + +from gudhi.wasserstein import wasserstein_distance + + +def _mean(x, m): + ''' + :param x: a list of 2D-points, off diagonal, x_0... x_{k-1} + :param m: total amount of points taken into account, + that is we have (m-k) copies of diagonal + :returns: the weighted mean of x with (m-k) copies of the diagonal + ''' + k = len(x) + if k > 0: + w = np.mean(x, axis=0) + w_delta = (w[0] + w[1]) / 2 * np.ones(2) + return (k * w + (m-k) * w_delta) / m + else: + return np.array([0, 0]) + + +def lagrangian_barycenter(pdiagset, init=None, verbose=False): + ''' + :param pdiagset: a list of size m containing numpy.array of shape (n x 2) + (n can variate), encoding a set of + persistence diagrams with only finite coordinates. + :param init: The initial value for barycenter estimate. + If None, init is made on a random diagram from the dataset. + Otherwise, it must be an int + (then we init with diagset[init]) + or a (n x 2) numpy.array enconding + a persistence diagram with n points. 
+ :param verbose: if True, returns additional information about the + barycenter. + :returns: If not verbose (default), a numpy.array encoding + the barycenter estimate of pdiagset + (local minima of the energy function). + If pdiagset is empty, returns None. + If verbose, returns a couple (Y, log) + where Y is the barycenter estimate, + and log is a dict that contains additional informations: + - groupings, a list of list of pairs (i,j), + That is, G[k] = [(i, j) ...], where (i,j) indicates + that X[i] is matched to Y[j] + if i = -1 or j = -1, it means they + represent the diagonal. + - energy, a float representing the Frechet + energy value obtained, + that is the mean of squared distances + of observations to the output. + - nb_iter, integer representing the number of iterations + performed before convergence of the algorithm. + ''' + X = pdiagset # to shorten notations, not a copy + m = len(X) # number of diagrams we are averaging + if m == 0: + print("Warning: computing barycenter of empty diag set. Returns None") + return None + + # store the number of off-diagonal point for each of the X_i + nb_off_diag = np.array([len(X_i) for X_i in X]) + # Initialisation of barycenter + if init is None: + i0 = np.random.randint(m) # Index of first state for the barycenter + Y = X[i0].copy() + else: + if type(init)==int: + Y = X[init].copy() + else: + Y = init.copy() + + nb_iter = 0 + + converged = False # stoping criterion + while not converged: + nb_iter += 1 + K = len(Y) # current nb of points in Y (some might be on diagonal) + G = np.full((K, m), -1, dtype=int) # will store for each j, the (index) + # point matched in each other diagram + #(might be the diagonal). + # that is G[j, i] = k <=> y_j is matched to + # x_k in the diagram i-th diagram X[i] + updated_points = np.zeros((K, 2)) # will store the new positions of + # the points of Y. + # If points disappear, there thrown + # on [0,0] by default. + new_created_points = [] # will store potential new points. 
+ + # Step 1 : compute optimal matching (Y, X_i) for each X_i + # and create new points in Y if needed + for i in range(m): + _, indices = wasserstein_distance(Y, X[i], matching=True, order=2., internal_p=2.) + for y_j, x_i_j in indices: + if y_j >= 0: # we matched an off diagonal point to x_i_j... + if x_i_j >= 0: # ...which is also an off-diagonal point. + G[y_j, i] = x_i_j + else: # ...which is a diagonal point + G[y_j, i] = -1 # -1 stands for the diagonal (mask) + else: # We matched a diagonal point to x_i_j... + if x_i_j >= 0: # which is a off-diag point ! + # need to create new point in Y + new_y = _mean(np.array([X[i][x_i_j]]), m) + # Average this point with (m-1) copies of Delta + new_created_points.append(new_y) + + # Step 2 : Update current point position thanks to groupings computed + to_delete = [] + for j in range(K): + matched_points = [X[i][G[j, i]] for i in range(m) if G[j, i] > -1] + new_y_j = _mean(matched_points, m) + if not np.array_equal(new_y_j, np.array([0,0])): + updated_points[j] = new_y_j + else: # this points is no longer of any use. + to_delete.append(j) + # we remove the point to be deleted now. + updated_points = np.delete(updated_points, to_delete, axis=0) + + # we cannot converge if there have been new created points. + if new_created_points: + Y = np.concatenate((updated_points, new_created_points)) + else: + # Step 3 : we check convergence + if np.array_equal(updated_points, Y): + converged = True + Y = updated_points + + + if verbose: + groupings = [] + energy = 0 + log = {} + n_y = len(Y) + for i in range(m): + cost, edges = wasserstein_distance(Y, X[i], matching=True, order=2., internal_p=2.) 
+ groupings.append(edges) + energy += cost + log["groupings"] = groupings + energy = energy/m + print(energy) + log["energy"] = energy + log["nb_iter"] = nb_iter + + return Y, log + else: + return Y + diff --git a/src/python/gudhi/wasserstein/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py new file mode 100644 index 00000000..e1233eec --- /dev/null +++ b/src/python/gudhi/wasserstein/wasserstein.py @@ -0,0 +1,125 @@ +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +# Author(s): Theo Lacombe +# +# Copyright (C) 2019 Inria +# +# Modification(s): +# - YYYY/MM Author: Description of the modification + +import numpy as np +import scipy.spatial.distance as sc + +try: + import ot +except ImportError: + print("POT (Python Optimal Transport) package is not installed. Try to run $ conda install -c conda-forge pot ; or $ pip install POT") + +def _proj_on_diag(X): + ''' + :param X: (n x 2) array encoding the points of a persistent diagram. + :returns: (n x 2) array encoding the (respective orthogonal) projections of the points onto the diagonal + ''' + Z = (X[:,0] + X[:,1]) / 2. + return np.array([Z , Z]).T + + +def _build_dist_matrix(X, Y, order=2., internal_p=2.): + ''' + :param X: (n x 2) numpy.array encoding the (points of the) first diagram. + :param Y: (m x 2) numpy.array encoding the second diagram. + :param order: exponent for the Wasserstein metric. + :param internal_p: Ground metric (i.e. norm L^p). + :returns: (n+1) x (m+1) np.array encoding the cost matrix C. + For 0 <= i < n, 0 <= j < m, C[i,j] encodes the distance between X[i] and Y[j], + while C[i, m] (resp. C[n, j]) encodes the distance (to the p) between X[i] (resp Y[j]) + and its orthogonal projection onto the diagonal. + note also that C[n, m] = 0 (it costs nothing to move from the diagonal to the diagonal). 
+ ''' + Xdiag = _proj_on_diag(X) + Ydiag = _proj_on_diag(Y) + if np.isinf(internal_p): + C = sc.cdist(X,Y, metric='chebyshev')**order + Cxd = np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order + Cdy = np.linalg.norm(Y - Ydiag, ord=internal_p, axis=1)**order + else: + C = sc.cdist(X,Y, metric='minkowski', p=internal_p)**order + Cxd = np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order + Cdy = np.linalg.norm(Y - Ydiag, ord=internal_p, axis=1)**order + Cf = np.hstack((C, Cxd[:,None])) + Cdy = np.append(Cdy, 0) + + Cf = np.vstack((Cf, Cdy[None,:])) + + return Cf + + +def _perstot(X, order, internal_p): + ''' + :param X: (n x 2) numpy.array (points of a given diagram). + :param order: exponent for Wasserstein. Default value is 2. + :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm). + :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram). + ''' + Xdiag = _proj_on_diag(X) + return (np.sum(np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order))**(1./order) + + +def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2.): + ''' + :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points + (i.e. with infinite coordinate). + :param Y: (m x 2) numpy.array encoding the second diagram. + :param matching: if True, computes and returns the optimal matching between X and Y, encoded as + a (n x 2) np.array [...[i,j]...], meaning the i-th point in X is matched to + the j-th point in Y, with the convention (-1) represents the diagonal. + :param order: exponent for Wasserstein; Default value is 2. + :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); + Default value is 2 (Euclidean norm). + :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with + respect to the internal_p-norm as ground metric. 
+ If matching is set to True, also returns the optimal matching between X and Y. + ''' + n = len(X) + m = len(Y) + + # handle empty diagrams + if X.size == 0: + if Y.size == 0: + if not matching: + return 0. + else: + return 0., np.array([]) + else: + if not matching: + return _perstot(Y, order, internal_p) + else: + return _perstot(Y, order, internal_p), np.array([[-1, j] for j in range(m)]) + elif Y.size == 0: + if not matching: + return _perstot(X, order, internal_p) + else: + return _perstot(X, order, internal_p), np.array([[i, -1] for i in range(n)]) + + M = _build_dist_matrix(X, Y, order=order, internal_p=internal_p) + a = np.ones(n+1) # weight vector of the input diagram. Uniform here. + a[-1] = m + b = np.ones(m+1) # weight vector of the input diagram. Uniform here. + b[-1] = n + + if matching: + P = ot.emd(a=a,b=b,M=M, numItermax=2000000) + ot_cost = np.sum(np.multiply(P,M)) + P[-1, -1] = 0 # Remove matching corresponding to the diagonal + match = np.argwhere(P) + # Now we turn to -1 points encoding the diagonal + match[:,0][match[:,0] >= n] = -1 + match[:,1][match[:,1] >= m] = -1 + return ot_cost ** (1./order) , match + + # Comptuation of the otcost using the ot.emd2 library. + # Note: it is the Wasserstein distance to the power q. + # The default numItermax=100000 is not sufficient for some examples with 5000 points, what is a good value? 
+ ot_cost = ot.emd2(a, b, M, numItermax=2000000) + + return ot_cost ** (1./order) -- cgit v1.2.3 From 266f1eb706ecf31733acbcdded3b04d8d270fb60 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Tue, 31 Mar 2020 17:43:53 +0200 Subject: update CMakeLists to make things compatible with wasserstein/ repo --- src/python/CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src') diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index b7d43bea..a91ca30a 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -56,7 +56,6 @@ if(PYTHONINTERP_FOUND) # Modules that should not be auto-imported in __init__.py set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'representations', ") set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'wasserstein', ") - set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'barycenter', ") set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'point_cloud', ") add_gudhi_debug_info("Python version ${PYTHON_VERSION_STRING}") @@ -217,8 +216,7 @@ if(PYTHONINTERP_FOUND) # Other .py files file(COPY "gudhi/persistence_graphical_tools.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") file(COPY "gudhi/representations" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi/") - file(COPY "gudhi/wasserstein.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") - file(COPY "gudhi/barycenter.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") + file(COPY "gudhi/wasserstein" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") file(COPY "gudhi/point_cloud" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") add_custom_command( -- cgit v1.2.3 From af76331b5b4c709f46a3d705320bfedcf3a60924 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Tue, 31 Mar 2020 18:08:05 +0200 Subject: correction typo user.rst --- src/python/doc/wasserstein_distance_user.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/python/doc/wasserstein_distance_user.rst 
b/src/python/doc/wasserstein_distance_user.rst index a077f9a4..c6d49db1 100644 --- a/src/python/doc/wasserstein_distance_user.rst +++ b/src/python/doc/wasserstein_distance_user.rst @@ -128,7 +128,7 @@ per diagram). Basic example ------------- -This example computes the Frechet mean (aka Wasserstein barycenter) between +This example estimates the Frechet mean (aka Wasserstein barycenter) between four persistence diagrams. It is initialized on the 4th diagram. As the algorithm is not convex, its output depends on the initialization and @@ -143,7 +143,7 @@ Note that persistence diagrams must be submitted as .. testcode:: - import gudhi.barycenter + from gudhi.wasserstein.barycenter import lagrangian_barycenter import numpy as np dg1 = np.array([[0.2, 0.5]]) @@ -151,7 +151,7 @@ Note that persistence diagrams must be submitted as dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]]) dg4 = np.array([]) pdiagset = [dg1, dg2, dg3, dg4] - bary = gudhi.wasserstein.barycenter.lagrangian_barycenter(pdiagset=pdiagset,init=3) + bary = lagrangian_barycenter(pdiagset=pdiagset,init=3) message = "Wasserstein barycenter estimated:" print(message) -- cgit v1.2.3 From 9d89f57376e619515d99ad88c2cdeef35daaedd5 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Wed, 1 Apr 2020 09:04:18 +0200 Subject: code review: use operator[] instead of at() --- src/Alpha_complex/include/gudhi/Alpha_complex.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Alpha_complex/include/gudhi/Alpha_complex.h b/src/Alpha_complex/include/gudhi/Alpha_complex.h index eb4ef427..4369071c 100644 --- a/src/Alpha_complex/include/gudhi/Alpha_complex.h +++ b/src/Alpha_complex/include/gudhi/Alpha_complex.h @@ -248,6 +248,16 @@ class Alpha_complex { } } + /** \brief get_point_ returns the point corresponding to the vertex given as parameter. + * Only for internal use for faster access. + * + * @param[in] vertex Vertex handle of the point to retrieve. 
+ * @return The point found. + */ + const Point_d& get_point_(std::size_t vertex) const { + return vertex_handle_to_iterator_[vertex]->point(); + } + template auto& get_cache(SimplicialComplexForAlpha& cplx, typename SimplicialComplexForAlpha::Simplex_handle s) { auto k = cplx.key(s); @@ -258,7 +268,7 @@ class Alpha_complex { thread_local std::vector v; v.clear(); for (auto vertex : cplx.simplex_vertex_range(s)) - v.push_back(get_point(vertex)); + v.push_back(get_point_(vertex)); Point_d c = kernel_.construct_circumcenter_d_object()(v.cbegin(), v.cend()); typename Kernel::FT r = kernel_.squared_distance_d_object()(c, v[0]); cache_.emplace_back(std::move(c), std::move(r)); @@ -423,7 +433,7 @@ class Alpha_complex { while(shortiter != enditer && *longiter == *shortiter) { ++longiter; ++shortiter; } Vertex_handle extra = *longiter; auto const& cache=get_cache(complex, f_boundary); - bool is_gab = kernel_.squared_distance_d_object()(cache.first, get_point(extra)) >= cache.second; + bool is_gab = kernel_.squared_distance_d_object()(cache.first, get_point_(extra)) >= cache.second; #ifdef DEBUG_TRACES std::clog << " | Tau is_gabriel(Sigma)=" << is_gab << " - vertexForGabriel=" << extra << std::endl; #endif // DEBUG_TRACES -- cgit v1.2.3 From cfcbe923f132a770363e6a240df8f6911cdd39e9 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Wed, 1 Apr 2020 10:34:48 +0200 Subject: improved doc, turns Basic examples as subsections using * --- src/python/doc/wasserstein_distance_sum.inc | 6 +++--- src/python/doc/wasserstein_distance_user.rst | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/python/doc/wasserstein_distance_sum.inc b/src/python/doc/wasserstein_distance_sum.inc index f10472bc..f9308e5e 100644 --- a/src/python/doc/wasserstein_distance_sum.inc +++ b/src/python/doc/wasserstein_distance_sum.inc @@ -4,10 +4,10 @@ 
+-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ | .. figure:: | The q-Wasserstein distance measures the similarity between two | :Author: Theo Lacombe | | ../../doc/Bottleneck_distance/perturb_pd.png | persistence diagrams using the sum of all edges lengths (instead of | | - | :figclass: align-center | the maximum). It allows to define sophisticated objects such as | :Introduced in: GUDHI 3.1.0 | + | :figclass: align-center | the maximum). It allows to define sophisticated objects such as | :Since: GUDHI 3.1.0 | | | barycenters of a family of persistence diagrams. | | - | Wasserstein distance is the q-th root of the sum of the | | :Copyright: MIT | - | edge lengths to the power q. | | | + | | | :License: MIT | + | | | | | | | :Requires: Python Optimal Transport (POT) :math:`\geq` 0.5.1 | +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ | * :doc:`wasserstein_distance_user` | | diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst index c6d49db1..c5c250b5 100644 --- a/src/python/doc/wasserstein_distance_user.rst +++ b/src/python/doc/wasserstein_distance_user.rst @@ -9,7 +9,7 @@ Definition .. include:: wasserstein_distance_sum.inc -The q-Wasserstein distance is defined as the minimal value +The q-Wasserstein distance is defined as the minimal value achieved by a perfect matching between the points of the two diagrams (+ all diagonal points), where the value of a matching is defined as the q-th root of the sum of all edge lengths to the power q. Edge lengths @@ -32,7 +32,7 @@ Morozov, and Arnur Nigmetov. .. 
autofunction:: gudhi.hera.wasserstein_distance Basic example -------------- +************* This example computes the 1-Wasserstein distance from 2 persistence diagrams with Euclidean ground metric. Note that persistence diagrams must be submitted as (n x 2) numpy arrays and must not contain inf values. @@ -123,10 +123,10 @@ per diagram). diagrams. -.. autofunction:: gudhi.barycenter.lagrangian_barycenter +.. autofunction:: gudhi.wasserstein.barycenter.lagrangian_barycenter Basic example -------------- +************* This example estimates the Frechet mean (aka Wasserstein barycenter) between four persistence diagrams. @@ -135,7 +135,7 @@ As the algorithm is not convex, its output depends on the initialization and is only a local minimum of the objective function. Initialization can be either given as an integer (in which case the i-th diagram of the list is used as initial estimate) or as a diagram. -If None, it will randomly select one of the diagram of the list +If None, it will randomly select one of the diagrams of the list as initial estimate. Note that persistence diagrams must be submitted as (n x 2) numpy arrays and must not contain inf values. -- cgit v1.2.3 From 0b19e1f991fdebbdb622d3101135eaee65c4ed5d Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 1 Apr 2020 14:45:37 +0200 Subject: Split the cache per dimension Try to reduce slightly the memory use. 
--- src/Alpha_complex/include/gudhi/Alpha_complex.h | 33 +++++++++++++++++++------ 1 file changed, 25 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/Alpha_complex/include/gudhi/Alpha_complex.h b/src/Alpha_complex/include/gudhi/Alpha_complex.h index 4369071c..ba91998d 100644 --- a/src/Alpha_complex/include/gudhi/Alpha_complex.h +++ b/src/Alpha_complex/include/gudhi/Alpha_complex.h @@ -112,9 +112,6 @@ class Alpha_complex { typedef typename Kernel::Side_of_bounded_sphere_d Is_Gabriel; typedef typename Kernel::Point_dimension_d Point_Dimension; - // Type required to compute squared radius, or side of bounded sphere on a vector of points. - typedef typename std::vector Vector_of_CGAL_points; - // Vertex_iterator type from CGAL. typedef typename Delaunay_triangulation::Vertex_iterator CGAL_vertex_iterator; @@ -124,6 +121,9 @@ class Alpha_complex { // Structure to switch from simplex tree vertex handle to CGAL vertex iterator. typedef typename std::vector< CGAL_vertex_iterator > Vector_vertex_iterator; + // Numeric type of coordinates in the kernel + typedef typename Kernel::FT FT; + private: /** \brief Vertex iterator vector to switch from simplex tree vertex handle to CGAL vertex iterator. * Vertex handles are inserted sequentially, starting at 0.*/ @@ -133,7 +133,7 @@ class Alpha_complex { /** \brief Kernel for triangulation_ functions access.*/ Kernel kernel_; /** \brief Cache for geometric constructions: circumcenter and squared radius of a simplex.*/ - std::vector> cache_; + std::vector> cache_, old_cache_; public: /** \brief Alpha_complex constructor from an OFF file name. @@ -258,24 +258,39 @@ class Alpha_complex { return vertex_handle_to_iterator_[vertex]->point(); } + /// Return a reference to the circumcenter and circumradius, writing them in the cache if necessary. 
template auto& get_cache(SimplicialComplexForAlpha& cplx, typename SimplicialComplexForAlpha::Simplex_handle s) { auto k = cplx.key(s); if(k==cplx.null_key()){ k = cache_.size(); cplx.assign_key(s, k); - // Use a transform_range? Check the impact on perf. + // Using a transform_range is slower, currently. thread_local std::vector v; v.clear(); for (auto vertex : cplx.simplex_vertex_range(s)) v.push_back(get_point_(vertex)); Point_d c = kernel_.construct_circumcenter_d_object()(v.cbegin(), v.cend()); - typename Kernel::FT r = kernel_.squared_distance_d_object()(c, v[0]); + FT r = kernel_.squared_distance_d_object()(c, v[0]); cache_.emplace_back(std::move(c), std::move(r)); } return cache_[k]; } + /// Return the circumradius, either from the old cache or computed, without writing to the cache. + template + auto radius(SimplicialComplexForAlpha& cplx, typename SimplicialComplexForAlpha::Simplex_handle s) { + auto k = cplx.key(s); + if(k!=cplx.null_key()) + return old_cache_[k].second; + // Using a transform_range is slower, currently. + thread_local std::vector v; + v.clear(); + for (auto vertex : cplx.simplex_vertex_range(s)) + v.push_back(get_point_(vertex)); + return kernel_.compute_squared_radius_d_object()(v.cbegin(), v.cend()); + } + public: /** \brief Inserts all Delaunay triangulation into the simplicial complex. 
* It also computes the filtration values accordingly to the \ref createcomplexalgorithm if default_filtration_value @@ -365,11 +380,11 @@ class Alpha_complex { Filtration_value alpha_complex_filtration = 0.0; // No need to compute squared_radius on a single point - alpha is 0.0 if (f_simplex_dim > 0) { - auto const& sqrad = get_cache(complex, f_simplex).second; + auto const& sqrad = radius(complex, f_simplex); #if CGAL_VERSION_NR >= 1050000000 if(exact) CGAL::exact(sqrad); #endif - CGAL::NT_converter cv; + CGAL::NT_converter cv; alpha_complex_filtration = cv(sqrad); } complex.assign_filtration(f_simplex, alpha_complex_filtration); @@ -382,6 +397,8 @@ class Alpha_complex { propagate_alpha_filtration(complex, f_simplex); } } + old_cache_ = std::move(cache_); + cache_.clear(); } // -------------------------------------------------------------------------------------------- -- cgit v1.2.3 From c36080ab9e478cd0d44bfd8d5bb8f4726a8aa937 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Wed, 1 Apr 2020 20:24:01 +0200 Subject: improved doc readability --- src/python/gudhi/wasserstein/barycenter.py | 54 ++++++++++++++++-------------- 1 file changed, 28 insertions(+), 26 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/wasserstein/barycenter.py b/src/python/gudhi/wasserstein/barycenter.py index 079bcc57..fae6b68f 100644 --- a/src/python/gudhi/wasserstein/barycenter.py +++ b/src/python/gudhi/wasserstein/barycenter.py @@ -33,35 +33,37 @@ def _mean(x, m): def lagrangian_barycenter(pdiagset, init=None, verbose=False): ''' - :param pdiagset: a list of size m containing numpy.array of shape (n x 2) - (n can variate), encoding a set of + :param pdiagset: a list of ``numpy.array`` of shape `(n x 2)` + (`n` can variate), encoding a set of persistence diagrams with only finite coordinates. :param init: The initial value for barycenter estimate. - If None, init is made on a random diagram from the dataset. 
- Otherwise, it must be an int - (then we init with diagset[init]) - or a (n x 2) numpy.array enconding - a persistence diagram with n points. - :param verbose: if True, returns additional information about the + If ``None``, init is made on a random diagram from the dataset. + Otherwise, it can be an ``int`` + (then initialization is made on ``pdiagset[init]``) + or a `(n x 2)` ``numpy.array`` enconding + a persistence diagram with `n` points. + :type init: int, (n x 2) np.array + :param verbose: if ``True``, returns additional information about the barycenter. - :returns: If not verbose (default), a numpy.array encoding - the barycenter estimate of pdiagset - (local minima of the energy function). - If pdiagset is empty, returns None. - If verbose, returns a couple (Y, log) - where Y is the barycenter estimate, - and log is a dict that contains additional informations: - - groupings, a list of list of pairs (i,j), - That is, G[k] = [(i, j) ...], where (i,j) indicates - that X[i] is matched to Y[j] - if i = -1 or j = -1, it means they - represent the diagonal. - - energy, a float representing the Frechet - energy value obtained, - that is the mean of squared distances - of observations to the output. - - nb_iter, integer representing the number of iterations - performed before convergence of the algorithm. + :type verbose: boolean + :returns: If not verbose (default), a ``numpy.array`` encoding + the barycenter estimate of pdiagset + (local minimum of the energy function). + If ``pdiagset`` is empty, returns ``None``. + If verbose, returns a couple ``(Y, log)`` + where ``Y`` is the barycenter estimate, + and ``log`` is a ``dict`` that contains additional informations: + + - `"groupings"`, a list of list of pairs ``(i,j)``. + Namely, ``G[k] = [...(i, j)...]``, where ``(i,j)`` indicates + that ``pdiagset[k][i]`` is matched to ``Y[j]`` + if ``i = -1`` or ``j = -1``, it means they + represent the diagonal. 
+ + - `"energy"`, ``float`` representing the Frechet energy value obtained. + It is the mean of squared distances of observations to the output. + + - `"nb_iter"`, ``int`` number of iterations performed before convergence of the algorithm. ''' X = pdiagset # to shorten notations, not a copy m = len(X) # number of diagrams we are averaging -- cgit v1.2.3 From 731358cbfe3880b02a58c70923b5a990ddff7644 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Wed, 1 Apr 2020 20:27:27 +0200 Subject: improved doc, adding double quot for init --- src/python/gudhi/wasserstein/barycenter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/python/gudhi/wasserstein/barycenter.py b/src/python/gudhi/wasserstein/barycenter.py index fae6b68f..e879b7dd 100644 --- a/src/python/gudhi/wasserstein/barycenter.py +++ b/src/python/gudhi/wasserstein/barycenter.py @@ -42,7 +42,7 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): (then initialization is made on ``pdiagset[init]``) or a `(n x 2)` ``numpy.array`` enconding a persistence diagram with `n` points. - :type init: int, (n x 2) np.array + :type init: ``int``, or (n x 2) ``np.array`` :param verbose: if ``True``, returns additional information about the barycenter. 
:type verbose: boolean -- cgit v1.2.3 From 4cfe8411f808f52bee0ba37e28fa9f6cc3519abb Mon Sep 17 00:00:00 2001 From: tlacombe Date: Fri, 3 Apr 2020 17:27:47 +0200 Subject: removed the print of energy in verbose mode, added by error --- src/python/gudhi/wasserstein/barycenter.py | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/python/gudhi/wasserstein/barycenter.py b/src/python/gudhi/wasserstein/barycenter.py index e879b7dd..99f29a1e 100644 --- a/src/python/gudhi/wasserstein/barycenter.py +++ b/src/python/gudhi/wasserstein/barycenter.py @@ -150,7 +150,6 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): energy += cost log["groupings"] = groupings energy = energy/m - print(energy) log["energy"] = energy log["nb_iter"] = nb_iter -- cgit v1.2.3 From 6acbc89d185d1c537778fb2d4a8503bab61fca31 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 3 Apr 2020 21:04:52 +0200 Subject: Split compute_persistence from get_persistence. --- src/python/gudhi/cubical_complex.pyx | 6 +++-- src/python/gudhi/periodic_cubical_complex.pyx | 6 +++-- src/python/gudhi/simplex_tree.pxd | 3 ++- src/python/gudhi/simplex_tree.pyx | 6 +++-- .../include/Persistent_cohomology_interface.h | 29 ++++++++++++---------- 5 files changed, 30 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index d5ad1266..ce844558 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -35,7 +35,8 @@ cdef extern from "Cubical_complex_interface.h" namespace "Gudhi": cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Cubical_complex_persistence_interface "Gudhi::Persistent_cohomology_interface>": Cubical_complex_persistence_interface(Bitmap_cubical_complex_base_interface * st, bool persistence_dim_max) - vector[pair[int, pair[double, double]]] get_persistence(int homology_coeff_field, double min_persistence) + void 
compute_persistence(int homology_coeff_field, double min_persistence) + vector[pair[int, pair[double, double]]] get_persistence() vector[int] betti_numbers() vector[int] persistent_betti_numbers(double from_value, double to_value) vector[pair[double,double]] intervals_in_dimension(int dimension) @@ -149,7 +150,8 @@ cdef class CubicalComplex: self.pcohptr = new Cubical_complex_persistence_interface(self.thisptr, True) cdef vector[pair[int, pair[double, double]]] persistence_result if self.pcohptr != NULL: - persistence_result = self.pcohptr.get_persistence(homology_coeff_field, min_persistence) + self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) + persistence_result = self.pcohptr.get_persistence() return persistence_result def betti_numbers(self): diff --git a/src/python/gudhi/periodic_cubical_complex.pyx b/src/python/gudhi/periodic_cubical_complex.pyx index fd08b976..ff5ef3bd 100644 --- a/src/python/gudhi/periodic_cubical_complex.pyx +++ b/src/python/gudhi/periodic_cubical_complex.pyx @@ -32,7 +32,8 @@ cdef extern from "Cubical_complex_interface.h" namespace "Gudhi": cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Periodic_cubical_complex_persistence_interface "Gudhi::Persistent_cohomology_interface>>": Periodic_cubical_complex_persistence_interface(Periodic_cubical_complex_base_interface * st, bool persistence_dim_max) - vector[pair[int, pair[double, double]]] get_persistence(int homology_coeff_field, double min_persistence) + void compute_persistence(int homology_coeff_field, double min_persistence) + vector[pair[int, pair[double, double]]] get_persistence() vector[int] betti_numbers() vector[int] persistent_betti_numbers(double from_value, double to_value) vector[pair[double,double]] intervals_in_dimension(int dimension) @@ -154,7 +155,8 @@ cdef class PeriodicCubicalComplex: self.pcohptr = new Periodic_cubical_complex_persistence_interface(self.thisptr, True) cdef vector[pair[int, pair[double, 
double]]] persistence_result if self.pcohptr != NULL: - persistence_result = self.pcohptr.get_persistence(homology_coeff_field, min_persistence) + self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) + persistence_result = self.pcohptr.get_persistence() return persistence_result def betti_numbers(self): diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd index 595f22bb..44040bcb 100644 --- a/src/python/gudhi/simplex_tree.pxd +++ b/src/python/gudhi/simplex_tree.pxd @@ -71,7 +71,8 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi": cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Simplex_tree_persistence_interface "Gudhi::Persistent_cohomology_interface>": Simplex_tree_persistence_interface(Simplex_tree_interface_full_featured * st, bool persistence_dim_max) - vector[pair[int, pair[double, double]]] get_persistence(int homology_coeff_field, double min_persistence) + void compute_persistence(int homology_coeff_field, double min_persistence) + vector[pair[int, pair[double, double]]] get_persistence() vector[int] betti_numbers() vector[int] persistent_betti_numbers(double from_value, double to_value) vector[pair[double,double]] intervals_in_dimension(int dimension) diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index cc3753e1..69e645b4 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -443,7 +443,8 @@ cdef class SimplexTree: if self.pcohptr != NULL: del self.pcohptr self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), False) - persistence_result = self.pcohptr.get_persistence(homology_coeff_field, -1.) + self.pcohptr.compute_persistence(homology_coeff_field, -1.) 
+ persistence_result = self.pcohptr.get_persistence() return self.get_ptr().compute_extended_persistence_subdiagrams(persistence_result, min_persistence) @@ -470,7 +471,8 @@ cdef class SimplexTree: self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), persistence_dim_max) cdef vector[pair[int, pair[double, double]]] persistence_result if self.pcohptr != NULL: - persistence_result = self.pcohptr.get_persistence(homology_coeff_field, min_persistence) + self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) + persistence_result = self.pcohptr.get_persistence() return persistence_result def betti_numbers(self): diff --git a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h index 8c79e6f3..a29ebbee 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -23,6 +23,7 @@ template class Persistent_cohomology_interface : public persistent_cohomology::Persistent_cohomology { private: + typedef persistent_cohomology::Persistent_cohomology Base; /* * Compare two intervals by dimension, then by length. 
*/ @@ -43,25 +44,28 @@ persistent_cohomology::Persistent_cohomology(*stptr), + : Base(*stptr), stptr_(stptr) { } Persistent_cohomology_interface(FilteredComplex* stptr, bool persistence_dim_max) - : persistent_cohomology::Persistent_cohomology(*stptr, persistence_dim_max), + : Base(*stptr, persistence_dim_max), stptr_(stptr) { } - std::vector>> get_persistence(int homology_coeff_field, - double min_persistence) { - persistent_cohomology::Persistent_cohomology::init_coefficients(homology_coeff_field); - persistent_cohomology::Persistent_cohomology::compute_persistent_cohomology(min_persistence); + void compute_persistence(int homology_coeff_field, double min_persistence) { + Base::init_coefficients(homology_coeff_field); + Base::compute_persistent_cohomology(min_persistence); + } + + void maybe_compute_persistence(int homology_coeff_field, double min_persistence) { + // Currently get_persistent_pairs safely returns an empty vector before compute_persistent_cohomology + if(Base::get_persistent_pairs().empty()) + compute_persistence(homology_coeff_field, min_persistence); + } + std::vector>> get_persistence() { // Custom sort and output persistence cmp_intervals_by_dim_then_length cmp(stptr_); - auto persistent_pairs = persistent_cohomology::Persistent_cohomology::get_persistent_pairs(); + auto persistent_pairs = Base::get_persistent_pairs(); std::sort(std::begin(persistent_pairs), std::end(persistent_pairs), cmp); std::vector>> persistence; @@ -74,8 +78,7 @@ persistent_cohomology::Persistent_cohomology, std::vector>> persistence_pairs() { - auto pairs = persistent_cohomology::Persistent_cohomology::get_persistent_pairs(); + auto pairs = Base::get_persistent_pairs(); std::vector, std::vector>> persistence_pairs; persistence_pairs.reserve(pairs.size()); -- cgit v1.2.3 From 7830d93607257fd75f09b371e88741a517347579 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 3 Apr 2020 21:11:57 +0200 Subject: Dead code --- src/python/include/Simplex_tree_interface.h | 7 ------- 
1 file changed, 7 deletions(-) (limited to 'src') diff --git a/src/python/include/Simplex_tree_interface.h b/src/python/include/Simplex_tree_interface.h index 1a18aed6..27b123f8 100644 --- a/src/python/include/Simplex_tree_interface.h +++ b/src/python/include/Simplex_tree_interface.h @@ -16,8 +16,6 @@ #include #include -#include "Persistent_cohomology_interface.h" - #include #include #include // std::pair @@ -157,11 +155,6 @@ class Simplex_tree_interface : public Simplex_tree { return new_dgm; } - void create_persistence(Gudhi::Persistent_cohomology_interface* pcoh) { - Base::initialize_filtration(); - pcoh = new Gudhi::Persistent_cohomology_interface(*this); - } - // Iterator over the simplex tree Complex_simplex_iterator get_simplices_iterator_begin() { // this specific case works because the range is just a pair of iterators - won't work if range was a vector -- cgit v1.2.3 From b2cfc0691147ca122861bc423d41495c4b444dde Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 3 Apr 2020 21:27:01 +0200 Subject: Simplify some code --- src/python/gudhi/simplex_tree.pyx | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 69e645b4..d8bd0b79 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -413,7 +413,7 @@ cdef class SimplexTree: Note that this code creates an extra vertex internally, so you should make sure that the Simplex_tree does not contain a vertex with the largest possible value (i.e., 4294967295). 
""" - return self.get_ptr().compute_extended_filtration() + self.get_ptr().compute_extended_filtration() def extended_persistence(self, homology_coeff_field=11, min_persistence=0): """This function retrieves good values for extended persistence, and separate the diagrams @@ -469,11 +469,8 @@ cdef class SimplexTree: if self.pcohptr != NULL: del self.pcohptr self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), persistence_dim_max) - cdef vector[pair[int, pair[double, double]]] persistence_result - if self.pcohptr != NULL: - self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) - persistence_result = self.pcohptr.get_persistence() - return persistence_result + self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) + return self.pcohptr.get_persistence() def betti_numbers(self): """This function returns the Betti numbers of the simplicial complex. -- cgit v1.2.3 From f0224ea1c97c7dcb32debeda176139ba10bd21e7 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 4 Apr 2020 05:39:19 +0200 Subject: Local bibliographies in sphinx --- src/python/doc/alpha_complex_user.rst | 2 +- src/python/doc/bottleneck_distance_user.rst | 7 +++++++ src/python/doc/cubical_complex_user.rst | 2 +- src/python/doc/index.rst | 2 +- src/python/doc/nerve_gic_complex_user.rst | 7 +++++++ src/python/doc/persistent_cohomology_user.rst | 2 +- src/python/doc/rips_complex_user.rst | 7 +++++++ src/python/doc/simplex_tree_user.rst | 7 +++++++ src/python/doc/tangential_complex_user.rst | 2 +- src/python/doc/wasserstein_distance_user.rst | 7 +++++++ src/python/doc/witness_complex_user.rst | 2 +- 11 files changed, 41 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/python/doc/alpha_complex_user.rst b/src/python/doc/alpha_complex_user.rst index 60319e84..6e926fc8 100644 --- a/src/python/doc/alpha_complex_user.rst +++ b/src/python/doc/alpha_complex_user.rst @@ -207,5 +207,5 @@ CGAL citations ============== .. 
bibliography:: ../../biblio/how_to_cite_cgal.bib - :filter: docnames + :filter: docname in docnames :style: unsrt diff --git a/src/python/doc/bottleneck_distance_user.rst b/src/python/doc/bottleneck_distance_user.rst index 9435c7f1..95c4e575 100644 --- a/src/python/doc/bottleneck_distance_user.rst +++ b/src/python/doc/bottleneck_distance_user.rst @@ -65,3 +65,10 @@ The output is: Bottleneck distance approximation = 0.81 Bottleneck distance value = 0.75 + +Bibliography +============ + +.. bibliography:: ../../biblio/bibliography.bib + :filter: docname in docnames + :style: unsrt diff --git a/src/python/doc/cubical_complex_user.rst b/src/python/doc/cubical_complex_user.rst index 93ca6b24..94f59954 100644 --- a/src/python/doc/cubical_complex_user.rst +++ b/src/python/doc/cubical_complex_user.rst @@ -163,5 +163,5 @@ Bibliography ============ .. bibliography:: ../../biblio/bibliography.bib - :filter: docnames + :filter: docname in docnames :style: unsrt diff --git a/src/python/doc/index.rst b/src/python/doc/index.rst index 3387a64f..df1dff68 100644 --- a/src/python/doc/index.rst +++ b/src/python/doc/index.rst @@ -90,5 +90,5 @@ Bibliography ************ .. bibliography:: ../../biblio/bibliography.bib - :filter: docnames + :filter: docname in docnames :style: unsrt diff --git a/src/python/doc/nerve_gic_complex_user.rst b/src/python/doc/nerve_gic_complex_user.rst index 9101f45d..208031fb 100644 --- a/src/python/doc/nerve_gic_complex_user.rst +++ b/src/python/doc/nerve_gic_complex_user.rst @@ -313,3 +313,10 @@ the program outputs again SC.dot which gives the following visualization after u :alt: Visualization with neato Visualization with neato + +Bibliography +============ + +.. 
bibliography:: ../../biblio/bibliography.bib + :filter: docname in docnames + :style: unsrt diff --git a/src/python/doc/persistent_cohomology_user.rst b/src/python/doc/persistent_cohomology_user.rst index 5f931b3a..0a5be3a9 100644 --- a/src/python/doc/persistent_cohomology_user.rst +++ b/src/python/doc/persistent_cohomology_user.rst @@ -116,5 +116,5 @@ Bibliography ============ .. bibliography:: ../../biblio/bibliography.bib - :filter: docnames + :filter: docname in docnames :style: unsrt diff --git a/src/python/doc/rips_complex_user.rst b/src/python/doc/rips_complex_user.rst index 8efb12e6..325added 100644 --- a/src/python/doc/rips_complex_user.rst +++ b/src/python/doc/rips_complex_user.rst @@ -347,3 +347,10 @@ until dimension 1 - one skeleton graph in other words), the output is: points in the persistence diagram will be under the diagonal, and bottleneck distance and persistence graphical tool will not work properly, this is a known issue. + +Bibliography +============ + +.. bibliography:: ../../biblio/bibliography.bib + :filter: docname in docnames + :style: unsrt diff --git a/src/python/doc/simplex_tree_user.rst b/src/python/doc/simplex_tree_user.rst index 3df7617f..b0b7153e 100644 --- a/src/python/doc/simplex_tree_user.rst +++ b/src/python/doc/simplex_tree_user.rst @@ -66,3 +66,10 @@ The output is: ([1, 2], 4.0) ([1], 0.0) ([2], 4.0) + +Bibliography +============ + +.. bibliography:: ../../biblio/bibliography.bib + :filter: docname in docnames + :style: unsrt diff --git a/src/python/doc/tangential_complex_user.rst b/src/python/doc/tangential_complex_user.rst index 852cf5b6..0bcbc848 100644 --- a/src/python/doc/tangential_complex_user.rst +++ b/src/python/doc/tangential_complex_user.rst @@ -200,5 +200,5 @@ Bibliography ============ .. 
bibliography:: ../../biblio/bibliography.bib - :filter: docnames + :filter: docname in docnames :style: unsrt diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst index a9b21fa5..9b94573e 100644 --- a/src/python/doc/wasserstein_distance_user.rst +++ b/src/python/doc/wasserstein_distance_user.rst @@ -84,3 +84,10 @@ The output is: point 1 in dgm1 is matched to point 2 in dgm2 point 2 in dgm1 is matched to the diagonal point 1 in dgm2 is matched to the diagonal + +Bibliography +============ + +.. bibliography:: ../../biblio/bibliography.bib + :filter: docname in docnames + :style: unsrt diff --git a/src/python/doc/witness_complex_user.rst b/src/python/doc/witness_complex_user.rst index 7087fa98..b932ed0d 100644 --- a/src/python/doc/witness_complex_user.rst +++ b/src/python/doc/witness_complex_user.rst @@ -131,5 +131,5 @@ Bibliography ============ .. bibliography:: ../../biblio/bibliography.bib - :filter: docnames + :filter: docname in docnames :style: unsrt -- cgit v1.2.3 From d9e6b4f51bc8517453653be2904ab6db9aaab85e Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 4 Apr 2020 06:01:59 +0200 Subject: sphinx label warnings --- src/python/doc/alpha_complex_user.rst | 1 + src/python/doc/bottleneck_distance_user.rst | 1 + src/python/doc/cubical_complex_user.rst | 1 + src/python/doc/index.rst | 1 + src/python/doc/nerve_gic_complex_user.rst | 1 + src/python/doc/persistent_cohomology_user.rst | 1 + src/python/doc/rips_complex_user.rst | 1 + src/python/doc/simplex_tree_user.rst | 1 + src/python/doc/tangential_complex_user.rst | 1 + src/python/doc/wasserstein_distance_user.rst | 1 + src/python/doc/witness_complex_user.rst | 1 + 11 files changed, 11 insertions(+) (limited to 'src') diff --git a/src/python/doc/alpha_complex_user.rst b/src/python/doc/alpha_complex_user.rst index 6e926fc8..e1903688 100644 --- a/src/python/doc/alpha_complex_user.rst +++ b/src/python/doc/alpha_complex_user.rst @@ -209,3 +209,4 @@ CGAL citations 
.. bibliography:: ../../biblio/how_to_cite_cgal.bib :filter: docname in docnames :style: unsrt + :labelprefix: A diff --git a/src/python/doc/bottleneck_distance_user.rst b/src/python/doc/bottleneck_distance_user.rst index 95c4e575..23a87c19 100644 --- a/src/python/doc/bottleneck_distance_user.rst +++ b/src/python/doc/bottleneck_distance_user.rst @@ -72,3 +72,4 @@ Bibliography .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames :style: unsrt + :labelprefix: B diff --git a/src/python/doc/cubical_complex_user.rst b/src/python/doc/cubical_complex_user.rst index 94f59954..cdc5b5dc 100644 --- a/src/python/doc/cubical_complex_user.rst +++ b/src/python/doc/cubical_complex_user.rst @@ -165,3 +165,4 @@ Bibliography .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames :style: unsrt + :labelprefix: CC diff --git a/src/python/doc/index.rst b/src/python/doc/index.rst index df1dff68..089efe23 100644 --- a/src/python/doc/index.rst +++ b/src/python/doc/index.rst @@ -92,3 +92,4 @@ Bibliography .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames :style: unsrt + :labelprefix: I diff --git a/src/python/doc/nerve_gic_complex_user.rst b/src/python/doc/nerve_gic_complex_user.rst index 208031fb..b022dca7 100644 --- a/src/python/doc/nerve_gic_complex_user.rst +++ b/src/python/doc/nerve_gic_complex_user.rst @@ -320,3 +320,4 @@ Bibliography .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames :style: unsrt + :labelprefix: N diff --git a/src/python/doc/persistent_cohomology_user.rst b/src/python/doc/persistent_cohomology_user.rst index 0a5be3a9..f97fc759 100644 --- a/src/python/doc/persistent_cohomology_user.rst +++ b/src/python/doc/persistent_cohomology_user.rst @@ -118,3 +118,4 @@ Bibliography .. 
bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames :style: unsrt + :labelprefix: PC diff --git a/src/python/doc/rips_complex_user.rst b/src/python/doc/rips_complex_user.rst index 325added..fb6e4b1b 100644 --- a/src/python/doc/rips_complex_user.rst +++ b/src/python/doc/rips_complex_user.rst @@ -354,3 +354,4 @@ Bibliography .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames :style: unsrt + :labelprefix: R diff --git a/src/python/doc/simplex_tree_user.rst b/src/python/doc/simplex_tree_user.rst index b0b7153e..5a97b3d7 100644 --- a/src/python/doc/simplex_tree_user.rst +++ b/src/python/doc/simplex_tree_user.rst @@ -73,3 +73,4 @@ Bibliography .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames :style: unsrt + :labelprefix: ST diff --git a/src/python/doc/tangential_complex_user.rst b/src/python/doc/tangential_complex_user.rst index 0bcbc848..6cdd6125 100644 --- a/src/python/doc/tangential_complex_user.rst +++ b/src/python/doc/tangential_complex_user.rst @@ -202,3 +202,4 @@ Bibliography .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames :style: unsrt + :labelprefix: TA diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst index 9b94573e..817e6981 100644 --- a/src/python/doc/wasserstein_distance_user.rst +++ b/src/python/doc/wasserstein_distance_user.rst @@ -91,3 +91,4 @@ Bibliography .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames :style: unsrt + :labelprefix: WA diff --git a/src/python/doc/witness_complex_user.rst b/src/python/doc/witness_complex_user.rst index b932ed0d..c258ad38 100644 --- a/src/python/doc/witness_complex_user.rst +++ b/src/python/doc/witness_complex_user.rst @@ -133,3 +133,4 @@ Bibliography .. 
bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames :style: unsrt + :labelprefix: WI -- cgit v1.2.3 From dc80ab48359521dac415292f4d2b1f496f326263 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 4 Apr 2020 06:05:57 +0200 Subject: Revert "sphinx label warnings" This reverts commit d9e6b4f51bc8517453653be2904ab6db9aaab85e. It was able to remove the warnings about duplicate labels, but then it shows [WA1] instead of [1] in the generated doc. And for things cited on multiple pages, it uses the same everywhere, so on a single page, you can have a mix of [I1], [WI2], etc. Not very pretty. --- src/python/doc/alpha_complex_user.rst | 1 - src/python/doc/bottleneck_distance_user.rst | 1 - src/python/doc/cubical_complex_user.rst | 1 - src/python/doc/index.rst | 1 - src/python/doc/nerve_gic_complex_user.rst | 1 - src/python/doc/persistent_cohomology_user.rst | 1 - src/python/doc/rips_complex_user.rst | 1 - src/python/doc/simplex_tree_user.rst | 1 - src/python/doc/tangential_complex_user.rst | 1 - src/python/doc/wasserstein_distance_user.rst | 1 - src/python/doc/witness_complex_user.rst | 1 - 11 files changed, 11 deletions(-) (limited to 'src') diff --git a/src/python/doc/alpha_complex_user.rst b/src/python/doc/alpha_complex_user.rst index e1903688..6e926fc8 100644 --- a/src/python/doc/alpha_complex_user.rst +++ b/src/python/doc/alpha_complex_user.rst @@ -209,4 +209,3 @@ CGAL citations .. bibliography:: ../../biblio/how_to_cite_cgal.bib :filter: docname in docnames :style: unsrt - :labelprefix: A diff --git a/src/python/doc/bottleneck_distance_user.rst b/src/python/doc/bottleneck_distance_user.rst index 23a87c19..95c4e575 100644 --- a/src/python/doc/bottleneck_distance_user.rst +++ b/src/python/doc/bottleneck_distance_user.rst @@ -72,4 +72,3 @@ Bibliography .. 
bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames :style: unsrt - :labelprefix: B diff --git a/src/python/doc/cubical_complex_user.rst b/src/python/doc/cubical_complex_user.rst index cdc5b5dc..94f59954 100644 --- a/src/python/doc/cubical_complex_user.rst +++ b/src/python/doc/cubical_complex_user.rst @@ -165,4 +165,3 @@ Bibliography .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames :style: unsrt - :labelprefix: CC diff --git a/src/python/doc/index.rst b/src/python/doc/index.rst index 089efe23..df1dff68 100644 --- a/src/python/doc/index.rst +++ b/src/python/doc/index.rst @@ -92,4 +92,3 @@ Bibliography .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames :style: unsrt - :labelprefix: I diff --git a/src/python/doc/nerve_gic_complex_user.rst b/src/python/doc/nerve_gic_complex_user.rst index b022dca7..208031fb 100644 --- a/src/python/doc/nerve_gic_complex_user.rst +++ b/src/python/doc/nerve_gic_complex_user.rst @@ -320,4 +320,3 @@ Bibliography .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames :style: unsrt - :labelprefix: N diff --git a/src/python/doc/persistent_cohomology_user.rst b/src/python/doc/persistent_cohomology_user.rst index f97fc759..0a5be3a9 100644 --- a/src/python/doc/persistent_cohomology_user.rst +++ b/src/python/doc/persistent_cohomology_user.rst @@ -118,4 +118,3 @@ Bibliography .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames :style: unsrt - :labelprefix: PC diff --git a/src/python/doc/rips_complex_user.rst b/src/python/doc/rips_complex_user.rst index fb6e4b1b..325added 100644 --- a/src/python/doc/rips_complex_user.rst +++ b/src/python/doc/rips_complex_user.rst @@ -354,4 +354,3 @@ Bibliography .. 
bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames :style: unsrt - :labelprefix: R diff --git a/src/python/doc/simplex_tree_user.rst b/src/python/doc/simplex_tree_user.rst index 5a97b3d7..b0b7153e 100644 --- a/src/python/doc/simplex_tree_user.rst +++ b/src/python/doc/simplex_tree_user.rst @@ -73,4 +73,3 @@ Bibliography .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames :style: unsrt - :labelprefix: ST diff --git a/src/python/doc/tangential_complex_user.rst b/src/python/doc/tangential_complex_user.rst index 6cdd6125..0bcbc848 100644 --- a/src/python/doc/tangential_complex_user.rst +++ b/src/python/doc/tangential_complex_user.rst @@ -202,4 +202,3 @@ Bibliography .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames :style: unsrt - :labelprefix: TA diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst index 817e6981..9b94573e 100644 --- a/src/python/doc/wasserstein_distance_user.rst +++ b/src/python/doc/wasserstein_distance_user.rst @@ -91,4 +91,3 @@ Bibliography .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames :style: unsrt - :labelprefix: WA diff --git a/src/python/doc/witness_complex_user.rst b/src/python/doc/witness_complex_user.rst index c258ad38..b932ed0d 100644 --- a/src/python/doc/witness_complex_user.rst +++ b/src/python/doc/witness_complex_user.rst @@ -133,4 +133,3 @@ Bibliography .. 
bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames :style: unsrt - :labelprefix: WI -- cgit v1.2.3 From da3b4a79ca40d08ae5597341f4db2418f20fe3d2 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 4 Apr 2020 12:52:52 +0200 Subject: Missing biblio in one file, change title level --- src/python/doc/alpha_complex_user.rst | 2 +- src/python/doc/bottleneck_distance_user.rst | 2 +- src/python/doc/cubical_complex_user.rst | 2 +- src/python/doc/nerve_gic_complex_ref.rst | 7 +++++++ src/python/doc/nerve_gic_complex_user.rst | 2 +- src/python/doc/persistent_cohomology_user.rst | 2 +- src/python/doc/rips_complex_user.rst | 2 +- src/python/doc/simplex_tree_user.rst | 2 +- src/python/doc/tangential_complex_user.rst | 2 +- src/python/doc/wasserstein_distance_user.rst | 2 +- src/python/doc/witness_complex_user.rst | 2 +- 11 files changed, 17 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/python/doc/alpha_complex_user.rst b/src/python/doc/alpha_complex_user.rst index 6e926fc8..265a82d2 100644 --- a/src/python/doc/alpha_complex_user.rst +++ b/src/python/doc/alpha_complex_user.rst @@ -204,7 +204,7 @@ the program output is: [3, 6] -> 30.25 CGAL citations -============== +-------------- .. bibliography:: ../../biblio/how_to_cite_cgal.bib :filter: docname in docnames diff --git a/src/python/doc/bottleneck_distance_user.rst b/src/python/doc/bottleneck_distance_user.rst index 95c4e575..206fcb63 100644 --- a/src/python/doc/bottleneck_distance_user.rst +++ b/src/python/doc/bottleneck_distance_user.rst @@ -67,7 +67,7 @@ The output is: Bottleneck distance value = 0.75 Bibliography -============ +------------ .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames diff --git a/src/python/doc/cubical_complex_user.rst b/src/python/doc/cubical_complex_user.rst index 94f59954..e8c94bf6 100644 --- a/src/python/doc/cubical_complex_user.rst +++ b/src/python/doc/cubical_complex_user.rst @@ -160,7 +160,7 @@ Examples. 
End user programs are available in python/example/ folder. Bibliography -============ +------------ .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames diff --git a/src/python/doc/nerve_gic_complex_ref.rst b/src/python/doc/nerve_gic_complex_ref.rst index abde2e8c..6a81b7af 100644 --- a/src/python/doc/nerve_gic_complex_ref.rst +++ b/src/python/doc/nerve_gic_complex_ref.rst @@ -12,3 +12,10 @@ Cover complexes reference manual :show-inheritance: .. automethod:: gudhi.CoverComplex.__init__ + +Bibliography +------------ + +.. bibliography:: ../../biblio/bibliography.bib + :filter: docname in docnames + :style: unsrt diff --git a/src/python/doc/nerve_gic_complex_user.rst b/src/python/doc/nerve_gic_complex_user.rst index 208031fb..f709ce91 100644 --- a/src/python/doc/nerve_gic_complex_user.rst +++ b/src/python/doc/nerve_gic_complex_user.rst @@ -315,7 +315,7 @@ the program outputs again SC.dot which gives the following visualization after u Visualization with neato Bibliography -============ +------------ .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames diff --git a/src/python/doc/persistent_cohomology_user.rst b/src/python/doc/persistent_cohomology_user.rst index 0a5be3a9..506fa3a7 100644 --- a/src/python/doc/persistent_cohomology_user.rst +++ b/src/python/doc/persistent_cohomology_user.rst @@ -113,7 +113,7 @@ We provide several example files: run these examples with -h for details on thei * :download:`tangential_complex_plain_homology_from_off_file_example.py <../example/tangential_complex_plain_homology_from_off_file_example.py>` Bibliography -============ +------------ .. 
bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames diff --git a/src/python/doc/rips_complex_user.rst b/src/python/doc/rips_complex_user.rst index 325added..c4bbcfb6 100644 --- a/src/python/doc/rips_complex_user.rst +++ b/src/python/doc/rips_complex_user.rst @@ -349,7 +349,7 @@ until dimension 1 - one skeleton graph in other words), the output is: this is a known issue. Bibliography -============ +------------ .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames diff --git a/src/python/doc/simplex_tree_user.rst b/src/python/doc/simplex_tree_user.rst index b0b7153e..1b272c35 100644 --- a/src/python/doc/simplex_tree_user.rst +++ b/src/python/doc/simplex_tree_user.rst @@ -68,7 +68,7 @@ The output is: ([2], 4.0) Bibliography -============ +------------ .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames diff --git a/src/python/doc/tangential_complex_user.rst b/src/python/doc/tangential_complex_user.rst index 0bcbc848..cf8199cc 100644 --- a/src/python/doc/tangential_complex_user.rst +++ b/src/python/doc/tangential_complex_user.rst @@ -197,7 +197,7 @@ The output is: Bibliography -============ +------------ .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst index 9b94573e..2ae72351 100644 --- a/src/python/doc/wasserstein_distance_user.rst +++ b/src/python/doc/wasserstein_distance_user.rst @@ -86,7 +86,7 @@ The output is: point 1 in dgm2 is matched to the diagonal Bibliography -============ +------------ .. 
bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames diff --git a/src/python/doc/witness_complex_user.rst b/src/python/doc/witness_complex_user.rst index b932ed0d..799f5444 100644 --- a/src/python/doc/witness_complex_user.rst +++ b/src/python/doc/witness_complex_user.rst @@ -128,7 +128,7 @@ Here is an example of constructing a strong witness complex filtration and compu * :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>` Bibliography -============ +------------ .. bibliography:: ../../biblio/bibliography.bib :filter: docname in docnames -- cgit v1.2.3 From 3ca13b31e5f48fbaef2ba7db980643716c18725c Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sun, 5 Apr 2020 00:35:23 +0200 Subject: compute_persistence in python Also simplify references, and replace print with assert for errors --- src/python/gudhi/simplex_tree.pyx | 105 ++++++++++----------- .../include/Persistent_cohomology_interface.h | 13 +-- 2 files changed, 52 insertions(+), 66 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index d8bd0b79..c34a64e6 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -139,9 +139,9 @@ cdef class SimplexTree: This function is not constant time because it can recompute dimension if required (can be triggered by - :func:`remove_maximal_simplex()` + :func:`remove_maximal_simplex` or - :func:`prune_above_filtration()` + :func:`prune_above_filtration` methods). """ return self.get_ptr().dimension() @@ -166,9 +166,9 @@ cdef class SimplexTree: This function must be used with caution because it disables dimension recomputation when required (this recomputation can be triggered by - :func:`remove_maximal_simplex()` + :func:`remove_maximal_simplex` or - :func:`prune_above_filtration()` + :func:`prune_above_filtration` ). 
""" self.get_ptr().set_dimension(dimension) @@ -315,10 +315,10 @@ cdef class SimplexTree: The dimension of the simplicial complex may be lower after calling remove_maximal_simplex than it was before. However, - :func:`upper_bound_dimension()` + :func:`upper_bound_dimension` method will return the old value, which remains a valid upper bound. If you care, you can call - :func:`dimension()` + :func:`dimension` to recompute the exact dimension. """ self.get_ptr().remove_maximal_simplex(simplex) @@ -346,12 +346,12 @@ cdef class SimplexTree: Note that the dimension of the simplicial complex may be lower after calling - :func:`prune_above_filtration()` + :func:`prune_above_filtration` than it was before. However, - :func:`upper_bound_dimension()` + :func:`upper_bound_dimension` will return the old value, which remains a valid upper bound. If you care, you can call - :func:`dimension()` + :func:`dimension` method to recompute the exact dimension. """ return self.get_ptr().prune_above_filtration(filtration) @@ -405,7 +405,7 @@ cdef class SimplexTree: Note that after calling this function, the filtration values are actually modified within the Simplex_tree. - The function :func:`extended_persistence()` + The function :func:`extended_persistence` retrieves the original values. .. note:: @@ -427,11 +427,11 @@ cdef class SimplexTree: 0.0. Sets min_persistence to -1.0 to see all values. :type min_persistence: float. - :returns: A list of four persistence diagrams in the format described in :func:`persistence()`. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. See https://link.springer.com/article/10.1007/s10208-008-9027-z and/or section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes. + :returns: A list of four persistence diagrams in the format described in :func:`persistence`. 
The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. See https://link.springer.com/article/10.1007/s10208-008-9027-z and/or section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes. .. note:: - This function should be called only if :func:`extend_filtration()` has been called first! + This function should be called only if :func:`extend_filtration` has been called first! .. note:: @@ -466,11 +466,32 @@ cdef class SimplexTree: :returns: The persistence of the simplicial complex. :rtype: list of pairs(dimension, pair(birth, death)) """ + self.compute_persistence(homology_coeff_field, min_persistence, persistence_dim_max) + return self.pcohptr.get_persistence() + + def compute_persistence(self, homology_coeff_field=11, min_persistence=0, persistence_dim_max = False): + """This function computes the persistence of the simplicial complex, so it can be accessed through + :func:`persistent_betti_numbers`, :func:`persistence_pairs`, etc. This function is equivalent to :func:`persistence` + when you do not want the list :func:`persistence` returns. + + :param homology_coeff_field: The homology coefficient field. Must be a + prime number. Default value is 11. + :type homology_coeff_field: int. + :param min_persistence: The minimum persistence value to take into + account (strictly greater than min_persistence). Default value is + 0.0. + Sets min_persistence to -1.0 to see all values. + :type min_persistence: float. + :param persistence_dim_max: If true, the persistent homology for the + maximal dimension in the complex is computed. If false, it is + ignored. Default is false. + :type persistence_dim_max: bool + :returns: Nothing. 
+ """ if self.pcohptr != NULL: del self.pcohptr self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), persistence_dim_max) self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) - return self.pcohptr.get_persistence() def betti_numbers(self): """This function returns the Betti numbers of the simplicial complex. @@ -479,16 +500,11 @@ cdef class SimplexTree: :rtype: list of int :note: betti_numbers function requires - :func:`persistence()` + :func:`compute_persistence` function to be launched first. """ - cdef vector[int] bn_result - if self.pcohptr != NULL: - bn_result = self.pcohptr.betti_numbers() - else: - print("betti_numbers function requires persistence function" - " to be launched first.") - return bn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before betti_numbers()" + return self.pcohptr.betti_numbers() def persistent_betti_numbers(self, from_value, to_value): """This function returns the persistent Betti numbers of the @@ -505,16 +521,11 @@ cdef class SimplexTree: :rtype: list of int :note: persistent_betti_numbers function requires - :func:`persistence()` + :func:`compute_persistence` function to be launched first. """ - cdef vector[int] pbn_result - if self.pcohptr != NULL: - pbn_result = self.pcohptr.persistent_betti_numbers(from_value, to_value) - else: - print("persistent_betti_numbers function requires persistence function" - " to be launched first.") - return pbn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before persistent_betti_numbers()" + return self.pcohptr.persistent_betti_numbers(from_value, to_value) def persistence_intervals_in_dimension(self, dimension): """This function returns the persistence intervals of the simplicial @@ -526,16 +537,11 @@ cdef class SimplexTree: :rtype: numpy array of dimension 2 :note: intervals_in_dim function requires - :func:`persistence()` + :func:`compute_persistence` function to be launched first. 
""" - cdef vector[pair[double,double]] intervals_result - if self.pcohptr != NULL: - intervals_result = self.pcohptr.intervals_in_dimension(dimension) - else: - print("intervals_in_dim function requires persistence function" - " to be launched first.") - return np_array(intervals_result) + assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()" + return np_array(self.pcohptr.intervals_in_dimension(dimension)) def persistence_pairs(self): """This function returns a list of persistence birth and death simplices pairs. @@ -544,18 +550,13 @@ cdef class SimplexTree: :rtype: list of pair of list of int :note: persistence_pairs function requires - :func:`persistence()` + :func:`compute_persistence` function to be launched first. """ - cdef vector[pair[vector[int],vector[int]]] persistence_pairs_result - if self.pcohptr != NULL: - persistence_pairs_result = self.pcohptr.persistence_pairs() - else: - print("persistence_pairs function requires persistence function" - " to be launched first.") - return persistence_pairs_result + assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_pairs()" + return self.pcohptr.persistence_pairs() - def write_persistence_diagram(self, persistence_file=''): + def write_persistence_diagram(self, persistence_file): """This function writes the persistence intervals of the simplicial complex in a user given file name. @@ -563,14 +564,8 @@ cdef class SimplexTree: :type persistence_file: string. :note: intervals_in_dim function requires - :func:`persistence()` + :func:`compute_persistence` function to be launched first. 
""" - if self.pcohptr != NULL: - if persistence_file != '': - self.pcohptr.write_output_diagram(persistence_file.encode('utf-8')) - else: - print("persistence_file must be specified") - else: - print("intervals_in_dim function requires persistence function" - " to be launched first.") + assert self.pcohptr != NULL, "compute_persistence() must be called before write_persistence_diagram()" + self.pcohptr.write_output_diagram(persistence_file.encode('utf-8')) diff --git a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h index a29ebbee..e2b69a52 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -43,25 +43,16 @@ persistent_cohomology::Persistent_cohomology>> get_persistence() { // Custom sort and output persistence cmp_intervals_by_dim_then_length cmp(stptr_); -- cgit v1.2.3 From 73a40006dad55b0a9ce6ca270e566ce91efe6af4 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sun, 5 Apr 2020 12:27:15 +0200 Subject: Proper exception in write_output_diagram --- src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h | 1 + src/python/gudhi/simplex_tree.pxd | 2 +- src/python/gudhi/simplex_tree.pyx | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h b/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h index ca4bc10d..5e41edb4 100644 --- a/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h +++ b/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h @@ -571,6 +571,7 @@ class Persistent_cohomology { void write_output_diagram(std::string diagram_name) { std::ofstream diagram_out(diagram_name.c_str()); + diagram_out.exceptions(diagram_out.failbit); cmp_intervals_by_length cmp(cpx_); std::sort(std::begin(persistent_pairs_), std::end(persistent_pairs_), cmp); bool has_infinity = std::numeric_limits::has_infinity; 
diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd index 44040bcb..c46b36ba 100644 --- a/src/python/gudhi/simplex_tree.pxd +++ b/src/python/gudhi/simplex_tree.pxd @@ -76,5 +76,5 @@ cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": vector[int] betti_numbers() vector[int] persistent_betti_numbers(double from_value, double to_value) vector[pair[double,double]] intervals_in_dimension(int dimension) - void write_output_diagram(string diagram_file_name) + void write_output_diagram(string diagram_file_name) except + vector[pair[vector[int], vector[int]]] persistence_pairs() diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index c34a64e6..7728ebfc 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -449,7 +449,7 @@ cdef class SimplexTree: def persistence(self, homology_coeff_field=11, min_persistence=0, persistence_dim_max = False): - """This function returns the persistence of the simplicial complex. + """This function computes and returns the persistence of the simplicial complex. :param homology_coeff_field: The homology coefficient field. Must be a prime number. Default value is 11. 
-- cgit v1.2.3 From 5eaca3ed69c564a6f44e6ff21ac33e2cc576bafa Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 6 Apr 2020 15:58:10 +0200 Subject: compute_persistence for cubical --- src/python/gudhi/cubical_complex.pyx | 63 ++++++++++++++------------ src/python/gudhi/periodic_cubical_complex.pyx | 65 +++++++++++++++------------ 2 files changed, 71 insertions(+), 57 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index ce844558..007abcb6 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -130,8 +130,31 @@ cdef class CubicalComplex: """ return self.thisptr.dimension() + def compute_persistence(self, homology_coeff_field=11, min_persistence=0): + """This function computes the persistence of the complex, so it can be + accessed through :func:`persistent_betti_numbers`, + :func:`persistence_intervals_in_dimension`, etc. This function is + equivalent to :func:`persistence` when you do not want the list + :func:`persistence` returns. + + :param homology_coeff_field: The homology coefficient field. Must be a + prime number + :type homology_coeff_field: int. + :param min_persistence: The minimum persistence value to take into + account (strictly greater than min_persistence). Default value is + 0.0. + Sets min_persistence to -1.0 to see all values. + :type min_persistence: float. + :returns: Nothing. + """ + if self.pcohptr != NULL: + del self.pcohptr + assert self.__is_defined() + self.pcohptr = new Cubical_complex_persistence_interface(self.thisptr, True) + self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) + def persistence(self, homology_coeff_field=11, min_persistence=0): - """This function returns the persistence of the complex. + """This function computes and returns the persistence of the complex. :param homology_coeff_field: The homology coefficient field. 
Must be a prime number @@ -144,31 +167,22 @@ cdef class CubicalComplex: :returns: list of pairs(dimension, pair(birth, death)) -- the persistence of the complex. """ - if self.pcohptr != NULL: - del self.pcohptr - if self.thisptr != NULL: - self.pcohptr = new Cubical_complex_persistence_interface(self.thisptr, True) - cdef vector[pair[int, pair[double, double]]] persistence_result - if self.pcohptr != NULL: - self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) - persistence_result = self.pcohptr.get_persistence() - return persistence_result + self.compute_persistence(homology_coeff_field, min_persistence) + return self.pcohptr.get_persistence() def betti_numbers(self): """This function returns the Betti numbers of the complex. :returns: list of int -- The Betti numbers ([B0, B1, ..., Bn]). - :note: betti_numbers function requires persistence function to be + :note: betti_numbers function requires :func:`compute_persistence` function to be launched first. :note: betti_numbers function always returns [1, 0, 0, ...] as infinity filtration cubes are not removed from the complex. """ - cdef vector[int] bn_result - if self.pcohptr != NULL: - bn_result = self.pcohptr.betti_numbers() - return bn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before betti_numbers()" + return self.pcohptr.betti_numbers() def persistent_betti_numbers(self, from_value, to_value): """This function returns the persistent Betti numbers of the complex. @@ -183,13 +197,11 @@ cdef class CubicalComplex: :returns: list of int -- The persistent Betti numbers ([B0, B1, ..., Bn]). - :note: persistent_betti_numbers function requires persistence + :note: persistent_betti_numbers function requires :func:`compute_persistence` function to be launched first. 
""" - cdef vector[int] pbn_result - if self.pcohptr != NULL: - pbn_result = self.pcohptr.persistent_betti_numbers(from_value, to_value) - return pbn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before persistent_betti_numbers()" + return self.pcohptr.persistent_betti_numbers(from_value, to_value) def persistence_intervals_in_dimension(self, dimension): """This function returns the persistence intervals of the complex in a @@ -200,13 +212,8 @@ cdef class CubicalComplex: :returns: The persistence intervals. :rtype: numpy array of dimension 2 - :note: intervals_in_dim function requires persistence function to be + :note: intervals_in_dim function requires :func:`compute_persistence` function to be launched first. """ - cdef vector[pair[double,double]] intervals_result - if self.pcohptr != NULL: - intervals_result = self.pcohptr.intervals_in_dimension(dimension) - else: - print("intervals_in_dim function requires persistence function" - " to be launched first.", file=sys.stderr) - return np.array(intervals_result) + assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()" + return np.array(self.pcohptr.intervals_in_dimension(dimension)) diff --git a/src/python/gudhi/periodic_cubical_complex.pyx b/src/python/gudhi/periodic_cubical_complex.pyx index ff5ef3bd..246a3a02 100644 --- a/src/python/gudhi/periodic_cubical_complex.pyx +++ b/src/python/gudhi/periodic_cubical_complex.pyx @@ -135,8 +135,31 @@ cdef class PeriodicCubicalComplex: """ return self.thisptr.dimension() + def compute_persistence(self, homology_coeff_field=11, min_persistence=0): + """This function computes the persistence of the complex, so it can be + accessed through :func:`persistent_betti_numbers`, + :func:`persistence_intervals_in_dimension`, etc. This function is + equivalent to :func:`persistence` when you do not want the list + :func:`persistence` returns. 
+ + :param homology_coeff_field: The homology coefficient field. Must be a + prime number + :type homology_coeff_field: int. + :param min_persistence: The minimum persistence value to take into + account (strictly greater than min_persistence). Default value is + 0.0. + Sets min_persistence to -1.0 to see all values. + :type min_persistence: float. + :returns: Nothing. + """ + if self.pcohptr != NULL: + del self.pcohptr + assert self.__is_defined() + self.pcohptr = new Periodic_cubical_complex_persistence_interface(self.thisptr, True) + self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) + def persistence(self, homology_coeff_field=11, min_persistence=0): - """This function returns the persistence of the complex. + """This function computes and returns the persistence of the complex. :param homology_coeff_field: The homology coefficient field. Must be a prime number @@ -149,31 +172,22 @@ cdef class PeriodicCubicalComplex: :returns: list of pairs(dimension, pair(birth, death)) -- the persistence of the complex. """ - if self.pcohptr != NULL: - del self.pcohptr - if self.thisptr != NULL: - self.pcohptr = new Periodic_cubical_complex_persistence_interface(self.thisptr, True) - cdef vector[pair[int, pair[double, double]]] persistence_result - if self.pcohptr != NULL: - self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) - persistence_result = self.pcohptr.get_persistence() - return persistence_result + self.compute_persistence(homology_coeff_field, min_persistence) + return self.pcohptr.get_persistence() def betti_numbers(self): """This function returns the Betti numbers of the complex. :returns: list of int -- The Betti numbers ([B0, B1, ..., Bn]). - :note: betti_numbers function requires persistence function to be + :note: betti_numbers function requires :func:`compute_persistence` function to be launched first. - :note: betti_numbers function always returns [1, 0, 0, ...] 
as infinity + :note: This function always returns the Betti numbers of a torus as infinity filtration cubes are not removed from the complex. """ - cdef vector[int] bn_result - if self.pcohptr != NULL: - bn_result = self.pcohptr.betti_numbers() - return bn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before betti_numbers()" + return self.pcohptr.betti_numbers() def persistent_betti_numbers(self, from_value, to_value): """This function returns the persistent Betti numbers of the complex. @@ -188,13 +202,11 @@ cdef class PeriodicCubicalComplex: :returns: list of int -- The persistent Betti numbers ([B0, B1, ..., Bn]). - :note: persistent_betti_numbers function requires persistence + :note: persistent_betti_numbers function requires :func:`compute_persistence` function to be launched first. """ - cdef vector[int] pbn_result - if self.pcohptr != NULL: - pbn_result = self.pcohptr.persistent_betti_numbers(from_value, to_value) - return pbn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before persistent_betti_numbers()" + return self.pcohptr.persistent_betti_numbers(from_value, to_value) def persistence_intervals_in_dimension(self, dimension): """This function returns the persistence intervals of the complex in a @@ -205,13 +217,8 @@ cdef class PeriodicCubicalComplex: :returns: The persistence intervals. :rtype: numpy array of dimension 2 - :note: intervals_in_dim function requires persistence function to be + :note: intervals_in_dim function requires :func:`compute_persistence` function to be launched first. 
""" - cdef vector[pair[double,double]] intervals_result - if self.pcohptr != NULL: - intervals_result = self.pcohptr.intervals_in_dimension(dimension) - else: - print("intervals_in_dim function requires persistence function" - " to be launched first.", file=sys.stderr) - return np.array(intervals_result) + assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()" + return np.array(self.pcohptr.intervals_in_dimension(dimension)) -- cgit v1.2.3 From 173506323471cf5175ea2b340abec63968c5cd5f Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 6 Apr 2020 16:51:32 +0200 Subject: Use compute_persistence in an example --- .../example/alpha_rips_persistence_bottleneck_distance.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/python/example/alpha_rips_persistence_bottleneck_distance.py b/src/python/example/alpha_rips_persistence_bottleneck_distance.py index f156826d..3e12b0d5 100755 --- a/src/python/example/alpha_rips_persistence_bottleneck_distance.py +++ b/src/python/example/alpha_rips_persistence_bottleneck_distance.py @@ -5,6 +5,7 @@ import argparse import math import errno import os +import numpy as np """ This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. 
@@ -56,7 +57,7 @@ with open(args.file, "r") as f: message = "Number of simplices=" + repr(rips_stree.num_simplices()) print(message) - rips_diag = rips_stree.persistence() + rips_stree.compute_persistence() print("##############################################################") print("AlphaComplex creation from points read in a OFF file") @@ -72,18 +73,13 @@ with open(args.file, "r") as f: message = "Number of simplices=" + repr(alpha_stree.num_simplices()) print(message) - alpha_diag = alpha_stree.persistence() + alpha_stree.compute_persistence() max_b_distance = 0.0 for dim in range(args.max_dimension): # Alpha persistence values needs to be transform because filtration # values are alpha square values - funcs = [math.sqrt, math.sqrt] - alpha_intervals = [] - for interval in alpha_stree.persistence_intervals_in_dimension(dim): - alpha_intervals.append( - map(lambda func, value: func(value), funcs, interval) - ) + alpha_intervals = np.sqrt(alpha_stree.persistence_intervals_in_dimension(dim)) rips_intervals = rips_stree.persistence_intervals_in_dimension(dim) bottleneck_distance = gudhi.bottleneck_distance( -- cgit v1.2.3 From dd96965e521313b6210391f511c82cced9b2a950 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 6 Apr 2020 19:37:58 +0200 Subject: Remove trailing whitespace --- src/python/doc/wasserstein_distance_user.rst | 72 +++++++++++++------------- src/python/gudhi/wasserstein/barycenter.py | 42 +++++++-------- src/python/gudhi/wasserstein/wasserstein.py | 14 ++--- src/python/test/test_wasserstein_barycenter.py | 6 +-- src/python/test/test_wasserstein_distance.py | 2 +- 5 files changed, 68 insertions(+), 68 deletions(-) (limited to 'src') diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst index b821b6fa..c24da74d 100644 --- a/src/python/doc/wasserstein_distance_user.rst +++ b/src/python/doc/wasserstein_distance_user.rst @@ -10,10 +10,10 @@ Definition .. 
include:: wasserstein_distance_sum.inc The q-Wasserstein distance is defined as the minimal value achieved -by a perfect matching between the points of the two diagrams (+ all -diagonal points), where the value of a matching is defined as the +by a perfect matching between the points of the two diagrams (+ all +diagonal points), where the value of a matching is defined as the q-th root of the sum of all edge lengths to the power q. Edge lengths -are measured in norm p, for :math:`1 \leq p \leq \infty`. +are measured in norm p, for :math:`1 \leq p \leq \infty`. Distance Functions ------------------ @@ -54,9 +54,9 @@ The output is: Wasserstein distance value = 1.45 -We can also have access to the optimal matching by letting `matching=True`. +We can also have access to the optimal matching by letting `matching=True`. It is encoded as a list of indices (i,j), meaning that the i-th point in X -is mapped to the j-th point in Y. +is mapped to the j-th point in Y. An index of -1 represents the diagonal. .. testcode:: @@ -84,7 +84,7 @@ An index of -1 represents the diagonal. The output is: .. testoutput:: - + Wasserstein distance value = 2.15 point 0 in dgm1 is matched to point 0 in dgm2 point 1 in dgm1 is matched to point 2 in dgm2 @@ -94,32 +94,32 @@ The output is: Barycenters ----------- -A Frechet mean (or barycenter) is a generalization of the arithmetic -mean in a non linear space such as the one of persistence diagrams. -Given a set of persistence diagrams :math:`\mu_1 \dots \mu_n`, it is -defined as a minimizer of the variance functional, that is of -:math:`\mu \mapsto \sum_{i=1}^n d_2(\mu,\mu_i)^2`. -where :math:`d_2` denotes the Wasserstein-2 distance between -persistence diagrams. -It is known to exist and is generically unique. However, an exact -computation is in general untractable. 
Current implementation -available is based on (Turner et al., 2014), +A Frechet mean (or barycenter) is a generalization of the arithmetic +mean in a non linear space such as the one of persistence diagrams. +Given a set of persistence diagrams :math:`\mu_1 \dots \mu_n`, it is +defined as a minimizer of the variance functional, that is of +:math:`\mu \mapsto \sum_{i=1}^n d_2(\mu,\mu_i)^2`. +where :math:`d_2` denotes the Wasserstein-2 distance between +persistence diagrams. +It is known to exist and is generically unique. However, an exact +computation is in general untractable. Current implementation +available is based on (Turner et al., 2014), :cite:`turner2014frechet` -and uses an EM-scheme to -provide a local minimum of the variance functional (somewhat similar -to the Lloyd algorithm to estimate a solution to the k-means +and uses an EM-scheme to +provide a local minimum of the variance functional (somewhat similar +to the Lloyd algorithm to estimate a solution to the k-means problem). The local minimum returned depends on the initialization of -the barycenter. -The combinatorial structure of the algorithm limits its -performances on large scale problems (thousands of diagrams and of points -per diagram). +the barycenter. +The combinatorial structure of the algorithm limits its +performances on large scale problems (thousands of diagrams and of points +per diagram). + +.. figure:: + ./img/barycenter.png + :figclass: align-center -.. figure:: - ./img/barycenter.png - :figclass: align-center - - Illustration of Frechet mean between persistence - diagrams. + Illustration of Frechet mean between persistence + diagrams. .. autofunction:: gudhi.wasserstein.barycenter.lagrangian_barycenter @@ -127,16 +127,16 @@ per diagram). Basic example ************* -This example estimates the Frechet mean (aka Wasserstein barycenter) between +This example estimates the Frechet mean (aka Wasserstein barycenter) between four persistence diagrams. 
It is initialized on the 4th diagram. -As the algorithm is not convex, its output depends on the initialization and +As the algorithm is not convex, its output depends on the initialization and is only a local minimum of the objective function. -Initialization can be either given as an integer (in which case the i-th -diagram of the list is used as initial estimate) or as a diagram. -If None, it will randomly select one of the diagrams of the list +Initialization can be either given as an integer (in which case the i-th +diagram of the list is used as initial estimate) or as a diagram. +If None, it will randomly select one of the diagrams of the list as initial estimate. -Note that persistence diagrams must be submitted as +Note that persistence diagrams must be submitted as (n x 2) numpy arrays and must not contain inf values. @@ -152,7 +152,7 @@ Note that persistence diagrams must be submitted as pdiagset = [dg1, dg2, dg3, dg4] bary = lagrangian_barycenter(pdiagset=pdiagset,init=3) - message = "Wasserstein barycenter estimated:" + message = "Wasserstein barycenter estimated:" print(message) print(bary) diff --git a/src/python/gudhi/wasserstein/barycenter.py b/src/python/gudhi/wasserstein/barycenter.py index 99f29a1e..de7aea81 100644 --- a/src/python/gudhi/wasserstein/barycenter.py +++ b/src/python/gudhi/wasserstein/barycenter.py @@ -18,7 +18,7 @@ from gudhi.wasserstein import wasserstein_distance def _mean(x, m): ''' :param x: a list of 2D-points, off diagonal, x_0... x_{k-1} - :param m: total amount of points taken into account, + :param m: total amount of points taken into account, that is we have (m-k) copies of diagonal :returns: the weighted mean of x with (m-k) copies of the diagonal ''' @@ -33,14 +33,14 @@ def _mean(x, m): def lagrangian_barycenter(pdiagset, init=None, verbose=False): ''' - :param pdiagset: a list of ``numpy.array`` of shape `(n x 2)` - (`n` can variate), encoding a set of - persistence diagrams with only finite coordinates. 
- :param init: The initial value for barycenter estimate. - If ``None``, init is made on a random diagram from the dataset. - Otherwise, it can be an ``int`` + :param pdiagset: a list of ``numpy.array`` of shape `(n x 2)` + (`n` can variate), encoding a set of + persistence diagrams with only finite coordinates. + :param init: The initial value for barycenter estimate. + If ``None``, init is made on a random diagram from the dataset. + Otherwise, it can be an ``int`` (then initialization is made on ``pdiagset[init]``) - or a `(n x 2)` ``numpy.array`` enconding + or a `(n x 2)` ``numpy.array`` enconding a persistence diagram with `n` points. :type init: ``int``, or (n x 2) ``np.array`` :param verbose: if ``True``, returns additional information about the @@ -48,16 +48,16 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): :type verbose: boolean :returns: If not verbose (default), a ``numpy.array`` encoding the barycenter estimate of pdiagset - (local minimum of the energy function). + (local minimum of the energy function). If ``pdiagset`` is empty, returns ``None``. If verbose, returns a couple ``(Y, log)`` where ``Y`` is the barycenter estimate, and ``log`` is a ``dict`` that contains additional informations: - `"groupings"`, a list of list of pairs ``(i,j)``. - Namely, ``G[k] = [...(i, j)...]``, where ``(i,j)`` indicates + Namely, ``G[k] = [...(i, j)...]``, where ``(i,j)`` indicates that ``pdiagset[k][i]`` is matched to ``Y[j]`` - if ``i = -1`` or ``j = -1``, it means they + if ``i = -1`` or ``j = -1``, it means they represent the diagonal. - `"energy"`, ``float`` representing the Frechet energy value obtained. @@ -70,13 +70,13 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): if m == 0: print("Warning: computing barycenter of empty diag set. 
Returns None") return None - + # store the number of off-diagonal point for each of the X_i - nb_off_diag = np.array([len(X_i) for X_i in X]) + nb_off_diag = np.array([len(X_i) for X_i in X]) # Initialisation of barycenter if init is None: i0 = np.random.randint(m) # Index of first state for the barycenter - Y = X[i0].copy() + Y = X[i0].copy() else: if type(init)==int: Y = X[init].copy() @@ -90,8 +90,8 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): nb_iter += 1 K = len(Y) # current nb of points in Y (some might be on diagonal) G = np.full((K, m), -1, dtype=int) # will store for each j, the (index) - # point matched in each other diagram - #(might be the diagonal). + # point matched in each other diagram + #(might be the diagonal). # that is G[j, i] = k <=> y_j is matched to # x_k in the diagram i-th diagram X[i] updated_points = np.zeros((K, 2)) # will store the new positions of @@ -111,7 +111,7 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): else: # ...which is a diagonal point G[y_j, i] = -1 # -1 stands for the diagonal (mask) else: # We matched a diagonal point to x_i_j... - if x_i_j >= 0: # which is a off-diag point ! + if x_i_j >= 0: # which is a off-diag point ! # need to create new point in Y new_y = _mean(np.array([X[i][x_i_j]]), m) # Average this point with (m-1) copies of Delta @@ -123,19 +123,19 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): matched_points = [X[i][G[j, i]] for i in range(m) if G[j, i] > -1] new_y_j = _mean(matched_points, m) if not np.array_equal(new_y_j, np.array([0,0])): - updated_points[j] = new_y_j + updated_points[j] = new_y_j else: # this points is no longer of any use. to_delete.append(j) # we remove the point to be deleted now. - updated_points = np.delete(updated_points, to_delete, axis=0) + updated_points = np.delete(updated_points, to_delete, axis=0) # we cannot converge if there have been new created points. 
- if new_created_points: + if new_created_points: Y = np.concatenate((updated_points, new_created_points)) else: # Step 3 : we check convergence if np.array_equal(updated_points, Y): - converged = True + converged = True Y = updated_points diff --git a/src/python/gudhi/wasserstein/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py index e1233eec..35315939 100644 --- a/src/python/gudhi/wasserstein/wasserstein.py +++ b/src/python/gudhi/wasserstein/wasserstein.py @@ -30,9 +30,9 @@ def _build_dist_matrix(X, Y, order=2., internal_p=2.): :param Y: (m x 2) numpy.array encoding the second diagram. :param order: exponent for the Wasserstein metric. :param internal_p: Ground metric (i.e. norm L^p). - :returns: (n+1) x (m+1) np.array encoding the cost matrix C. - For 0 <= i < n, 0 <= j < m, C[i,j] encodes the distance between X[i] and Y[j], - while C[i, m] (resp. C[n, j]) encodes the distance (to the p) between X[i] (resp Y[j]) + :returns: (n+1) x (m+1) np.array encoding the cost matrix C. + For 0 <= i < n, 0 <= j < m, C[i,j] encodes the distance between X[i] and Y[j], + while C[i, m] (resp. C[n, j]) encodes the distance (to the p) between X[i] (resp Y[j]) and its orthogonal projection onto the diagonal. note also that C[n, m] = 0 (it costs nothing to move from the diagonal to the diagonal). ''' @@ -59,7 +59,7 @@ def _perstot(X, order, internal_p): :param X: (n x 2) numpy.array (points of a given diagram). :param order: exponent for Wasserstein. Default value is 2. :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm). - :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram). + :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram). 
''' Xdiag = _proj_on_diag(X) return (np.sum(np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order))**(1./order) @@ -67,16 +67,16 @@ def _perstot(X, order, internal_p): def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2.): ''' - :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points + :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate). :param Y: (m x 2) numpy.array encoding the second diagram. :param matching: if True, computes and returns the optimal matching between X and Y, encoded as a (n x 2) np.array [...[i,j]...], meaning the i-th point in X is matched to the j-th point in Y, with the convention (-1) represents the diagonal. :param order: exponent for Wasserstein; Default value is 2. - :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); + :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm). - :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with + :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with respect to the internal_p-norm as ground metric. If matching is set to True, also returns the optimal matching between X and Y. 
''' diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py index f686aef5..f68c748e 100755 --- a/src/python/test/test_wasserstein_barycenter.py +++ b/src/python/test/test_wasserstein_barycenter.py @@ -17,7 +17,7 @@ __license__ = "MIT" def test_lagrangian_barycenter(): - + dg1 = np.array([[0.2, 0.5]]) dg2 = np.array([[0.2, 0.7]]) dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]]) @@ -28,12 +28,12 @@ def test_lagrangian_barycenter(): dg7 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]]) dg8 = np.array([[0., 4.], [4, 8]]) - + # error crit. eps = 1e-7 - assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < eps + assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < eps assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), np.empty(shape=(0,2))) assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < eps Y, log = lagrangian_barycenter(pdiagset=[dg4, dg8], verbose=True) diff --git a/src/python/test/test_wasserstein_distance.py b/src/python/test/test_wasserstein_distance.py index 0d70e11a..7e0d0f5f 100755 --- a/src/python/test/test_wasserstein_distance.py +++ b/src/python/test/test_wasserstein_distance.py @@ -70,7 +70,7 @@ def _basic_wasserstein(wasserstein_distance, delta, test_infinity=True, test_mat assert np.array_equal(match , [[0, -1], [1, -1]]) match = wasserstein_distance(diag1, diag2, matching=True, internal_p=2., order=2.)[1] assert np.array_equal(match, [[0, 0], [1, 1], [2, -1]]) - + def hera_wrap(delta): -- cgit v1.2.3 From 82dd4481fa0ecb8c1f696ee33e26d9be1e371e88 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 6 Apr 2020 22:46:32 +0200 Subject: Document dependencies for building the doc --- src/python/doc/installation.rst | 8 ++++---- 1 file changed, 4 
insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/python/doc/installation.rst b/src/python/doc/installation.rst index d459145b..48425d5e 100644 --- a/src/python/doc/installation.rst +++ b/src/python/doc/installation.rst @@ -175,8 +175,8 @@ Documentation To build the documentation, `sphinx-doc `_ and `sphinxcontrib-bibtex `_ are required. As the documentation is auto-tested, `CGAL`_, `Eigen`_, -`Matplotlib`_, `NumPy`_ and `SciPy`_ are also mandatory to build the -documentation. +`Matplotlib`_, `NumPy`_, `POT`_, `Scikit-learn`_ and `SciPy`_ are +also mandatory to build the documentation. Run the following commands in a terminal: @@ -192,8 +192,8 @@ CGAL ==== Some GUDHI modules (cf. :doc:`modules list `), and few examples -require CGAL, a C++ library that provides easy access to efficient and -reliable geometric algorithms. +require `CGAL `_, a C++ library that provides easy +access to efficient and reliable geometric algorithms. The procedure to install this library -- cgit v1.2.3 From f9a933862050ca95b3a96d7a8572d62f7f2205a9 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 11 Apr 2020 18:18:14 +0200 Subject: Use longer names --- src/python/gudhi/point_cloud/dtm.py | 10 +++-- src/python/gudhi/point_cloud/knn.py | 2 +- src/python/test/test_dtm.py | 18 ++++----- src/python/test/test_knn.py | 76 +++++++++++++++++++++++++++---------- 4 files changed, 71 insertions(+), 35 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 23c36b88..38368f29 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -7,10 +7,10 @@ # Modification(s): # - YYYY/MM Author: Description of the modification -from .knn import KNN +from .knn import KNearestNeighbors -class DTM: +class DistanceToMeasure: """ Class to compute the distance to the empirical measure defined by a point set, as introduced in :cite:`dtm`. 
""" @@ -20,7 +20,7 @@ class DTM: Args: k (int): number of neighbors (possibly including the point itself). q (float): order used to compute the distance to measure. Defaults to 2. - kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNN`, except that metric="neighbors" means that :func:`transform` expects an array with the distances to the k nearest neighbors. + kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNearestNeighbors`, except that metric="neighbors" means that :func:`transform` expects an array with the distances to the k nearest neighbors. """ self.k = k self.q = q @@ -35,7 +35,9 @@ class DTM: X (numpy.array): coordinates for mass points. """ if self.params.setdefault("metric", "euclidean") != "neighbors": - self.knn = KNN(self.k, return_index=False, return_distance=True, sort_results=False, **self.params) + self.knn = KNearestNeighbors( + self.k, return_index=False, return_distance=True, sort_results=False, **self.params + ) self.knn.fit(X) return self diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 8369f1f8..6642a3c2 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -10,7 +10,7 @@ import numpy -class KNN: +class KNearestNeighbors: """ Class wrapping several implementations for computing the k nearest neighbors in a point set. 
""" diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index 93b13e1a..37934fdb 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -8,7 +8,7 @@ - YYYY/MM Author: Description of the modification """ -from gudhi.point_cloud.dtm import DTM +from gudhi.point_cloud.dtm import DistanceToMeasure import numpy import pytest @@ -16,35 +16,35 @@ import pytest def test_dtm_compare_euclidean(): pts = numpy.random.rand(1000, 4) k = 3 - dtm = DTM(k, implementation="ckdtree") + dtm = DistanceToMeasure(k, implementation="ckdtree") r0 = dtm.fit_transform(pts) - dtm = DTM(k, implementation="sklearn") + dtm = DistanceToMeasure(k, implementation="sklearn") r1 = dtm.fit_transform(pts) assert r1 == pytest.approx(r0) - dtm = DTM(k, implementation="sklearn", algorithm="brute") + dtm = DistanceToMeasure(k, implementation="sklearn", algorithm="brute") r2 = dtm.fit_transform(pts) assert r2 == pytest.approx(r0) - dtm = DTM(k, implementation="hnsw") + dtm = DistanceToMeasure(k, implementation="hnsw") r3 = dtm.fit_transform(pts) assert r3 == pytest.approx(r0) from scipy.spatial.distance import cdist d = cdist(pts, pts) - dtm = DTM(k, metric="precomputed") + dtm = DistanceToMeasure(k, metric="precomputed") r4 = dtm.fit_transform(d) assert r4 == pytest.approx(r0) - dtm = DTM(k, implementation="keops") + dtm = DistanceToMeasure(k, implementation="keops") r5 = dtm.fit_transform(pts) assert r5 == pytest.approx(r0) def test_dtm_precomputed(): dist = numpy.array([[1.0, 3, 8], [1, 5, 5], [0, 2, 3]]) - dtm = DTM(2, q=1, metric="neighbors") + dtm = DistanceToMeasure(2, q=1, metric="neighbors") r = dtm.fit_transform(dist) assert r == pytest.approx([2.0, 3, 1]) dist = numpy.array([[2.0, 2], [0, 1], [3, 4]]) - dtm = DTM(2, q=2, metric="neighbors") + dtm = DistanceToMeasure(2, q=2, metric="neighbors") r = dtm.fit_transform(dist) assert r == pytest.approx([2.0, 0.707, 3.5355], rel=0.01) diff --git a/src/python/test/test_knn.py b/src/python/test/test_knn.py index 
e455fb48..6aac2006 100755 --- a/src/python/test/test_knn.py +++ b/src/python/test/test_knn.py @@ -8,7 +8,7 @@ - YYYY/MM Author: Description of the modification """ -from gudhi.point_cloud.knn import KNN +from gudhi.point_cloud.knn import KNearestNeighbors import numpy as np import pytest @@ -16,39 +16,39 @@ import pytest def test_knn_explicit(): base = np.array([[1.0, 1], [1, 2], [4, 2], [4, 3]]) query = np.array([[1.0, 1], [2, 2], [4, 4]]) - knn = KNN(2, metric="manhattan", return_distance=True, return_index=True) + knn = KNearestNeighbors(2, metric="manhattan", return_distance=True, return_index=True) knn.fit(base) r = knn.transform(query) assert r[0] == pytest.approx(np.array([[0, 1], [1, 0], [3, 2]])) assert r[1] == pytest.approx(np.array([[0.0, 1], [1, 2], [1, 2]])) - knn = KNN(2, metric="chebyshev", return_distance=True, return_index=False) + knn = KNearestNeighbors(2, metric="chebyshev", return_distance=True, return_index=False) knn.fit(base) r = knn.transform(query) assert r == pytest.approx(np.array([[0.0, 1], [1, 1], [1, 2]])) r = ( - KNN(2, metric="chebyshev", return_distance=True, return_index=False, implementation="keops") + KNearestNeighbors(2, metric="chebyshev", return_distance=True, return_index=False, implementation="keops") .fit(base) .transform(query) ) assert r == pytest.approx(np.array([[0.0, 1], [1, 1], [1, 2]])) - knn = KNN(2, metric="minkowski", p=3, return_distance=False, return_index=True) + knn = KNearestNeighbors(2, metric="minkowski", p=3, return_distance=False, return_index=True) knn.fit(base) r = knn.transform(query) assert np.array_equal(r, [[0, 1], [1, 0], [3, 2]]) r = ( - KNN(2, metric="minkowski", p=3, return_distance=False, return_index=True, implementation="keops") + KNearestNeighbors(2, metric="minkowski", p=3, return_distance=False, return_index=True, implementation="keops") .fit(base) .transform(query) ) assert np.array_equal(r, [[0, 1], [1, 0], [3, 2]]) dist = np.array([[0.0, 3, 8], [1, 0, 5], [1, 2, 0]]) - knn = KNN(2, 
metric="precomputed", return_index=True, return_distance=False) + knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=False) r = knn.fit_transform(dist) assert np.array_equal(r, [[0, 1], [1, 0], [2, 0]]) - knn = KNN(2, metric="precomputed", return_index=True, return_distance=True) + knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=True) r = knn.fit_transform(dist) assert np.array_equal(r[0], [[0, 1], [1, 0], [2, 0]]) assert np.array_equal(r[1], [[0, 3], [0, 1], [0, 1]]) @@ -57,16 +57,40 @@ def test_knn_explicit(): def test_knn_compare(): base = np.array([[1.0, 1], [1, 2], [4, 2], [4, 3]]) query = np.array([[1.0, 1], [2, 2], [4, 4]]) - r0 = KNN(2, implementation="ckdtree", return_index=True, return_distance=False).fit(base).transform(query) - r1 = KNN(2, implementation="sklearn", return_index=True, return_distance=False).fit(base).transform(query) - r2 = KNN(2, implementation="hnsw", return_index=True, return_distance=False).fit(base).transform(query) - r3 = KNN(2, implementation="keops", return_index=True, return_distance=False).fit(base).transform(query) + r0 = ( + KNearestNeighbors(2, implementation="ckdtree", return_index=True, return_distance=False) + .fit(base) + .transform(query) + ) + r1 = ( + KNearestNeighbors(2, implementation="sklearn", return_index=True, return_distance=False) + .fit(base) + .transform(query) + ) + r2 = ( + KNearestNeighbors(2, implementation="hnsw", return_index=True, return_distance=False).fit(base).transform(query) + ) + r3 = ( + KNearestNeighbors(2, implementation="keops", return_index=True, return_distance=False) + .fit(base) + .transform(query) + ) assert np.array_equal(r0, r1) and np.array_equal(r0, r2) and np.array_equal(r0, r3) - r0 = KNN(2, implementation="ckdtree", return_index=True, return_distance=True).fit(base).transform(query) - r1 = KNN(2, implementation="sklearn", return_index=True, return_distance=True).fit(base).transform(query) - r2 = KNN(2, 
implementation="hnsw", return_index=True, return_distance=True).fit(base).transform(query) - r3 = KNN(2, implementation="keops", return_index=True, return_distance=True).fit(base).transform(query) + r0 = ( + KNearestNeighbors(2, implementation="ckdtree", return_index=True, return_distance=True) + .fit(base) + .transform(query) + ) + r1 = ( + KNearestNeighbors(2, implementation="sklearn", return_index=True, return_distance=True) + .fit(base) + .transform(query) + ) + r2 = KNearestNeighbors(2, implementation="hnsw", return_index=True, return_distance=True).fit(base).transform(query) + r3 = ( + KNearestNeighbors(2, implementation="keops", return_index=True, return_distance=True).fit(base).transform(query) + ) assert np.array_equal(r0[0], r1[0]) and np.array_equal(r0[0], r2[0]) and np.array_equal(r0[0], r3[0]) d0 = pytest.approx(r0[1]) assert r1[1] == d0 and r2[1] == d0 and r3[1] == d0 @@ -75,8 +99,18 @@ def test_knn_compare(): def test_knn_nop(): # This doesn't look super useful... p = np.array([[0.0]]) - assert None is KNN(k=1, return_index=False, return_distance=False, implementation="sklearn").fit_transform(p) - assert None is KNN(k=1, return_index=False, return_distance=False, implementation="ckdtree").fit_transform(p) - assert None is KNN(k=1, return_index=False, return_distance=False, implementation="hnsw", ef=5).fit_transform(p) - assert None is KNN(k=1, return_index=False, return_distance=False, implementation="keops").fit_transform(p) - assert None is KNN(k=1, return_index=False, return_distance=False, metric="precomputed").fit_transform(p) + assert None is KNearestNeighbors( + k=1, return_index=False, return_distance=False, implementation="sklearn" + ).fit_transform(p) + assert None is KNearestNeighbors( + k=1, return_index=False, return_distance=False, implementation="ckdtree" + ).fit_transform(p) + assert None is KNearestNeighbors( + k=1, return_index=False, return_distance=False, implementation="hnsw", ef=5 + ).fit_transform(p) + assert None is 
KNearestNeighbors( + k=1, return_index=False, return_distance=False, implementation="keops" + ).fit_transform(p) + assert None is KNearestNeighbors( + k=1, return_index=False, return_distance=False, metric="precomputed" + ).fit_transform(p) -- cgit v1.2.3 From 83a1bc1fb6124a35d515f4836d2e830f3dbdf0e7 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sun, 12 Apr 2020 21:57:51 +0200 Subject: Parallelize the "precomputed" case of knn It is supposed to be possible to compile numpy with openmp, but it looks like it isn't done in any of the usual packages. It may be possible to refactor that code so there is less redundancy. --- src/python/gudhi/point_cloud/knn.py | 78 +++++++++++++++++++++++++++++-------- src/python/test/test_dtm.py | 3 ++ src/python/test/test_knn.py | 8 ++++ 3 files changed, 73 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 6642a3c2..f6870517 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -115,25 +115,71 @@ class KNearestNeighbors: if metric == "precomputed": # scikit-learn could handle that, but they insist on calling fit() with an unused square array, which is too unnatural. - X = numpy.array(X) if self.return_index: - neighbors = numpy.argpartition(X, k - 1)[:, 0:k] - if self.params.get("sort_results", True): - X = numpy.take_along_axis(X, neighbors, axis=-1) - ngb_order = numpy.argsort(X, axis=-1) - neighbors = numpy.take_along_axis(neighbors, ngb_order, axis=-1) + n_jobs = self.params.get("n_jobs", 1) + # Supposedly numpy can be compiled with OpenMP and handle this, but nobody does that?! 
+ if n_jobs == 1: + neighbors = numpy.argpartition(X, k - 1)[:, 0:k] + if self.params.get("sort_results", True): + X = numpy.take_along_axis(X, neighbors, axis=-1) + ngb_order = numpy.argsort(X, axis=-1) + neighbors = numpy.take_along_axis(neighbors, ngb_order, axis=-1) + else: + ngb_order = neighbors + if self.return_distance: + distances = numpy.take_along_axis(X, ngb_order, axis=-1) + return neighbors, distances + else: + return neighbors else: - ngb_order = neighbors - if self.return_distance: - distances = numpy.take_along_axis(X, ngb_order, axis=-1) - return neighbors, distances - else: - return neighbors + from joblib import Parallel, delayed, effective_n_jobs + from sklearn.utils import gen_even_slices + + slices = gen_even_slices(len(X), effective_n_jobs(-1)) + parallel = Parallel(backend="threading", n_jobs=-1) + if self.params.get("sort_results", True): + + def func(M): + neighbors = numpy.argpartition(M, k - 1)[:, 0:k] + Y = numpy.take_along_axis(M, neighbors, axis=-1) + ngb_order = numpy.argsort(Y, axis=-1) + return numpy.take_along_axis(neighbors, ngb_order, axis=-1) + + else: + + def func(M): + return numpy.argpartition(M, k - 1)[:, 0:k] + + neighbors = numpy.concatenate(parallel(delayed(func)(X[s]) for s in slices)) + if self.return_distance: + distances = numpy.take_along_axis(X, neighbors, axis=-1) + return neighbors, distances + else: + return neighbors if self.return_distance: - distances = numpy.partition(X, k - 1)[:, 0:k] - if self.params.get("sort_results"): - # partition is not guaranteed to sort the lower half, although it often does - distances.sort(axis=-1) + n_jobs = self.params.get("n_jobs", 1) + if n_jobs == 1: + distances = numpy.partition(X, k - 1)[:, 0:k] + if self.params.get("sort_results"): + # partition is not guaranteed to sort the lower half, although it often does + distances.sort(axis=-1) + else: + from joblib import Parallel, delayed, effective_n_jobs + from sklearn.utils import gen_even_slices + + if 
self.params.get("sort_results"): + + def func(M): + # Not partitioning in place, because we should not modify the user's array? + r = numpy.partition(M, k - 1)[:, 0:k] + r.sort(axis=-1) + return r + + else: + func = lambda M: numpy.partition(M, k - 1)[:, 0:k] + slices = gen_even_slices(len(X), effective_n_jobs(-1)) + parallel = Parallel(backend="threading", n_jobs=-1) + distances = numpy.concatenate(parallel(delayed(func)(X[s]) for s in slices)) return distances return None diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index 37934fdb..bc0d3698 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -33,6 +33,9 @@ def test_dtm_compare_euclidean(): dtm = DistanceToMeasure(k, metric="precomputed") r4 = dtm.fit_transform(d) assert r4 == pytest.approx(r0) + dtm = DistanceToMeasure(k, metric="precomputed", n_jobs=2) + r4b = dtm.fit_transform(d) + assert r4b == pytest.approx(r0) dtm = DistanceToMeasure(k, implementation="keops") r5 = dtm.fit_transform(pts) assert r5 == pytest.approx(r0) diff --git a/src/python/test/test_knn.py b/src/python/test/test_knn.py index 6aac2006..6269df54 100755 --- a/src/python/test/test_knn.py +++ b/src/python/test/test_knn.py @@ -52,6 +52,14 @@ def test_knn_explicit(): r = knn.fit_transform(dist) assert np.array_equal(r[0], [[0, 1], [1, 0], [2, 0]]) assert np.array_equal(r[1], [[0, 3], [0, 1], [0, 1]]) + # Second time in parallel + knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=False, n_jobs=2) + r = knn.fit_transform(dist) + assert np.array_equal(r, [[0, 1], [1, 0], [2, 0]]) + knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=True, n_jobs=2) + r = knn.fit_transform(dist) + assert np.array_equal(r[0], [[0, 1], [1, 0], [2, 0]]) + assert np.array_equal(r[1], [[0, 3], [0, 1], [0, 1]]) def test_knn_compare(): -- cgit v1.2.3 From 280eb9d2323837619db1ae013b929adb9b45013b Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 13 Apr 
2020 01:09:45 +0200 Subject: enable_autodiff with keops This doesn't seem like the best way to handle it, we may want to handle it like a wrapper that gets the indices from knn (whatever backend) and then computes the distances. --- src/python/gudhi/point_cloud/knn.py | 33 +++++++++++++++++++++++++++++---- src/python/test/test_dtm.py | 8 ++++++++ src/python/test/test_knn.py | 6 ++++++ 3 files changed, 43 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index f6870517..79362c09 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -36,6 +36,9 @@ class KNearestNeighbors: sort_results (bool): if True, then distances and indices of each point are sorted on return, so that the first column contains the closest points. Otherwise, neighbors are returned in an arbitrary order. Defaults to True. + enable_autodiff (bool): if the input is a torch.tensor, jax.numpy.array or similar, this instructs + the function to compute distances in a way that works with automatic differentiation. + This is experimental and not supported for all implementations. kwargs: additional parameters are forwarded to the backends. """ self.k = k @@ -202,13 +205,18 @@ class KNearestNeighbors: if self.params["implementation"] == "keops": import torch from pykeops.torch import LazyTensor + import eagerpy as ep # 'float64' is slow except on super expensive GPUs. Allow it with some param? 
- XX = torch.tensor(X, dtype=torch.float32) - if X is self.ref_points: + queries = X + X = ep.astensor(X) + XX = torch.as_tensor(X.numpy(), dtype=torch.float32) + if queries is self.ref_points: + Y = X YY = XX else: - YY = torch.tensor(self.ref_points, dtype=torch.float32) + Y = ep.astensor(self.ref_points) + YY = torch.as_tensor(Y.numpy(), dtype=torch.float32) p = self.params["p"] if p == numpy.inf: @@ -219,6 +227,24 @@ class KNearestNeighbors: else: mat = ((LazyTensor(XX[:, None, :]) - LazyTensor(YY[None, :, :])).abs() ** p).sum(-1) + # pykeops does not support autodiff for kmin yet :-( + if self.params.get("enable_autodiff", False) and self.return_distance: + # Compute the indices of the neighbors, and recompute the relevant distances autodiff-friendly. + # Another strategy would be to compute the whole distance matrix with torch.cdist + # and use neighbors as indices into it. + neighbors = ep.astensor(mat.argKmin(k, dim=1)).numpy() + neighbor_pts = Y[neighbors] + diff = neighbor_pts - X[:, None, :] + if p == numpy.inf: + distances = diff.abs().max(-1) + elif p == 2: + distances = (diff ** 2).sum(-1) ** 0.5 + else: + distances = (diff.abs() ** p).sum(-1) ** (1.0 / p) + if self.return_index: + return neighbors.raw, distances.raw + else: + return distances.raw if self.return_index: if self.return_distance: distances, neighbors = mat.Kmin_argKmin(k, dim=1) @@ -234,7 +260,6 @@ class KNearestNeighbors: distances = distances ** (1.0 / p) return distances return None - # FIXME: convert everything back to numpy arrays or not? 
if self.params["implementation"] == "ckdtree": qargs = {key: val for key, val in self.params.items() if key in {"p", "eps", "n_jobs"}} diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index bc0d3698..8709dd07 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -11,6 +11,7 @@ from gudhi.point_cloud.dtm import DistanceToMeasure import numpy import pytest +import torch def test_dtm_compare_euclidean(): @@ -39,6 +40,13 @@ def test_dtm_compare_euclidean(): dtm = DistanceToMeasure(k, implementation="keops") r5 = dtm.fit_transform(pts) assert r5 == pytest.approx(r0) + pts2 = torch.tensor(pts, requires_grad=True) + assert pts2.grad is None + dtm = DistanceToMeasure(k, implementation="keops", enable_autodiff=True) + r6 = dtm.fit_transform(pts2) + assert r6.detach().numpy() == pytest.approx(r0) + r6.sum().backward() + assert pts2.grad is not None def test_dtm_precomputed(): diff --git a/src/python/test/test_knn.py b/src/python/test/test_knn.py index 6269df54..415c9d48 100755 --- a/src/python/test/test_knn.py +++ b/src/python/test/test_knn.py @@ -32,6 +32,12 @@ def test_knn_explicit(): .transform(query) ) assert r == pytest.approx(np.array([[0.0, 1], [1, 1], [1, 2]])) + r = ( + KNearestNeighbors(2, metric="chebyshev", return_distance=True, return_index=False, implementation="keops", enable_autodiff=True) + .fit(base) + .transform(query) + ) + assert r == pytest.approx(np.array([[0.0, 1], [1, 1], [1, 2]])) knn = KNearestNeighbors(2, metric="minkowski", p=3, return_distance=False, return_index=True) knn.fit(base) -- cgit v1.2.3 From 2f1576a23cf4ac055565875d384ca604c0ff6844 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 13 Apr 2020 15:01:51 +0200 Subject: Small autodiff tweaks --- src/python/gudhi/point_cloud/knn.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 79362c09..ab3447d4 100644 --- 
a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -233,16 +233,17 @@ class KNearestNeighbors: # Another strategy would be to compute the whole distance matrix with torch.cdist # and use neighbors as indices into it. neighbors = ep.astensor(mat.argKmin(k, dim=1)).numpy() - neighbor_pts = Y[neighbors] + # Work around https://github.com/pytorch/pytorch/issues/34452 + neighbor_pts = Y[neighbors,] diff = neighbor_pts - X[:, None, :] if p == numpy.inf: distances = diff.abs().max(-1) elif p == 2: - distances = (diff ** 2).sum(-1) ** 0.5 + distances = (diff ** 2).sum(-1).sqrt() else: distances = (diff.abs() ** p).sum(-1) ** (1.0 / p) if self.return_index: - return neighbors.raw, distances.raw + return neighbors, distances.raw else: return distances.raw if self.return_index: -- cgit v1.2.3 From 3a86402b733a48d9c25a4995325e72c7438c06c0 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 13 Apr 2020 15:21:06 +0200 Subject: Fix NaN gradient with pytorch --- src/python/gudhi/point_cloud/knn.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index ab3447d4..185a7764 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -236,12 +236,11 @@ class KNearestNeighbors: # Work around https://github.com/pytorch/pytorch/issues/34452 neighbor_pts = Y[neighbors,] diff = neighbor_pts - X[:, None, :] - if p == numpy.inf: - distances = diff.abs().max(-1) - elif p == 2: - distances = (diff ** 2).sum(-1).sqrt() + if isinstance(diff, ep.PyTorchTensor): + # https://github.com/jonasrauber/eagerpy/issues/6 + distances = ep.astensor(diff.raw.norm(p, -1)) else: - distances = (diff.abs() ** p).sum(-1) ** (1.0 / p) + distances = diff.norms.lp(p, -1) if self.return_index: return neighbors, distances.raw else: -- cgit v1.2.3 From 3afce326428dddd638e22ab37ee4b2afe52eba75 Mon Sep 17 00:00:00 2001 From: Marc Glisse 
Date: Mon, 13 Apr 2020 20:32:39 +0200 Subject: Generalize enable_autodiff to more implementations Still limited to L^p --- src/python/gudhi/point_cloud/knn.py | 76 +++++++++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 185a7764..87b2798e 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -9,6 +9,7 @@ import numpy +# TODO: https://github.com/facebookresearch/faiss class KNearestNeighbors: """ @@ -67,6 +68,8 @@ class KNearestNeighbors: self.params["implementation"] = "ckdtree" else: self.params["implementation"] = "sklearn" + if not return_distance: + self.params["enable_autodiff"] = False def fit_transform(self, X, y=None): return self.fit(X).transform(X) @@ -77,6 +80,10 @@ class KNearestNeighbors: X (numpy.array): coordinates for reference points. """ self.ref_points = X + if self.params.get("enable_autodiff", False): + import eagerpy as ep + if self.params["implementation"] != "keops" or not isinstance(X, ep.PyTorchTensor): + X = ep.astensor(X).numpy() if self.params["implementation"] == "ckdtree": # sklearn could handle this, but it is much slower from scipy.spatial import cKDTree @@ -113,6 +120,41 @@ class KNearestNeighbors: Args: X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed". """ + if self.params.get("enable_autodiff", False): + # pykeops does not support autodiff for kmin yet, but when it does in the future, + # we may want a special path. + import eagerpy as ep + save_return_index = self.return_index + self.return_index = True + self.return_distance = False + self.params["enable_autodiff"] = False + try: + # FIXME: how do we test "X is ref_points" then? 
+ newX = ep.astensor(X) + if self.params["implementation"] != "keops" or not isinstance(newX, ep.PyTorchTensor): + newX = newX.numpy() + neighbors = self.transform(newX) + finally: + self.return_index = save_return_index + self.return_distance = True + self.params["enable_autodiff"] = True + # We can implement more later as needed + assert self.metric == "minkowski" + p = self.params["p"] + Y = ep.astensor(self.ref_points) + neighbor_pts = Y[neighbors,] + diff = neighbor_pts - X[:, None, :] + if isinstance(diff, ep.PyTorchTensor): + # https://github.com/jonasrauber/eagerpy/issues/6 + distances = ep.astensor(diff.raw.norm(p, -1)) + else: + distances = diff.norms.lp(p, -1) + if self.return_index: + return neighbors, distances.raw + else: + return distances.raw + + metric = self.metric k = self.k @@ -207,16 +249,26 @@ class KNearestNeighbors: from pykeops.torch import LazyTensor import eagerpy as ep - # 'float64' is slow except on super expensive GPUs. Allow it with some param? queries = X X = ep.astensor(X) - XX = torch.as_tensor(X.numpy(), dtype=torch.float32) + if isinstance(X, ep.PyTorchTensor): + XX = X.raw + else: + # I don't know a clever way to reuse a GPU tensor from tensorflow in pytorch + # without copying to/from the CPU. + XX = X.numpy() + # 'float64' is slow except on super expensive GPUs. Allow it with some param? 
+ XX = torch.as_tensor(XX, dtype=torch.float32) if queries is self.ref_points: Y = X YY = XX else: Y = ep.astensor(self.ref_points) - YY = torch.as_tensor(Y.numpy(), dtype=torch.float32) + if isinstance(Y, ep.PyTorchTensor): + YY = Y.raw + else: + YY = Y.numpy() + YY = torch.as_tensor(YY, dtype=torch.float32) p = self.params["p"] if p == numpy.inf: @@ -227,24 +279,6 @@ class KNearestNeighbors: else: mat = ((LazyTensor(XX[:, None, :]) - LazyTensor(YY[None, :, :])).abs() ** p).sum(-1) - # pykeops does not support autodiff for kmin yet :-( - if self.params.get("enable_autodiff", False) and self.return_distance: - # Compute the indices of the neighbors, and recompute the relevant distances autodiff-friendly. - # Another strategy would be to compute the whole distance matrix with torch.cdist - # and use neighbors as indices into it. - neighbors = ep.astensor(mat.argKmin(k, dim=1)).numpy() - # Work around https://github.com/pytorch/pytorch/issues/34452 - neighbor_pts = Y[neighbors,] - diff = neighbor_pts - X[:, None, :] - if isinstance(diff, ep.PyTorchTensor): - # https://github.com/jonasrauber/eagerpy/issues/6 - distances = ep.astensor(diff.raw.norm(p, -1)) - else: - distances = diff.norms.lp(p, -1) - if self.return_index: - return neighbors, distances.raw - else: - return distances.raw if self.return_index: if self.return_distance: distances, neighbors = mat.Kmin_argKmin(k, dim=1) -- cgit v1.2.3 From 521d8c17c2b7d71c46a51f0490ff2c13c809fc87 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 13 Apr 2020 21:13:19 +0200 Subject: Remove left-over code eagerpy is only used with enable_autodiff --- src/python/gudhi/point_cloud/knn.py | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 87b2798e..f2cddb38 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -82,8 +82,11 @@ class 
KNearestNeighbors: self.ref_points = X if self.params.get("enable_autodiff", False): import eagerpy as ep + X = ep.astensor(X) if self.params["implementation"] != "keops" or not isinstance(X, ep.PyTorchTensor): - X = ep.astensor(X).numpy() + # I don't know a clever way to reuse a GPU tensor from tensorflow in pytorch + # without copying to/from the CPU. + X = X.numpy() if self.params["implementation"] == "ckdtree": # sklearn could handle this, but it is much slower from scipy.spatial import cKDTree @@ -133,6 +136,8 @@ class KNearestNeighbors: newX = ep.astensor(X) if self.params["implementation"] != "keops" or not isinstance(newX, ep.PyTorchTensor): newX = newX.numpy() + else: + newX = X neighbors = self.transform(newX) finally: self.return_index = save_return_index @@ -247,29 +252,13 @@ class KNearestNeighbors: if self.params["implementation"] == "keops": import torch from pykeops.torch import LazyTensor - import eagerpy as ep - queries = X - X = ep.astensor(X) - if isinstance(X, ep.PyTorchTensor): - XX = X.raw - else: - # I don't know a clever way to reuse a GPU tensor from tensorflow in pytorch - # without copying to/from the CPU. - XX = X.numpy() # 'float64' is slow except on super expensive GPUs. Allow it with some param? 
- XX = torch.as_tensor(XX, dtype=torch.float32) - if queries is self.ref_points: - Y = X + XX = torch.as_tensor(X, dtype=torch.float32) + if X is self.ref_points: YY = XX else: - Y = ep.astensor(self.ref_points) - if isinstance(Y, ep.PyTorchTensor): - YY = Y.raw - else: - YY = Y.numpy() - YY = torch.as_tensor(YY, dtype=torch.float32) - + YY = torch.as_tensor(self.ref_points, dtype=torch.float32) p = self.params["p"] if p == numpy.inf: # Requires pykeops 1.4 or later -- cgit v1.2.3 From ce75f66da5a2d7ad2c479355112d48817c5ba68b Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 13 Apr 2020 21:38:24 +0200 Subject: Tweak to detect fit_transform --- src/python/gudhi/point_cloud/knn.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index f2cddb38..8b3cdb46 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -11,6 +11,7 @@ import numpy # TODO: https://github.com/facebookresearch/faiss + class KNearestNeighbors: """ Class wrapping several implementations for computing the k nearest neighbors in a point set. @@ -82,6 +83,7 @@ class KNearestNeighbors: self.ref_points = X if self.params.get("enable_autodiff", False): import eagerpy as ep + X = ep.astensor(X) if self.params["implementation"] != "keops" or not isinstance(X, ep.PyTorchTensor): # I don't know a clever way to reuse a GPU tensor from tensorflow in pytorch @@ -127,17 +129,19 @@ class KNearestNeighbors: # pykeops does not support autodiff for kmin yet, but when it does in the future, # we may want a special path. import eagerpy as ep + save_return_index = self.return_index self.return_index = True self.return_distance = False self.params["enable_autodiff"] = False try: - # FIXME: how do we test "X is ref_points" then? 
newX = ep.astensor(X) - if self.params["implementation"] != "keops" or not isinstance(newX, ep.PyTorchTensor): + if self.params["implementation"] != "keops" or ( + not isinstance(newX, ep.PyTorchTensor) and not isinstance(newX, ep.NumPyTensor) + ): newX = newX.numpy() else: - newX = X + newX = newX.raw neighbors = self.transform(newX) finally: self.return_index = save_return_index @@ -159,7 +163,6 @@ class KNearestNeighbors: else: return distances.raw - metric = self.metric k = self.k -- cgit v1.2.3 From f0c5aab988ee966510503a30b0591105594ac67d Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 14 Apr 2020 15:37:31 +0200 Subject: More testing --- src/python/test/test_dtm.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src') diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index 8709dd07..db3e5df5 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -47,6 +47,13 @@ def test_dtm_compare_euclidean(): assert r6.detach().numpy() == pytest.approx(r0) r6.sum().backward() assert pts2.grad is not None + pts2 = torch.tensor(pts, requires_grad=True) + assert pts2.grad is None + dtm = DistanceToMeasure(k, implementation="ckdtree", enable_autodiff=True) + r7 = dtm.fit_transform(pts2) + assert r7.detach().numpy() == pytest.approx(r0) + r7.sum().backward() + assert pts2.grad is not None def test_dtm_precomputed(): -- cgit v1.2.3 From b908205e85bbe29c8d18ad1f38e783a1327434d7 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 14 Apr 2020 17:00:27 +0200 Subject: EagerPy in cmake --- src/cmake/modules/GUDHI_third_party_libraries.cmake | 1 + src/python/CMakeLists.txt | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/cmake/modules/GUDHI_third_party_libraries.cmake b/src/cmake/modules/GUDHI_third_party_libraries.cmake index a931b3a1..0abe66b7 100644 --- a/src/cmake/modules/GUDHI_third_party_libraries.cmake +++ b/src/cmake/modules/GUDHI_third_party_libraries.cmake @@ -181,6 +181,7 
@@ if( PYTHONINTERP_FOUND ) find_python_module("pybind11") find_python_module("torch") find_python_module("pykeops") + find_python_module("eagerpy") find_python_module_no_version("hnswlib") endif() diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index d7a6a4db..99e8b57c 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -88,6 +88,9 @@ if(PYTHONINTERP_FOUND) if(PYKEOPS_FOUND) add_gudhi_debug_info("PyKeOps version ${PYKEOPS_VERSION}") endif() + if(EAGERPY_FOUND) + add_gudhi_debug_info("EagerPy version ${EAGERPY_VERSION}") + endif() set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DBOOST_RESULT_OF_USE_DECLTYPE', ") set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DBOOST_ALL_NO_LIB', ") @@ -410,7 +413,7 @@ if(PYTHONINTERP_FOUND) add_gudhi_py_test(test_time_delay) # DTM - if(SCIPY_FOUND AND SKLEARN_FOUND AND TORCH_FOUND AND HNSWLIB_FOUND AND PYKEOPS_FOUND) + if(SCIPY_FOUND AND SKLEARN_FOUND AND TORCH_FOUND AND HNSWLIB_FOUND AND PYKEOPS_FOUND AND EAGERPY_FOUND) add_gudhi_py_test(test_knn) add_gudhi_py_test(test_dtm) endif() -- cgit v1.2.3 From 9518287cfa2a62948ede2e7d17d5c9f29092e0f4 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 14 Apr 2020 18:27:19 +0200 Subject: Doc improvements --- src/python/gudhi/point_cloud/dtm.py | 12 ++++++++++-- src/python/gudhi/point_cloud/knn.py | 11 ++++++++--- 2 files changed, 18 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 38368f29..58dec536 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -20,7 +20,9 @@ class DistanceToMeasure: Args: k (int): number of neighbors (possibly including the point itself). q (float): order used to compute the distance to measure. Defaults to 2. 
- kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNearestNeighbors`, except that metric="neighbors" means that :func:`transform` expects an array with the distances to the k nearest neighbors. + kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNearestNeighbors`, except that + metric="neighbors" means that :func:`transform` expects an array with the distances + to the k nearest neighbors. """ self.k = k self.q = q @@ -44,7 +46,13 @@ class DistanceToMeasure: def transform(self, X): """ Args: - X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed", or distances to the k nearest neighbors if metric is "neighbors" (if the array has more than k columns, the remaining ones are ignored). + X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed", + or distances to the k nearest neighbors if metric is "neighbors" (if the array has more + than k columns, the remaining ones are ignored). + + Returns: + numpy.array: a 1-d array with, for each point of X, its distance to the measure defined + by the argument of :func:`fit`. """ if self.params["metric"] == "neighbors": distances = X[:, : self.k] diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 8b3cdb46..d7cf0b2a 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -38,9 +38,9 @@ class KNearestNeighbors: sort_results (bool): if True, then distances and indices of each point are sorted on return, so that the first column contains the closest points. Otherwise, neighbors are returned in an arbitrary order. Defaults to True. - enable_autodiff (bool): if the input is a torch.tensor, jax.numpy.array or similar, this instructs - the function to compute distances in a way that works with automatic differentiation. - This is experimental and not supported for all implementations. 
+ enable_autodiff (bool): if the input is a torch.tensor, jax.numpy.ndarray or tensorflow.Tensor, this + instructs the function to compute distances in a way that works with automatic differentiation. + This is experimental and not supported for all metrics. Defaults to False. kwargs: additional parameters are forwarded to the backends. """ self.k = k @@ -124,6 +124,11 @@ class KNearestNeighbors: """ Args: X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed". + + Returns: + numpy.array: if return_index, an array of shape (len(X), k) with the indices (in the argument + of :func:`fit`) of the k nearest neighbors to the points of X. If return_distance, an array of the + same shape with the distances to those neighbors. If both, a tuple with the two arrays, in this order. """ if self.params.get("enable_autodiff", False): # pykeops does not support autodiff for kmin yet, but when it does in the future, -- cgit v1.2.3 From acb9d5b9d1317d3d8168bc3ac46860d078abba84 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 14 Apr 2020 20:30:29 +0200 Subject: Check that the gradient is not NaN This can easily happen with pytorch, and there is special code to avoid it. 
--- src/python/test/test_dtm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index db3e5df5..de74c42b 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -46,14 +46,14 @@ def test_dtm_compare_euclidean(): r6 = dtm.fit_transform(pts2) assert r6.detach().numpy() == pytest.approx(r0) r6.sum().backward() - assert pts2.grad is not None + assert pts2.grad is not None and not torch.isnan(pts2.grad).any() pts2 = torch.tensor(pts, requires_grad=True) assert pts2.grad is None dtm = DistanceToMeasure(k, implementation="ckdtree", enable_autodiff=True) r7 = dtm.fit_transform(pts2) assert r7.detach().numpy() == pytest.approx(r0) r7.sum().backward() - assert pts2.grad is not None + assert pts2.grad is not None and not torch.isnan(pts2.grad).any() def test_dtm_precomputed(): -- cgit v1.2.3 From d302e90dcf4b284e6dc8b3ab21e8a67fb9cf5179 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 16 Apr 2020 15:40:45 +0200 Subject: Update the concept of the simplicial complex We use the key now. It wouldn't be hard to use an unordered_map, but since we usually have an unused field key... --- src/Alpha_complex/concept/SimplicialComplexForAlpha.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'src') diff --git a/src/Alpha_complex/concept/SimplicialComplexForAlpha.h b/src/Alpha_complex/concept/SimplicialComplexForAlpha.h index 1c6c3b0c..c20c3201 100644 --- a/src/Alpha_complex/concept/SimplicialComplexForAlpha.h +++ b/src/Alpha_complex/concept/SimplicialComplexForAlpha.h @@ -72,6 +72,24 @@ struct SimplicialComplexForAlpha { /** \brief Return type of an insertion of a simplex */ typedef unspecified Insertion_result_type; + + /** \name Map interface + * Conceptually a `std::unordered_map`. + * @{ */ + /** \brief Data stored for each simplex. + * + * Must be an integer type. 
*/ + typedef unspecified Simplex_key; + /** \brief Returns a constant dummy number that is either negative, + * or at least as large as the number of simplices. Suggested value: -1. */ + Simplex_key null_key (); + /** \brief Returns the number stored for a simplex by `assign_key()`. + * + * If `assign_key()` has not been called, it must return `null_key()`. */ + Simplex_key key ( Simplex_handle sh ); + /** \brief Store a number for a simplex, which can later be retrieved with `key()`. */ + void assign_key(Simplex_handle sh, Simplex_key n); + /** @} */ }; } // namespace alpha_complex -- cgit v1.2.3 From 039382cbd951c8c94ddfd43b5ae228666a5cabed Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 16 Apr 2020 17:28:58 +0200 Subject: Fix doc of Simplex_tree about keys --- src/Simplex_tree/include/gudhi/Simplex_tree.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/Simplex_tree/include/gudhi/Simplex_tree.h b/src/Simplex_tree/include/gudhi/Simplex_tree.h index 430d1ac4..591a9e37 100644 --- a/src/Simplex_tree/include/gudhi/Simplex_tree.h +++ b/src/Simplex_tree/include/gudhi/Simplex_tree.h @@ -463,7 +463,7 @@ class Simplex_tree { public: /** \brief Returns the key associated to a simplex. * - * The filtration must be initialized. + * If no key has been assigned, returns `null_key()`. * \pre SimplexTreeOptions::store_key */ static Simplex_key key(Simplex_handle sh) { @@ -473,7 +473,6 @@ class Simplex_tree { /** \brief Returns the simplex that has index idx in the filtration. * * The filtration must be initialized. - * \pre SimplexTreeOptions::store_key */ Simplex_handle simplex(Simplex_key idx) const { return filtration_vect_[idx]; @@ -509,8 +508,7 @@ class Simplex_tree { return Dictionary_it(nullptr); } - /** \brief Returns a key different for all keys associated to the - * simplices of the simplicial complex. */ + /** \brief Returns a fixed number not in the interval [0, `num_simplices()`). 
*/ static Simplex_key null_key() { return -1; } @@ -856,11 +854,9 @@ class Simplex_tree { public: /** \brief Initializes the filtrations, i.e. sort the - * simplices according to their order in the filtration and initializes all Simplex_keys. + * simplices according to their order in the filtration. * - * After calling this method, filtration_simplex_range() becomes valid, and each simplex is - * assigned a Simplex_key corresponding to its order in the filtration (from 0 to m-1 for a - * simplicial complex with m simplices). + * After calling this method, filtration_simplex_range() becomes valid. * * Will be automatically called when calling filtration_simplex_range() * if the filtration has never been initialized yet. */ -- cgit v1.2.3 From 17aaa979e4cdfe5faed9b2750d452171de4b67e1 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 17 Apr 2020 22:13:29 +0200 Subject: Simplify distance-to-diagonal in Wasserstein --- src/python/gudhi/wasserstein/wasserstein.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/wasserstein/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py index 35315939..5df66cf9 100644 --- a/src/python/gudhi/wasserstein/wasserstein.py +++ b/src/python/gudhi/wasserstein/wasserstein.py @@ -15,16 +15,19 @@ try: except ImportError: print("POT (Python Optimal Transport) package is not installed. Try to run $ conda install -c conda-forge pot ; or $ pip install POT") -def _proj_on_diag(X): +def _dist_to_diag(X, internal_p): ''' :param X: (n x 2) array encoding the points of a persistent diagram. - :returns: (n x 2) array encoding the (respective orthogonal) projections of the points onto the diagonal + :param internal_p: Ground metric (i.e. norm L^p). + :returns: (n) array encoding the (respective orthogonal) distances of the points to the diagonal + + .. note:: + Assumes that the points are above the diagonal. ''' - Z = (X[:,0] + X[:,1]) / 2. 
- return np.array([Z , Z]).T + return (X[:, 1] - X[:, 0]) * 2 ** (1.0 / internal_p - 1) -def _build_dist_matrix(X, Y, order=2., internal_p=2.): +def _build_dist_matrix(X, Y, order, internal_p): ''' :param X: (n x 2) numpy.array encoding the (points of the) first diagram. :param Y: (m x 2) numpy.array encoding the second diagram. @@ -36,16 +39,12 @@ def _build_dist_matrix(X, Y, order=2., internal_p=2.): and its orthogonal projection onto the diagonal. note also that C[n, m] = 0 (it costs nothing to move from the diagonal to the diagonal). ''' - Xdiag = _proj_on_diag(X) - Ydiag = _proj_on_diag(Y) + Cxd = _dist_to_diag(X, internal_p)**order + Cdy = _dist_to_diag(Y, internal_p)**order if np.isinf(internal_p): C = sc.cdist(X,Y, metric='chebyshev')**order - Cxd = np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order - Cdy = np.linalg.norm(Y - Ydiag, ord=internal_p, axis=1)**order else: C = sc.cdist(X,Y, metric='minkowski', p=internal_p)**order - Cxd = np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order - Cdy = np.linalg.norm(Y - Ydiag, ord=internal_p, axis=1)**order Cf = np.hstack((C, Cxd[:,None])) Cdy = np.append(Cdy, 0) @@ -61,8 +60,7 @@ def _perstot(X, order, internal_p): :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm). :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram). 
''' - Xdiag = _proj_on_diag(X) - return (np.sum(np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order))**(1./order) + return np.linalg.norm(_dist_to_diag(X, internal_p), ord=order) def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2.): -- cgit v1.2.3 From 8d9611206603f4f7506fe77a0273c73c9d67716b Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sun, 19 Apr 2020 12:30:35 +0200 Subject: Drop redundant test torch.isnan(None) raises an exception anyway --- src/python/test/test_dtm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index de74c42b..859189fa 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -46,14 +46,14 @@ def test_dtm_compare_euclidean(): r6 = dtm.fit_transform(pts2) assert r6.detach().numpy() == pytest.approx(r0) r6.sum().backward() - assert pts2.grad is not None and not torch.isnan(pts2.grad).any() + assert not torch.isnan(pts2.grad).any() pts2 = torch.tensor(pts, requires_grad=True) assert pts2.grad is None dtm = DistanceToMeasure(k, implementation="ckdtree", enable_autodiff=True) r7 = dtm.fit_transform(pts2) assert r7.detach().numpy() == pytest.approx(r0) r7.sum().backward() - assert pts2.grad is not None and not torch.isnan(pts2.grad).any() + assert not torch.isnan(pts2.grad).any() def test_dtm_precomputed(): -- cgit v1.2.3 From 1c1a99074049e4ff04fa28e7d6e1b6fc2067397a Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 20 Apr 2020 10:38:41 +0200 Subject: Add __license__ --- src/python/gudhi/point_cloud/dtm.py | 4 ++++ src/python/gudhi/point_cloud/knn.py | 8 +++++++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 58dec536..13e16d24 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -9,6 +9,10 @@ from .knn import KNearestNeighbors +__author__ = 
"Marc Glisse" +__copyright__ = "Copyright (C) 2020 Inria" +__license__ = "MIT" + class DistanceToMeasure: """ diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index d7cf0b2a..4017e498 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -11,6 +11,10 @@ import numpy # TODO: https://github.com/facebookresearch/faiss +__author__ = "Marc Glisse" +__copyright__ = "Copyright (C) 2020 Inria" +__license__ = "MIT" + class KNearestNeighbors: """ @@ -156,7 +160,9 @@ class KNearestNeighbors: assert self.metric == "minkowski" p = self.params["p"] Y = ep.astensor(self.ref_points) - neighbor_pts = Y[neighbors,] + neighbor_pts = Y[ + neighbors, + ] diff = neighbor_pts - X[:, None, :] if isinstance(diff, ep.PyTorchTensor): # https://github.com/jonasrauber/eagerpy/issues/6 -- cgit v1.2.3 From 3a9105e0d3bea5cc64610b7c0c3fb15f0e00bb9d Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 20 Apr 2020 11:37:44 +0200 Subject: Reintroduce _proj_on_diag, with a unit test --- src/python/gudhi/wasserstein/wasserstein.py | 11 +++++++++++ src/python/test/test_wasserstein_distance.py | 7 +++++++ 2 files changed, 18 insertions(+) (limited to 'src') diff --git a/src/python/gudhi/wasserstein/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py index 5df66cf9..efc851a0 100644 --- a/src/python/gudhi/wasserstein/wasserstein.py +++ b/src/python/gudhi/wasserstein/wasserstein.py @@ -15,6 +15,17 @@ try: except ImportError: print("POT (Python Optimal Transport) package is not installed. Try to run $ conda install -c conda-forge pot ; or $ pip install POT") + +# Currently unused, but Théo says it is likely to be used again. +def _proj_on_diag(X): + ''' + :param X: (n x 2) array encoding the points of a persistent diagram. + :returns: (n x 2) array encoding the (respective orthogonal) projections of the points onto the diagonal + ''' + Z = (X[:,0] + X[:,1]) / 2. 
+ return np.array([Z , Z]).T + + def _dist_to_diag(X, internal_p): ''' :param X: (n x 2) array encoding the points of a persistent diagram. diff --git a/src/python/test/test_wasserstein_distance.py b/src/python/test/test_wasserstein_distance.py index 7e0d0f5f..1a4acc1d 100755 --- a/src/python/test/test_wasserstein_distance.py +++ b/src/python/test/test_wasserstein_distance.py @@ -8,6 +8,7 @@ - YYYY/MM Author: Description of the modification """ +from gudhi.wasserstein.wasserstein import _proj_on_diag from gudhi.wasserstein import wasserstein_distance as pot from gudhi.hera import wasserstein_distance as hera import numpy as np @@ -17,6 +18,12 @@ __author__ = "Theo Lacombe" __copyright__ = "Copyright (C) 2019 Inria" __license__ = "MIT" +def test_proj_on_diag(): + dgm = np.array([[1., 1.], [1., 2.], [3., 5.]]) + assert np.array_equal(_proj_on_diag(dgm), [[1., 1.], [1.5, 1.5], [4., 4.]]) + empty = np.empty((0, 2)) + assert np.array_equal(_proj_on_diag(empty), empty) + def _basic_wasserstein(wasserstein_distance, delta, test_infinity=True, test_matching=True): diag1 = np.array([[2.7, 3.7], [9.6, 14.0], [34.2, 34.974]]) diag2 = np.array([[2.8, 4.45], [9.5, 14.1]]) -- cgit v1.2.3 From 9ef7ba65367ab2ff92bf66b1b8166c5990530b76 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 20 Apr 2020 12:16:15 +0200 Subject: Explicitly pass sort_results=True on some tests --- src/python/test/test_knn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/python/test/test_knn.py b/src/python/test/test_knn.py index 415c9d48..a87ec212 100755 --- a/src/python/test/test_knn.py +++ b/src/python/test/test_knn.py @@ -54,12 +54,12 @@ def test_knn_explicit(): knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=False) r = knn.fit_transform(dist) assert np.array_equal(r, [[0, 1], [1, 0], [2, 0]]) - knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=True) + knn = KNearestNeighbors(2, 
metric="precomputed", return_index=True, return_distance=True, sort_results=True) r = knn.fit_transform(dist) assert np.array_equal(r[0], [[0, 1], [1, 0], [2, 0]]) assert np.array_equal(r[1], [[0, 3], [0, 1], [0, 1]]) # Second time in parallel - knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=False, n_jobs=2) + knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=False, n_jobs=2, sort_results=True) r = knn.fit_transform(dist) assert np.array_equal(r, [[0, 1], [1, 0], [2, 0]]) knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=True, n_jobs=2) -- cgit v1.2.3 From 3e713cee177e10536ae8fc231e56fa04769a35ee Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Mon, 20 Apr 2020 22:06:38 +0200 Subject: Fix #279 --- src/python/CMakeLists.txt | 129 +++++++++++++++++++++++----------------------- 1 file changed, 65 insertions(+), 64 deletions(-) (limited to 'src') diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 10dcd161..055d5b23 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -242,6 +242,71 @@ if(PYTHONINTERP_FOUND) install(CODE "execute_process(COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/setup.py install)") + # Documentation generation is available through sphinx - requires all modules + # Make it first as sphinx test is by far the longest test which is nice when testing in parallel + if(SPHINX_PATH) + if(MATPLOTLIB_FOUND) + if(NUMPY_FOUND) + if(SCIPY_FOUND) + if(SKLEARN_FOUND) + if(OT_FOUND) + if(PYBIND11_FOUND) + if(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) + set (GUDHI_SPHINX_MESSAGE "Generating API documentation with Sphinx in ${CMAKE_CURRENT_BINARY_DIR}/sphinx/") + # User warning - Sphinx is a static pages generator, and configured to work fine with user_version + # Images and biblio warnings because not found on developper version + if (GUDHI_PYTHON_PATH STREQUAL "src/python") + set 
(GUDHI_SPHINX_MESSAGE "${GUDHI_SPHINX_MESSAGE} \n WARNING : Sphinx is configured for user version, you run it on developper version. Images and biblio will miss") + endif() + # sphinx target requires gudhi.so, because conf.py reads gudhi version from it + add_custom_target(sphinx + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doc + COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}" + ${SPHINX_PATH} -b html ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/sphinx + DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/gudhi.so" + COMMENT "${GUDHI_SPHINX_MESSAGE}" VERBATIM) + + add_test(NAME sphinx_py_test + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}" + ${SPHINX_PATH} -b doctest ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/doctest) + + # Set missing or not modules + set(GUDHI_MODULES ${GUDHI_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MODULES") + else(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) + message("++ Python documentation module will not be compiled because it requires a Eigen3 and CGAL version >= 4.11.0") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) + else(PYBIND11_FOUND) + message("++ Python documentation module will not be compiled because pybind11 was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(PYBIND11_FOUND) + else(OT_FOUND) + message("++ Python documentation module will not be compiled because POT was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(OT_FOUND) + else(SKLEARN_FOUND) + message("++ Python documentation module will not be compiled because scikit-learn was not found") + set(GUDHI_MISSING_MODULES 
${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(SKLEARN_FOUND) + else(SCIPY_FOUND) + message("++ Python documentation module will not be compiled because scipy was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(SCIPY_FOUND) + else(NUMPY_FOUND) + message("++ Python documentation module will not be compiled because numpy was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(NUMPY_FOUND) + else(MATPLOTLIB_FOUND) + message("++ Python documentation module will not be compiled because matplotlib was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(MATPLOTLIB_FOUND) + else(SPHINX_PATH) + message("++ Python documentation module will not be compiled because sphinx and sphinxcontrib-bibtex were not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(SPHINX_PATH) + + # Test examples if (NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) # Bottleneck and Alpha @@ -419,70 +484,6 @@ if(PYTHONINTERP_FOUND) add_gudhi_py_test(test_dtm) endif() - # Documentation generation is available through sphinx - requires all modules - if(SPHINX_PATH) - if(MATPLOTLIB_FOUND) - if(NUMPY_FOUND) - if(SCIPY_FOUND) - if(SKLEARN_FOUND) - if(OT_FOUND) - if(PYBIND11_FOUND) - if(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) - set (GUDHI_SPHINX_MESSAGE "Generating API documentation with Sphinx in ${CMAKE_CURRENT_BINARY_DIR}/sphinx/") - # User warning - Sphinx is a static pages generator, and configured to work fine with user_version - # Images and biblio warnings because not found on developper version - if (GUDHI_PYTHON_PATH STREQUAL "src/python") - set (GUDHI_SPHINX_MESSAGE "${GUDHI_SPHINX_MESSAGE} \n WARNING : 
Sphinx is configured for user version, you run it on developper version. Images and biblio will miss") - endif() - # sphinx target requires gudhi.so, because conf.py reads gudhi version from it - add_custom_target(sphinx - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doc - COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}" - ${SPHINX_PATH} -b html ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/sphinx - DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/gudhi.so" - COMMENT "${GUDHI_SPHINX_MESSAGE}" VERBATIM) - - add_test(NAME sphinx_py_test - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}" - ${SPHINX_PATH} -b doctest ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/doctest) - - # Set missing or not modules - set(GUDHI_MODULES ${GUDHI_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MODULES") - else(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) - message("++ Python documentation module will not be compiled because it requires a Eigen3 and CGAL version >= 4.11.0") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) - else(PYBIND11_FOUND) - message("++ Python documentation module will not be compiled because pybind11 was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(PYBIND11_FOUND) - else(OT_FOUND) - message("++ Python documentation module will not be compiled because POT was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(OT_FOUND) - else(SKLEARN_FOUND) - message("++ Python documentation module will not be compiled because scikit-learn was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL 
"GUDHI_MISSING_MODULES") - endif(SKLEARN_FOUND) - else(SCIPY_FOUND) - message("++ Python documentation module will not be compiled because scipy was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(SCIPY_FOUND) - else(NUMPY_FOUND) - message("++ Python documentation module will not be compiled because numpy was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(NUMPY_FOUND) - else(MATPLOTLIB_FOUND) - message("++ Python documentation module will not be compiled because matplotlib was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(MATPLOTLIB_FOUND) - else(SPHINX_PATH) - message("++ Python documentation module will not be compiled because sphinx and sphinxcontrib-bibtex were not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(SPHINX_PATH) - - # Set missing or not modules set(GUDHI_MODULES ${GUDHI_MODULES} "python" CACHE INTERNAL "GUDHI_MODULES") else(CYTHON_FOUND) -- cgit v1.2.3 From aa90b98bee73ab2aaf39ef91f39f5a750168e5d4 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 22 Apr 2020 13:04:15 +0200 Subject: Document several optional dependencies of knn --- src/python/doc/installation.rst | 28 ++++++++++++++++++++++++++++ src/python/gudhi/point_cloud/knn.py | 3 ++- 2 files changed, 30 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/python/doc/installation.rst b/src/python/doc/installation.rst index 48425d5e..09a843d5 100644 --- a/src/python/doc/installation.rst +++ b/src/python/doc/installation.rst @@ -211,6 +211,14 @@ The following examples requires CGAL version ≥ 4.11.0: * :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py 
<../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>` * :download:`euclidean_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py>` +EagerPy +======= + +Some Python functions can handle automatic differentiation (possibly only when +a flag `enable_autodiff=True` is used). In order to reduce code duplication, we +use `EagerPy `_ which wraps arrays from +PyTorch, TensorFlow and JAX in a common interface. + Eigen ===== @@ -229,6 +237,13 @@ The following examples require `Eigen `_ version * :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>` * :download:`euclidean_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py>` +Hnswlib +======= + +:class:`~gudhi.point_cloud.knn.KNearestNeighbors` can use the Python package +`Hnswlib `_ as a backend if explicitly +requested, to speed-up queries. + Matplotlib ========== @@ -251,6 +266,13 @@ The following examples require the `Matplotlib `_: * :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>` * :download:`euclidean_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py>` +PyKeOps +======= + +:class:`~gudhi.point_cloud.knn.KNearestNeighbors` can use the Python package +`PyKeOps `_ as a backend if +explicitly requested, to speed-up queries using a GPU. + Python Optimal Transport ======================== @@ -258,6 +280,12 @@ The :doc:`Wasserstein distance ` module requires `POT `_, a library that provides several solvers for optimization problems related to Optimal Transport. 
+PyTorch +======= + +`PyTorch `_ is currently only used as a dependency of +`PyKeOps`_, and in some tests. + Scikit-learn ============ diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 4017e498..07553d6d 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -44,7 +44,8 @@ class KNearestNeighbors: Otherwise, neighbors are returned in an arbitrary order. Defaults to True. enable_autodiff (bool): if the input is a torch.tensor, jax.numpy.ndarray or tensorflow.Tensor, this instructs the function to compute distances in a way that works with automatic differentiation. - This is experimental and not supported for all metrics. Defaults to False. + This is experimental, not supported for all metrics, and requires the package EagerPy. + Defaults to False. kwargs: additional parameters are forwarded to the backends. """ self.k = k -- cgit v1.2.3 From c5db8c1aec523c0cdf72c75b29e4ba94b51487b8 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 22 Apr 2020 19:46:29 +0200 Subject: Reduce the probability of failure of test_dtm It is expected that hnsw sometimes misses one neighbor, which has an impact on the DTM, especially if the number of neighbors considered is low. 
--- src/python/test/test_dtm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index 859189fa..bff4c267 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -16,7 +16,7 @@ import torch def test_dtm_compare_euclidean(): pts = numpy.random.rand(1000, 4) - k = 3 + k = 6 dtm = DistanceToMeasure(k, implementation="ckdtree") r0 = dtm.fit_transform(pts) dtm = DistanceToMeasure(k, implementation="sklearn") @@ -27,7 +27,7 @@ def test_dtm_compare_euclidean(): assert r2 == pytest.approx(r0) dtm = DistanceToMeasure(k, implementation="hnsw") r3 = dtm.fit_transform(pts) - assert r3 == pytest.approx(r0) + assert r3 == pytest.approx(r0, rel=0.1) from scipy.spatial.distance import cdist d = cdist(pts, pts) -- cgit v1.2.3 From 0f7fe01852dcf827da35460592bd3a17ca0ab08e Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 23 Apr 2020 13:30:32 +0200 Subject: Fix pasto in the doc --- src/python/gudhi/simplex_tree.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 7728ebfc..93f5b332 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -560,7 +560,7 @@ cdef class SimplexTree: """This function writes the persistence intervals of the simplicial complex in a user given file name. - :param persistence_file: The specific dimension. + :param persistence_file: Name of the file. :type persistence_file: string. 
:note: intervals_in_dim function requires -- cgit v1.2.3 From 658a754397287e8de216ae91d3c9a3c492e4db2d Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Fri, 24 Apr 2020 09:00:39 +0200 Subject: Fix bibliography for sphinx --- src/python/doc/alpha_complex_user.rst | 11 ++--------- src/python/doc/bottleneck_distance_user.rst | 6 ------ src/python/doc/cubical_complex_user.rst | 7 ------- src/python/doc/index.rst | 7 ------- src/python/doc/nerve_gic_complex_ref.rst | 7 ------- src/python/doc/nerve_gic_complex_user.rst | 7 ------- src/python/doc/persistent_cohomology_user.rst | 7 ------- src/python/doc/rips_complex_user.rst | 7 ------- src/python/doc/simplex_tree_user.rst | 7 ------- src/python/doc/tangential_complex_user.rst | 8 -------- src/python/doc/wasserstein_distance_user.rst | 7 ------- src/python/doc/witness_complex_user.rst | 7 ------- src/python/doc/zbibliography.rst | 10 ++++++++++ 13 files changed, 12 insertions(+), 86 deletions(-) create mode 100644 src/python/doc/zbibliography.rst (limited to 'src') diff --git a/src/python/doc/alpha_complex_user.rst b/src/python/doc/alpha_complex_user.rst index 265a82d2..c65e62c8 100644 --- a/src/python/doc/alpha_complex_user.rst +++ b/src/python/doc/alpha_complex_user.rst @@ -10,9 +10,8 @@ Definition .. include:: alpha_complex_sum.inc `AlphaComplex` is constructing a :doc:`SimplexTree ` using -`Delaunay Triangulation `_ -:cite:`cgal:hdj-t-19b` from `CGAL `_ (the Computational Geometry Algorithms Library -:cite:`cgal:eb-19b`). +`Delaunay Triangulation `_ +from `CGAL `_ (the Computational Geometry Algorithms Library). Remarks ^^^^^^^ @@ -203,9 +202,3 @@ the program output is: [4, 5, 6] -> 22.74 [3, 6] -> 30.25 -CGAL citations --------------- - -.. 
bibliography:: ../../biblio/how_to_cite_cgal.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/bottleneck_distance_user.rst b/src/python/doc/bottleneck_distance_user.rst index 206fcb63..89da89d3 100644 --- a/src/python/doc/bottleneck_distance_user.rst +++ b/src/python/doc/bottleneck_distance_user.rst @@ -66,9 +66,3 @@ The output is: Bottleneck distance approximation = 0.81 Bottleneck distance value = 0.75 -Bibliography ------------- - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/cubical_complex_user.rst b/src/python/doc/cubical_complex_user.rst index e8c94bf6..e4733653 100644 --- a/src/python/doc/cubical_complex_user.rst +++ b/src/python/doc/cubical_complex_user.rst @@ -158,10 +158,3 @@ Examples. --------- End user programs are available in python/example/ folder. - -Bibliography ------------- - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/index.rst b/src/python/doc/index.rst index c153cdfc..13e51047 100644 --- a/src/python/doc/index.rst +++ b/src/python/doc/index.rst @@ -86,10 +86,3 @@ Point cloud utilities ********************* .. include:: point_cloud_sum.inc - -Bibliography -************ - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/nerve_gic_complex_ref.rst b/src/python/doc/nerve_gic_complex_ref.rst index 6a81b7af..abde2e8c 100644 --- a/src/python/doc/nerve_gic_complex_ref.rst +++ b/src/python/doc/nerve_gic_complex_ref.rst @@ -12,10 +12,3 @@ Cover complexes reference manual :show-inheritance: .. automethod:: gudhi.CoverComplex.__init__ - -Bibliography ------------- - -.. 
bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/nerve_gic_complex_user.rst b/src/python/doc/nerve_gic_complex_user.rst index f709ce91..9101f45d 100644 --- a/src/python/doc/nerve_gic_complex_user.rst +++ b/src/python/doc/nerve_gic_complex_user.rst @@ -313,10 +313,3 @@ the program outputs again SC.dot which gives the following visualization after u :alt: Visualization with neato Visualization with neato - -Bibliography ------------- - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/persistent_cohomology_user.rst b/src/python/doc/persistent_cohomology_user.rst index 506fa3a7..4d743aac 100644 --- a/src/python/doc/persistent_cohomology_user.rst +++ b/src/python/doc/persistent_cohomology_user.rst @@ -111,10 +111,3 @@ We provide several example files: run these examples with -h for details on thei * :download:`rips_complex_diagram_persistence_from_distance_matrix_file_example.py <../example/rips_complex_diagram_persistence_from_distance_matrix_file_example.py>` * :download:`random_cubical_complex_persistence_example.py <../example/random_cubical_complex_persistence_example.py>` * :download:`tangential_complex_plain_homology_from_off_file_example.py <../example/tangential_complex_plain_homology_from_off_file_example.py>` - -Bibliography ------------- - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/rips_complex_user.rst b/src/python/doc/rips_complex_user.rst index c4bbcfb6..8efb12e6 100644 --- a/src/python/doc/rips_complex_user.rst +++ b/src/python/doc/rips_complex_user.rst @@ -347,10 +347,3 @@ until dimension 1 - one skeleton graph in other words), the output is: points in the persistence diagram will be under the diagonal, and bottleneck distance and persistence graphical tool will not work properly, this is a known issue. 
- -Bibliography ------------- - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/simplex_tree_user.rst b/src/python/doc/simplex_tree_user.rst index 1b272c35..3df7617f 100644 --- a/src/python/doc/simplex_tree_user.rst +++ b/src/python/doc/simplex_tree_user.rst @@ -66,10 +66,3 @@ The output is: ([1, 2], 4.0) ([1], 0.0) ([2], 4.0) - -Bibliography ------------- - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/tangential_complex_user.rst b/src/python/doc/tangential_complex_user.rst index cf8199cc..3d45473b 100644 --- a/src/python/doc/tangential_complex_user.rst +++ b/src/python/doc/tangential_complex_user.rst @@ -194,11 +194,3 @@ The output is: Tangential contains 4 vertices. Inconsistencies has been fixed. - - -Bibliography ------------- - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst index c24da74d..c443bab5 100644 --- a/src/python/doc/wasserstein_distance_user.rst +++ b/src/python/doc/wasserstein_distance_user.rst @@ -164,10 +164,3 @@ The output is: [[0.27916667 0.55416667] [0.7375 0.7625 ] [0.2375 0.2625 ]] - -Bibliography ------------- - -.. 
bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/witness_complex_user.rst b/src/python/doc/witness_complex_user.rst index 799f5444..08dcd288 100644 --- a/src/python/doc/witness_complex_user.rst +++ b/src/python/doc/witness_complex_user.rst @@ -126,10 +126,3 @@ Example2: Computing persistence using strong relaxed witness complex Here is an example of constructing a strong witness complex filtration and computing persistence on it: * :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>` - -Bibliography ------------- - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/zbibliography.rst b/src/python/doc/zbibliography.rst new file mode 100644 index 00000000..4c377b46 --- /dev/null +++ b/src/python/doc/zbibliography.rst @@ -0,0 +1,10 @@ +:orphan: + +.. To get rid of WARNING: document isn't included in any toctree + +Bibliography +------------ + +.. 
bibliography:: ../../biblio/bibliography.bib + :style: unsrt + -- cgit v1.2.3 From 66337063d2ee3770275268c264548e99db3ec7f0 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Fri, 24 Apr 2020 19:11:05 +0200 Subject: Code review: plain instead of unsrt for biblio - concatenate biblio files - undo cgal citation removal --- src/cmake/modules/GUDHI_user_version_target.cmake | 6 +++++- src/python/doc/alpha_complex_user.rst | 3 ++- src/python/doc/zbibliography.rst | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/cmake/modules/GUDHI_user_version_target.cmake b/src/cmake/modules/GUDHI_user_version_target.cmake index 257d1939..9cf648e3 100644 --- a/src/cmake/modules/GUDHI_user_version_target.cmake +++ b/src/cmake/modules/GUDHI_user_version_target.cmake @@ -26,8 +26,12 @@ add_custom_command(TARGET user_version PRE_BUILD COMMAND ${CMAKE_COMMAND} -E # Generate bib files for Doxygen - cf. root CMakeLists.txt for explanation string(TIMESTAMP GUDHI_VERSION_YEAR "%Y") configure_file(${CMAKE_SOURCE_DIR}/biblio/how_to_cite_gudhi.bib.in "${CMAKE_CURRENT_BINARY_DIR}/biblio/how_to_cite_gudhi.bib" @ONLY) -file(COPY "${CMAKE_SOURCE_DIR}/biblio/how_to_cite_cgal.bib" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/biblio/") file(COPY "${CMAKE_SOURCE_DIR}/biblio/bibliography.bib" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/biblio/") + +# append cgal citation inside bibliography - sphinx cannot deal with more than one bib file +file(READ "${CMAKE_SOURCE_DIR}/biblio/how_to_cite_cgal.bib" CGAL_CITATION_CONTENT) +file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/biblio/bibliography.bib" "${CGAL_CITATION_CONTENT}") + # Copy biblio directory for user version add_custom_command(TARGET user_version PRE_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_BINARY_DIR}/biblio ${GUDHI_USER_VERSION_DIR}/biblio) diff --git a/src/python/doc/alpha_complex_user.rst b/src/python/doc/alpha_complex_user.rst index c65e62c8..a3b35c10 100644 --- 
a/src/python/doc/alpha_complex_user.rst +++ b/src/python/doc/alpha_complex_user.rst @@ -11,7 +11,8 @@ Definition `AlphaComplex` is constructing a :doc:`SimplexTree ` using `Delaunay Triangulation `_ -from `CGAL `_ (the Computational Geometry Algorithms Library). +:cite:`cgal:hdj-t-19b` from `CGAL `_ (the Computational Geometry Algorithms Library +:cite:`cgal:eb-19b`). Remarks ^^^^^^^ diff --git a/src/python/doc/zbibliography.rst b/src/python/doc/zbibliography.rst index 4c377b46..e23fcf25 100644 --- a/src/python/doc/zbibliography.rst +++ b/src/python/doc/zbibliography.rst @@ -6,5 +6,5 @@ Bibliography ------------ .. bibliography:: ../../biblio/bibliography.bib - :style: unsrt + :style: plain -- cgit v1.2.3