From 1b007fc59f08bd01e1521eb1c0773b598bdf158b Mon Sep 17 00:00:00 2001 From: tlacombe Date: Mon, 23 Sep 2019 11:14:24 +0200 Subject: wasserstein distance added on fork --- src/python/doc/wasserstein_distance_sum.inc | 14 ++++++ src/python/doc/wasserstein_distance_user.rst | 39 +++++++++++++++ src/python/gudhi/wasserstein.py | 75 ++++++++++++++++++++++++++++ src/python/test/test_wasserstein_distance.py | 22 ++++++++ 4 files changed, 150 insertions(+) create mode 100644 src/python/doc/wasserstein_distance_sum.inc create mode 100644 src/python/doc/wasserstein_distance_user.rst create mode 100644 src/python/gudhi/wasserstein.py create mode 100755 src/python/test/test_wasserstein_distance.py (limited to 'src/python') diff --git a/src/python/doc/wasserstein_distance_sum.inc b/src/python/doc/wasserstein_distance_sum.inc new file mode 100644 index 00000000..0263f80f --- /dev/null +++ b/src/python/doc/wasserstein_distance_sum.inc @@ -0,0 +1,14 @@ +.. table:: + :widths: 30 50 20 + + +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ + | .. figure:: | The p-Wasserstein distance measures the similarity between two | :Author: Theo Lacombe | + | ../../doc/Bottleneck_distance/perturb_pd.png | persistence diagrams. It's the minimum value c that can be achieve by| | + | :figclass: align-center | a perfect matching between the points of the two diagrams (+ all the | :Introduced in: GUDHI 2.0.0 | + | | diagonal points), where the value of a matching is defined as the | | + | Wasserstein distance is the p-th root of the sum of the | p-th root of the sum of all edges lengths to the power p. Edges | :Copyright: MIT (`GPL v3 `_) | + | edges lengths to the power p. | lengths are measured in norm q, for $1 \leq q \leq \infty$. 
| | + | | | :Requires: `Python Optimal Transport (POT)` | + +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ + | * :doc:`wasserstein_distance_user` | | + +-----------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst new file mode 100644 index 00000000..a51cfb71 --- /dev/null +++ b/src/python/doc/wasserstein_distance_user.rst @@ -0,0 +1,39 @@ +:orphan: + +.. To get rid of WARNING: document isn't included in any toctree + +Wasserstein distance user manual +=============================== +Definition +---------- + +.. include:: wasserstein_distance_sum.inc + +This implementation is based on ideas from "Large Scale Computation of Means and Cluster for Persistence Diagrams via Optimal Transport". + +Function +-------- +.. autofunction:: gudhi.wasserstein_distance + + +Basic example +------------- + +This example computes the 1-Wasserstein distance from 2 persistence diagrams with euclidean ground metric. +Note that persistence diagrams must be submitted as (n x 2) numpy arrays and must not contain inf values. + +.. testcode:: + + import gudhi + + diag1 = np.array([[2.7, 3.7],[9.6, 14.],[34.2, 34.974]]) + diag2 = np.array([[2.8, 4.45],[9.5, 14.1]]) + + message = "Wasserstein distance value = " + '%.2f' % gudhi.wasserstein_distance(diag1, diag2, q=2., p=1.) + print(message) + +The output is: + +.. 
testoutput:: + + Wasserstein distance value = 1.45 diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein.py new file mode 100644 index 00000000..cc527ed8 --- /dev/null +++ b/src/python/gudhi/wasserstein.py @@ -0,0 +1,75 @@ +import numpy as np +import scipy.spatial.distance as sc +try: + import ot +except ImportError: + print("POT (Python Optimal Transport) package is not installed. Try to run $ pip install POT") + +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. + Author(s): Theo Lacombe + + Copyright (C) 2016 Inria + + Modification(s): + - YYYY/MM Author: Description of the modification +""" + +def proj_on_diag(X): + ''' + param X: (n x 2) array encoding the points of a persistent diagram. + return: (n x 2) arary encoding the (respective orthogonal) projections of the points onto the diagonal + ''' + Z = (X[:,0] + X[:,1]) / 2. + return np.array([Z , Z]).T + + +def build_dist_matrix(X, Y, p=2., q=2.): + ''' + param X: (n x 2) np.array encoding the (points of the) first diagram. + param Y: (m x 2) np.array encoding the second diagram. + param q: Ground metric (i.e. norm l_q). + param p: exponent for the Wasserstein metric. + return: (n+1) x (m+1) np.array encoding the cost matrix C. + For 1 <= i <= n, 1 <= j <= m, C[i,j] encodes the distance between X[i] and Y[j], while C[i, m+1] (resp. C[n+1, j]) encodes the distance (to the p) between X[i] (resp Y[j]) and its orthogonal proj onto the diagonal. + note also that C[n+1, m+1] = 0 (it costs nothing to move from the diagonal to the diagonal). 
+ ''' + Xdiag = proj_on_diag(X) + Ydiag = proj_on_diag(Y) + if np.isinf(p): + C = sc.cdist(X,Y, metric='chebyshev', p=q)**p + Cxd = np.linalg.norm(X - Xdiag, ord=q, axis=1)**p + Cdy = np.linalg.norm(Y - Ydiag, ord=q, axis=1)**p + else: + C = sc.cdist(X,Y, metric='minkowski', p=q)**p + Cxd = np.linalg.norm(X - Xdiag, ord=q, axis=1)**p + Cdy = np.linalg.norm(Y - Ydiag, ord=q, axis=1)**p + Cf = np.hstack((C, Cxd[:,None])) + Cdy = np.append(Cdy, 0) + + Cf = np.vstack((Cf, Cdy[None,:])) + + return Cf + + +def wasserstein_distance(X, Y, p=2., q=2.): + ''' + param X, Y: (n x 2) and (m x 2) numpy array (points of persistence diagrams) + param q: Ground metric (i.e. norm l_q); Default value is 2 (euclidean norm). + param p: exponent for Wasserstein; Default value is 2. + return: float, the p-Wasserstein distance (1 <= p < infty) with respect to the q-norm as ground metric. + ''' + M = build_dist_matrix(X, Y, p=p, q=q) + n = len(X) + m = len(Y) + a = 1.0 / (n + m) * np.ones(n) # weight vector of the input diagram. Uniform here. + hat_a = np.append(a, m/(n+m)) # so that we have a probability measure, required by POT + b = 1.0 / (n + m) * np.ones(m) # weight vector of the input diagram. Uniform here. + hat_b = np.append(b, n/(m+n)) # so that we have a probability measure, required by POT + + # Comptuation of the otcost using the ot.emd2 library. + # Note: it is the squared Wasserstein distance. + ot_cost = (n+m) * ot.emd2(hat_a, hat_b, M) + + return np.power(ot_cost, 1./p) + diff --git a/src/python/test/test_wasserstein_distance.py b/src/python/test/test_wasserstein_distance.py new file mode 100755 index 00000000..a5f7cf77 --- /dev/null +++ b/src/python/test/test_wasserstein_distance.py @@ -0,0 +1,22 @@ +import gudhi + +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. 
+ Author(s): Theo Lacombe + + Copyright (C) 2016 Inria + + Modification(s): + - YYYY/MM Author: Description of the modification +""" + +__author__ = "Theo Lacombe" +__copyright__ = "Copyright (C) 2016 Inria" +__license__ = "MIT" + + +def test_basic_bottleneck(): + diag1 = np.array([[2.7, 3.7], [9.6, 14.0], [34.2, 34.974]]) + diag2 = np.array([[2.8, 4.45], [9.5, 14.1]]) + + assert gudhi.wasserstein_distance(diag1, diag2) == 1.4453593023967701 -- cgit v1.2.3 From 982fa3738f847b53c72e43c3c854ff47ce846d1c Mon Sep 17 00:00:00 2001 From: tlacombe Date: Mon, 23 Sep 2019 18:08:01 +0200 Subject: update CMakeLists --- src/python/CMakeLists.txt | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/python') diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 9e128d30..063a19e8 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -49,6 +49,7 @@ if(PYTHONINTERP_FOUND) set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'alpha_complex', ") set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'euclidean_witness_complex', ") set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'euclidean_strong_witness_complex', ") + set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'wasserstein', ") add_gudhi_debug_info("Python version ${PYTHON_VERSION_STRING}") add_gudhi_debug_info("Cython version ${CYTHON_VERSION}") @@ -199,6 +200,7 @@ if(PYTHONINTERP_FOUND) # Other .py files file(COPY "gudhi/persistence_graphical_tools.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") + file(COPY "gudhi/wasserstein.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") add_custom_command( OUTPUT gudhi.so @@ -371,6 +373,11 @@ if(PYTHONINTERP_FOUND) # Reader utils add_gudhi_py_test(test_reader_utils) + # Wasserstein + if(OT_FOUND) + add_gudhi_py_test(test_wasserstein_distance) + endif(OT_FOUND) + # Documentation generation is available through sphinx - requires all modules if(SPHINX_PATH) if(MATPLOTLIB_FOUND) -- cgit v1.2.3 From cb3346903c8b37ca617bf8c01e00eedc03031624 Mon 
Sep 17 00:00:00 2001 From: tlacombe Date: Mon, 23 Sep 2019 18:08:45 +0200 Subject: update test wasserstein. --- src/python/test/test_wasserstein_distance.py | 36 ++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) (limited to 'src/python') diff --git a/src/python/test/test_wasserstein_distance.py b/src/python/test/test_wasserstein_distance.py index a5f7cf77..c1b568e2 100755 --- a/src/python/test/test_wasserstein_distance.py +++ b/src/python/test/test_wasserstein_distance.py @@ -1,22 +1,50 @@ import gudhi +import numpy as np """ This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. Author(s): Theo Lacombe - Copyright (C) 2016 Inria + Copyright (C) 2019 Inria Modification(s): - YYYY/MM Author: Description of the modification """ __author__ = "Theo Lacombe" -__copyright__ = "Copyright (C) 2016 Inria" +__copyright__ = "Copyright (C) 2019 Inria" __license__ = "MIT" -def test_basic_bottleneck(): +def test_basic_wasserstein(): diag1 = np.array([[2.7, 3.7], [9.6, 14.0], [34.2, 34.974]]) diag2 = np.array([[2.8, 4.45], [9.5, 14.1]]) + diag3 = np.array([[0, 2], [4, 6]]) + diag4 = np.array([[0, 3], [4, 8]]) + emptydiag = np.array([[]]) + + assert gudhi.wasserstein_distance(emptydiag, emptydiag, q=2., p=1.) == 0. + assert gudhi.wasserstein_distance(emptydiag, emptydiag, q=np.inf, p=1.) == 0. + assert gudhi.wasserstein_distance(emptydiag, emptydiag, q=np.inf, p=2.) == 0. + assert gudhi.wasserstein_distance(emptydiag, emptydiag, q=2., p=2.) == 0. + + assert gudhi.wasserstein_distance(diag3, emptydiag, q=np.inf, p=1.) == 2. + assert gudhi.wasserstein_distance(diag3, emptydiag, q=1., p=1.) == 4. + + assert gudhi.wasserstein_distance(diag4, emptydiag, q=1., p=2.) == 5. # thank you Pythagorician triplets + assert gudhi.wasserstein_distance(diag4, emptydiag, q=np.inf, p=2.) 
== 2.5 + assert gudhi.wasserstein_distance(diag4, emptydiag, q=2., p=2.) == 3.5355339059327378 + + assert gudhi.wasserstein_distance(diag1, diag2, q=2., p=1.) == 1.4453593023967701 + assert gudhi.wasserstein_distance(diag1, diag2, q=2.35, p=1.74) == 0.9772734057168739 + + assert gudhi.wasserstein_distance(diag1, emptydiag, q=2.35, p=1.7863) == 3.141592214572228 + + assert gudhi.wasserstein_distance(diag3, diag4, q=1., p=1.) == 3. + assert gudhi.wasserstein_distance(diag3, diag4, q=np.inf, p=1.) == 3. # no diag matching here + assert gudhi.wasserstein_distance(diag3, diag4, q=np.inf, p=2.) == np.sqrt(5) + assert gudhi.wasserstein_distance(diag3, diag4, q=1., p=2.) == np.sqrt(5) + assert gudhi.wasserstein_distance(diag3, diag4, q=4.5, p=2.) == np.sqrt(5) + + - assert gudhi.wasserstein_distance(diag1, diag2) == 1.4453593023967701 -- cgit v1.2.3 From 36dfb09493f56f666367df39e5d1a170e49a1a23 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Mon, 23 Sep 2019 18:10:14 +0200 Subject: updated doc for sphinx compilation --- src/python/doc/wasserstein_distance_user.rst | 1 + 1 file changed, 1 insertion(+) (limited to 'src/python') diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst index a51cfb71..bcb7f19d 100644 --- a/src/python/doc/wasserstein_distance_user.rst +++ b/src/python/doc/wasserstein_distance_user.rst @@ -25,6 +25,7 @@ Note that persistence diagrams must be submitted as (n x 2) numpy arrays and mus .. testcode:: import gudhi + import numpy as np diag1 = np.array([[2.7, 3.7],[9.6, 14.],[34.2, 34.974]]) diag2 = np.array([[2.8, 4.45],[9.5, 14.1]]) -- cgit v1.2.3 From 3c98951fd157fe750f7df5b29258a19d4d314c1e Mon Sep 17 00:00:00 2001 From: tlacombe Date: Mon, 23 Sep 2019 18:11:34 +0200 Subject: updated wasserstein.py ; added _ in front of private functions, added q=np.inf, added emptydiagram management. 
--- src/python/gudhi/wasserstein.py | 51 +++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 15 deletions(-) (limited to 'src/python') diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein.py index cc527ed8..db42cc08 100644 --- a/src/python/gudhi/wasserstein.py +++ b/src/python/gudhi/wasserstein.py @@ -9,13 +9,13 @@ except ImportError: See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. Author(s): Theo Lacombe - Copyright (C) 2016 Inria + Copyright (C) 2019 Inria Modification(s): - YYYY/MM Author: Description of the modification """ -def proj_on_diag(X): +def _proj_on_diag(X): ''' param X: (n x 2) array encoding the points of a persistent diagram. return: (n x 2) arary encoding the (respective orthogonal) projections of the points onto the diagonal @@ -24,7 +24,7 @@ def proj_on_diag(X): return np.array([Z , Z]).T -def build_dist_matrix(X, Y, p=2., q=2.): +def _build_dist_matrix(X, Y, p=2., q=2.): ''' param X: (n x 2) np.array encoding the (points of the) first diagram. param Y: (m x 2) np.array encoding the second diagram. @@ -34,10 +34,10 @@ def build_dist_matrix(X, Y, p=2., q=2.): For 1 <= i <= n, 1 <= j <= m, C[i,j] encodes the distance between X[i] and Y[j], while C[i, m+1] (resp. C[n+1, j]) encodes the distance (to the p) between X[i] (resp Y[j]) and its orthogonal proj onto the diagonal. note also that C[n+1, m+1] = 0 (it costs nothing to move from the diagonal to the diagonal). 
''' - Xdiag = proj_on_diag(X) - Ydiag = proj_on_diag(Y) - if np.isinf(p): - C = sc.cdist(X,Y, metric='chebyshev', p=q)**p + Xdiag = _proj_on_diag(X) + Ydiag = _proj_on_diag(Y) + if np.isinf(q): + C = sc.cdist(X,Y, metric='chebyshev')**p Cxd = np.linalg.norm(X - Xdiag, ord=q, axis=1)**p Cdy = np.linalg.norm(Y - Ydiag, ord=q, axis=1)**p else: @@ -52,24 +52,45 @@ def build_dist_matrix(X, Y, p=2., q=2.): return Cf +def _perstot(X, p, q): + ''' + param X: (n x 2) numpy array (points of a given diagram) + param q: Ground metric on the (upper-half) plane (i.e. norm l_q in R^2); Default value is 2 (euclidean norm). + param p: exponent for Wasserstein; Default value is 2. + return: float, the total persistence of the diagram (that is, its distance to the empty diagram). + ''' + Xdiag = _proj_on_diag(X) + return (np.sum(np.linalg.norm(X - Xdiag, ord=q, axis=1)**p))**(1/p) + + def wasserstein_distance(X, Y, p=2., q=2.): ''' param X, Y: (n x 2) and (m x 2) numpy array (points of persistence diagrams) - param q: Ground metric (i.e. norm l_q); Default value is 2 (euclidean norm). + param q: Ground metric on the (upper-half) plane (i.e. norm l_q in R^2); Default value is 2 (euclidean norm). param p: exponent for Wasserstein; Default value is 2. return: float, the p-Wasserstein distance (1 <= p < infty) with respect to the q-norm as ground metric. ''' - M = build_dist_matrix(X, Y, p=p, q=q) n = len(X) m = len(Y) - a = 1.0 / (n + m) * np.ones(n) # weight vector of the input diagram. Uniform here. - hat_a = np.append(a, m/(n+m)) # so that we have a probability measure, required by POT - b = 1.0 / (n + m) * np.ones(m) # weight vector of the input diagram. Uniform here. - hat_b = np.append(b, n/(m+n)) # so that we have a probability measure, required by POT + + # handle empty diagrams + if X.size == 0: + if Y.size == 0: + return 0. + else: + return _perstot(Y, p, q) + elif Y.size == 0: + return _perstot(X, p, q) + + M = _build_dist_matrix(X, Y, p=p, q=q) + a = np.full(n+1, 1. 
/ (n + m) ) # weight vector of the input diagram. Uniform here. + a[-1] = a[-1] * m # normalized so that we have a probability measure, required by POT + b = np.full(m+1, 1. / (n + m) ) # weight vector of the input diagram. Uniform here. + b[-1] = b[-1] * n # so that we have a probability measure, required by POT # Comptuation of the otcost using the ot.emd2 library. # Note: it is the squared Wasserstein distance. - ot_cost = (n+m) * ot.emd2(hat_a, hat_b, M) + ot_cost = (n+m) * ot.emd2(a, b, M) - return np.power(ot_cost, 1./p) + return ot_cost ** (1./p) -- cgit v1.2.3 From b0c4bcce51e4c17660b378c374796d6d300002ed Mon Sep 17 00:00:00 2001 From: tlacombe Date: Mon, 23 Sep 2019 18:16:27 +0200 Subject: updated pot install instructions, including conda --- src/python/gudhi/wasserstein.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/python') diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein.py index db42cc08..ae5f75a5 100644 --- a/src/python/gudhi/wasserstein.py +++ b/src/python/gudhi/wasserstein.py @@ -3,7 +3,7 @@ import scipy.spatial.distance as sc try: import ot except ImportError: - print("POT (Python Optimal Transport) package is not installed. Try to run $ pip install POT") + print("POT (Python Optimal Transport) package is not installed. Try to run $ conda install -c conda-forge pot ; or $ pip install POT") """ This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. 
-- cgit v1.2.3 From bbbba969e769277140920b17f0d92e4e00f1b904 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Tue, 24 Sep 2019 15:19:25 +0200 Subject: no python documentation generation if POT is not found --- src/python/CMakeLists.txt | 60 +++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 26 deletions(-) (limited to 'src/python') diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 063a19e8..07931d10 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -65,6 +65,9 @@ if(PYTHONINTERP_FOUND) if(SCIPY_FOUND) add_gudhi_debug_info("Scipy version ${SCIPY_VERSION}") endif() + if(OT_FOUND) + add_gudhi_debug_info("POT version ${OT_VERSION}") + endif() set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DBOOST_RESULT_OF_USE_DECLTYPE', ") set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DBOOST_ALL_NO_LIB', ") @@ -375,7 +378,7 @@ if(PYTHONINTERP_FOUND) # Wasserstein if(OT_FOUND) - add_gudhi_py_test(test_wasserstein_distance) + add_gudhi_py_test(test_wasserstein_distance) endif(OT_FOUND) # Documentation generation is available through sphinx - requires all modules @@ -383,32 +386,37 @@ if(PYTHONINTERP_FOUND) if(MATPLOTLIB_FOUND) if(NUMPY_FOUND) if(SCIPY_FOUND) - if(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) - set (GUDHI_SPHINX_MESSAGE "Generating API documentation with Sphinx in ${CMAKE_CURRENT_BINARY_DIR}/sphinx/") - # User warning - Sphinx is a static pages generator, and configured to work fine with user_version - # Images and biblio warnings because not found on developper version - if (GUDHI_PYTHON_PATH STREQUAL "src/python") - set (GUDHI_SPHINX_MESSAGE "${GUDHI_SPHINX_MESSAGE} \n WARNING : Sphinx is configured for user version, you run it on developper version. 
Images and biblio will miss") - endif() - # sphinx target requires gudhi.so, because conf.py reads gudhi version from it - add_custom_target(sphinx - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doc - COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}" - ${SPHINX_PATH} -b html ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/sphinx - DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/gudhi.so" - COMMENT "${GUDHI_SPHINX_MESSAGE}" VERBATIM) - - add_test(NAME sphinx_py_test - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}" - ${SPHINX_PATH} -b doctest ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/doctest) - - # Set missing or not modules - set(GUDHI_MODULES ${GUDHI_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MODULES") - else(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) - message("++ Python documentation module will not be compiled because it requires a Eigen3 and CGAL version >= 4.11.0") + if(OT_FOUND) + if(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) + set (GUDHI_SPHINX_MESSAGE "Generating API documentation with Sphinx in ${CMAKE_CURRENT_BINARY_DIR}/sphinx/") + # User warning - Sphinx is a static pages generator, and configured to work fine with user_version + # Images and biblio warnings because not found on developper version + if (GUDHI_PYTHON_PATH STREQUAL "src/python") + set (GUDHI_SPHINX_MESSAGE "${GUDHI_SPHINX_MESSAGE} \n WARNING : Sphinx is configured for user version, you run it on developper version. 
Images and biblio will miss") + endif() + # sphinx target requires gudhi.so, because conf.py reads gudhi version from it + add_custom_target(sphinx + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doc + COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}" + ${SPHINX_PATH} -b html ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/sphinx + DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/gudhi.so" + COMMENT "${GUDHI_SPHINX_MESSAGE}" VERBATIM) + + add_test(NAME sphinx_py_test + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}" + ${SPHINX_PATH} -b doctest ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/doctest) + + # Set missing or not modules + set(GUDHI_MODULES ${GUDHI_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MODULES") + else(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) + message("++ Python documentation module will not be compiled because it requires a Eigen3 and CGAL version >= 4.11.0") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) + else(OT_FOUND) + message("++ Python documentation module will not be compiled because POT was not found") set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) + endif(OT_FOUND) else(SCIPY_FOUND) message("++ Python documentation module will not be compiled because scipy was not found") set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") -- cgit v1.2.3 From f9ec015c1bdd01068771d0d04ff55e0436ffc879 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Thu, 17 Oct 2019 11:55:22 +0200 Subject: updated index.rst to include Wasserstein distance --- src/python/doc/index.rst | 5 +++++ 1 file changed, 5 insertions(+) (limited to 
'src/python') diff --git a/src/python/doc/index.rst b/src/python/doc/index.rst index e379bc23..16d918bc 100644 --- a/src/python/doc/index.rst +++ b/src/python/doc/index.rst @@ -73,6 +73,11 @@ Bottleneck distance .. include:: bottleneck_distance_sum.inc +Wasserstein distance +=================== + +.. include:: wasserstein_distance_sum.inc + Persistence graphical tools =========================== -- cgit v1.2.3 From 91632989f92b89752dd4e59836dff80b43f349f1 Mon Sep 17 00:00:00 2001 From: tlacombe Date: Thu, 17 Oct 2019 11:56:20 +0200 Subject: updated wasserstein doc to be sphinx-compatible + correction of typo in the .rst --- src/python/doc/wasserstein_distance_sum.inc | 4 ++-- src/python/gudhi/wasserstein.py | 32 +++++++++++++++-------------- 2 files changed, 19 insertions(+), 17 deletions(-) (limited to 'src/python') diff --git a/src/python/doc/wasserstein_distance_sum.inc b/src/python/doc/wasserstein_distance_sum.inc index 0263f80f..3b0b9025 100644 --- a/src/python/doc/wasserstein_distance_sum.inc +++ b/src/python/doc/wasserstein_distance_sum.inc @@ -7,8 +7,8 @@ | :figclass: align-center | a perfect matching between the points of the two diagrams (+ all the | :Introduced in: GUDHI 2.0.0 | | | diagonal points), where the value of a matching is defined as the | | | Wasserstein distance is the p-th root of the sum of the | p-th root of the sum of all edges lengths to the power p. Edges | :Copyright: MIT (`GPL v3 `_) | - | edges lengths to the power p. | lengths are measured in norm q, for $1 \leq q \leq \infty$. | | - | | | :Requires: `Python Optimal Transport (POT)` | + | edges lengths to the power p. | lengths are measured in norm q, for :math:`1 \leq q \leq \infty`. 
| | + | | | :Requires: Python Optimal Transport (POT) :math:`\geq` 0.5.1 | +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ | * :doc:`wasserstein_distance_user` | | +-----------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein.py index ae5f75a5..0b2fe79a 100644 --- a/src/python/gudhi/wasserstein.py +++ b/src/python/gudhi/wasserstein.py @@ -17,8 +17,8 @@ except ImportError: def _proj_on_diag(X): ''' - param X: (n x 2) array encoding the points of a persistent diagram. - return: (n x 2) arary encoding the (respective orthogonal) projections of the points onto the diagonal + :param X: (n x 2) array encoding the points of a persistent diagram. + :returns: (n x 2) arary encoding the (respective orthogonal) projections of the points onto the diagonal ''' Z = (X[:,0] + X[:,1]) / 2. return np.array([Z , Z]).T @@ -26,11 +26,11 @@ def _proj_on_diag(X): def _build_dist_matrix(X, Y, p=2., q=2.): ''' - param X: (n x 2) np.array encoding the (points of the) first diagram. - param Y: (m x 2) np.array encoding the second diagram. - param q: Ground metric (i.e. norm l_q). - param p: exponent for the Wasserstein metric. - return: (n+1) x (m+1) np.array encoding the cost matrix C. + :param X: (n x 2) np.array encoding the (points of the) first diagram. + :param Y: (m x 2) np.array encoding the second diagram. + :param q: Ground metric (i.e. norm l_q). + :param p: exponent for the Wasserstein metric. + :returns: (n+1) x (m+1) np.array encoding the cost matrix C. For 1 <= i <= n, 1 <= j <= m, C[i,j] encodes the distance between X[i] and Y[j], while C[i, m+1] (resp. 
C[n+1, j]) encodes the distance (to the p) between X[i] (resp Y[j]) and its orthogonal proj onto the diagonal. note also that C[n+1, m+1] = 0 (it costs nothing to move from the diagonal to the diagonal). ''' @@ -54,10 +54,10 @@ def _build_dist_matrix(X, Y, p=2., q=2.): def _perstot(X, p, q): ''' - param X: (n x 2) numpy array (points of a given diagram) - param q: Ground metric on the (upper-half) plane (i.e. norm l_q in R^2); Default value is 2 (euclidean norm). - param p: exponent for Wasserstein; Default value is 2. - return: float, the total persistence of the diagram (that is, its distance to the empty diagram). + :param X: (n x 2) numpy array (points of a given diagram) + :param q: Ground metric on the (upper-half) plane (i.e. norm l_q in R^2); Default value is 2 (euclidean norm). + :param p: exponent for Wasserstein; Default value is 2. + :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram). ''' Xdiag = _proj_on_diag(X) return (np.sum(np.linalg.norm(X - Xdiag, ord=q, axis=1)**p))**(1/p) @@ -65,10 +65,12 @@ def _perstot(X, p, q): def wasserstein_distance(X, Y, p=2., q=2.): ''' - param X, Y: (n x 2) and (m x 2) numpy array (points of persistence diagrams) - param q: Ground metric on the (upper-half) plane (i.e. norm l_q in R^2); Default value is 2 (euclidean norm). - param p: exponent for Wasserstein; Default value is 2. - return: float, the p-Wasserstein distance (1 <= p < infty) with respect to the q-norm as ground metric. + :param X: (n x 2) np.array encoding the (points of the) first diagram. + :param Y: (m x 2) np.array encoding the second diagram. + :param q: Ground metric on the (upper-half) plane (i.e. norm l_q in R^2); Default value is 2 (euclidean norm). + :param p: exponent for Wasserstein; Default value is 2. + :returns: the p-Wasserstein distance (1 <= p < infty) with respect to the q-norm as ground metric. 
+ :rtype: float ''' n = len(X) m = len(Y) -- cgit v1.2.3 From 7ea7dfe83e93dc3d33d8d50917f718c05f32ca7f Mon Sep 17 00:00:00 2001 From: tlacombe Date: Thu, 17 Oct 2019 23:54:39 +0200 Subject: removed GPL license, updated expected gudhi release (set at 3.1.0 for now) --- src/python/doc/wasserstein_distance_sum.inc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/python') diff --git a/src/python/doc/wasserstein_distance_sum.inc b/src/python/doc/wasserstein_distance_sum.inc index 3b0b9025..24b72c0e 100644 --- a/src/python/doc/wasserstein_distance_sum.inc +++ b/src/python/doc/wasserstein_distance_sum.inc @@ -4,9 +4,9 @@ +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ | .. figure:: | The p-Wasserstein distance measures the similarity between two | :Author: Theo Lacombe | | ../../doc/Bottleneck_distance/perturb_pd.png | persistence diagrams. It's the minimum value c that can be achieve by| | - | :figclass: align-center | a perfect matching between the points of the two diagrams (+ all the | :Introduced in: GUDHI 2.0.0 | + | :figclass: align-center | a perfect matching between the points of the two diagrams (+ all the | :Introduced in: GUDHI 3.1.0 | | | diagonal points), where the value of a matching is defined as the | | - | Wasserstein distance is the p-th root of the sum of the | p-th root of the sum of all edges lengths to the power p. Edges | :Copyright: MIT (`GPL v3 `_) | + | Wasserstein distance is the p-th root of the sum of the | p-th root of the sum of all edges lengths to the power p. Edges | :Copyright: MIT | | edges lengths to the power p. | lengths are measured in norm q, for :math:`1 \leq q \leq \infty`. 
| | | | | :Requires: Python Optimal Transport (POT) :math:`\geq` 0.5.1 | +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ -- cgit v1.2.3 From a625d5879beaaae47118bc79fda17f441483dffe Mon Sep 17 00:00:00 2001 From: tlacombe Date: Thu, 17 Oct 2019 23:55:17 +0200 Subject: Few improvements to the documentation. --- src/python/gudhi/wasserstein.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/python') diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein.py index 0b2fe79a..4e17811d 100644 --- a/src/python/gudhi/wasserstein.py +++ b/src/python/gudhi/wasserstein.py @@ -26,8 +26,8 @@ def _proj_on_diag(X): def _build_dist_matrix(X, Y, p=2., q=2.): ''' - :param X: (n x 2) np.array encoding the (points of the) first diagram. - :param Y: (m x 2) np.array encoding the second diagram. + :param X: (n x 2) numpy.array encoding the (points of the) first diagram. + :param Y: (m x 2) numpy.array encoding the second diagram. :param q: Ground metric (i.e. norm l_q). :param p: exponent for the Wasserstein metric. :returns: (n+1) x (m+1) np.array encoding the cost matrix C. @@ -54,19 +54,19 @@ def _build_dist_matrix(X, Y, p=2., q=2.): def _perstot(X, p, q): ''' - :param X: (n x 2) numpy array (points of a given diagram) - :param q: Ground metric on the (upper-half) plane (i.e. norm l_q in R^2); Default value is 2 (euclidean norm). + :param X: (n x 2) numpy.array (points of a given diagram). + :param q: Ground metric on the (upper-half) plane (i.e. norm l_q in R^2); Default value is 2 (Euclidean norm). :param p: exponent for Wasserstein; Default value is 2. :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram). 
''' Xdiag = _proj_on_diag(X) - return (np.sum(np.linalg.norm(X - Xdiag, ord=q, axis=1)**p))**(1/p) + return (np.sum(np.linalg.norm(X - Xdiag, ord=q, axis=1)**p))**(1./p) def wasserstein_distance(X, Y, p=2., q=2.): ''' - :param X: (n x 2) np.array encoding the (points of the) first diagram. - :param Y: (m x 2) np.array encoding the second diagram. + :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate). + :param Y: (m x 2) numpy.array encoding the second diagram. :param q: Ground metric on the (upper-half) plane (i.e. norm l_q in R^2); Default value is 2 (euclidean norm). :param p: exponent for Wasserstein; Default value is 2. :returns: the p-Wasserstein distance (1 <= p < infty) with respect to the q-norm as ground metric. -- cgit v1.2.3 From a3e609d04f296f80edf622ba1d6e0efe71eb8c41 Mon Sep 17 00:00:00 2001 From: Théo Lacombe Date: Fri, 18 Oct 2019 22:17:48 +0200 Subject: Update src/python/gudhi/wasserstein.py Co-Authored-By: Marc Glisse --- src/python/gudhi/wasserstein.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/python') diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein.py index 4e17811d..32d236c5 100644 --- a/src/python/gudhi/wasserstein.py +++ b/src/python/gudhi/wasserstein.py @@ -69,7 +69,7 @@ def wasserstein_distance(X, Y, p=2., q=2.): :param Y: (m x 2) numpy.array encoding the second diagram. :param q: Ground metric on the (upper-half) plane (i.e. norm l_q in R^2); Default value is 2 (euclidean norm). :param p: exponent for Wasserstein; Default value is 2. - :returns: the p-Wasserstein distance (1 <= p < infty) with respect to the q-norm as ground metric. + :returns: the p-Wasserstein distance (1 <= p < infinity) with respect to the q-norm as ground metric. 
:rtype: float ''' n = len(X) -- cgit v1.2.3 From 12f62eca36952053169a71f70169a0e15fa481fc Mon Sep 17 00:00:00 2001 From: tlacombe Date: Fri, 18 Oct 2019 22:30:07 +0200 Subject: correction edges lengths ==> edge lengths --- src/python/doc/wasserstein_distance_sum.inc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/python') diff --git a/src/python/doc/wasserstein_distance_sum.inc b/src/python/doc/wasserstein_distance_sum.inc index 24b72c0e..ffd4d312 100644 --- a/src/python/doc/wasserstein_distance_sum.inc +++ b/src/python/doc/wasserstein_distance_sum.inc @@ -3,11 +3,11 @@ +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ | .. figure:: | The p-Wasserstein distance measures the similarity between two | :Author: Theo Lacombe | - | ../../doc/Bottleneck_distance/perturb_pd.png | persistence diagrams. It's the minimum value c that can be achieve by| | - | :figclass: align-center | a perfect matching between the points of the two diagrams (+ all the | :Introduced in: GUDHI 3.1.0 | + | ../../doc/Bottleneck_distance/perturb_pd.png | persistence diagrams. It's the minimum value c that can be achieved | | + | :figclass: align-center | by a perfect matching between the points of the two diagrams (+ all | :Introduced in: GUDHI 3.1.0 | | | diagonal points), where the value of a matching is defined as the | | - | Wasserstein distance is the p-th root of the sum of the | p-th root of the sum of all edges lengths to the power p. Edges | :Copyright: MIT | - | edges lengths to the power p. | lengths are measured in norm q, for :math:`1 \leq q \leq \infty`. | | + | Wasserstein distance is the p-th root of the sum of the | p-th root of the sum of all edge lengths to the power p. Edge lengths| :Copyright: MIT | + | edge lengths to the power p. | are measured in norm q, for :math:`1 \leq q \leq \infty`. 
| | | | | :Requires: Python Optimal Transport (POT) :math:`\geq` 0.5.1 | +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ | * :doc:`wasserstein_distance_user` | | -- cgit v1.2.3 From e0feb6725f3970eea66c8e3268ec1a55020279bd Mon Sep 17 00:00:00 2001 From: Théo Lacombe Date: Fri, 18 Oct 2019 22:31:38 +0200 Subject: Update src/python/gudhi/wasserstein.py typo correction Co-Authored-By: Marc Glisse --- src/python/gudhi/wasserstein.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/python') diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein.py index 32d236c5..445772e4 100644 --- a/src/python/gudhi/wasserstein.py +++ b/src/python/gudhi/wasserstein.py @@ -18,7 +18,7 @@ except ImportError: def _proj_on_diag(X): ''' :param X: (n x 2) array encoding the points of a persistent diagram. - :returns: (n x 2) arary encoding the (respective orthogonal) projections of the points onto the diagonal + :returns: (n x 2) array encoding the (respective orthogonal) projections of the points onto the diagonal ''' Z = (X[:,0] + X[:,1]) / 2. 
return np.array([Z , Z]).T -- cgit v1.2.3 From d02296df9ca6ebea31345a89a6070258c0ed91a9 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sun, 27 Oct 2019 12:59:55 +0100 Subject: Mention POT in installation instructions --- src/python/doc/installation.rst | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/python') diff --git a/src/python/doc/installation.rst b/src/python/doc/installation.rst index 5a6ad9f4..4778b3d9 100644 --- a/src/python/doc/installation.rst +++ b/src/python/doc/installation.rst @@ -215,6 +215,13 @@ The following examples require the `Matplotlib `_: * :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>` * :download:`euclidean_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py>` +Python Optimal Transport +======================== + +The :doc:`Wasserstein distance ` +module requires `POT `_, a library that provides +several solvers for optimization problems related to Optimal Transport. + SciPy ===== -- cgit v1.2.3 From 3c76f73a530daacd48d476cd96bd946e4ab6d78a Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sun, 27 Oct 2019 13:40:23 +0100 Subject: Wasserstein also uses SciPy --- src/python/doc/installation.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/python') diff --git a/src/python/doc/installation.rst b/src/python/doc/installation.rst index 4778b3d9..3711ca8e 100644 --- a/src/python/doc/installation.rst +++ b/src/python/doc/installation.rst @@ -225,9 +225,10 @@ several solvers for optimization problems related to Optimal Transport. SciPy ===== -The :doc:`persistence graphical tools ` -module requires `SciPy `_, a Python-based ecosystem of -open-source software for mathematics, science, and engineering. 
+The :doc:`persistence graphical tools ` and +:doc:`Wasserstein distance ` modules require `SciPy +`_, a Python-based ecosystem of open-source software for +mathematics, science, and engineering. Threading Building Blocks ========================= -- cgit v1.2.3 From ee4934750e8c9dbdee4874d56921aeb9bf7b7bb7 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 31 Oct 2019 08:48:15 +0100 Subject: Increase numItermax in the call to POT. This number is pretty arbitrary... --- src/python/gudhi/wasserstein.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/python') diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein.py index 445772e4..eba7c6d5 100644 --- a/src/python/gudhi/wasserstein.py +++ b/src/python/gudhi/wasserstein.py @@ -92,7 +92,8 @@ def wasserstein_distance(X, Y, p=2., q=2.): # Computation of the otcost using the ot.emd2 library. # Note: it is the p-th power of the Wasserstein distance. - ot_cost = (n+m) * ot.emd2(a, b, M) + # The default numItermax=100000 is not sufficient for some examples with 5000 points; what is a good value? + ot_cost = (n+m) * ot.emd2(a, b, M, numItermax=2000000) return ot_cost ** (1./p) -- cgit v1.2.3