summaryrefslogtreecommitdiff
path: root/src/python
diff options
context:
space:
mode:
Diffstat (limited to 'src/python')
-rw-r--r--src/python/CMakeLists.txt60
-rw-r--r--src/python/doc/installation.rst5
-rw-r--r--src/python/doc/wasserstein_distance_user.rst17
-rw-r--r--src/python/gudhi/hera.cc71
-rw-r--r--src/python/gudhi/wasserstein.py6
-rw-r--r--src/python/setup.py.in17
-rwxr-xr-xsrc/python/test/test_wasserstein_distance.py61
7 files changed, 183 insertions, 54 deletions
diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt
index b558d4c4..090a7446 100644
--- a/src/python/CMakeLists.txt
+++ b/src/python/CMakeLists.txt
@@ -32,6 +32,10 @@ function( add_gudhi_debug_info DEBUG_INFO )
endfunction( add_gudhi_debug_info )
if(PYTHONINTERP_FOUND)
+ if(PYBIND11_FOUND)
+ add_gudhi_debug_info("Pybind11 version ${PYBIND11_VERSION}")
+ set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'hera', ")
+ endif()
if(CYTHON_FOUND)
set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'off_reader', ")
set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'simplex_tree', ")
@@ -86,6 +90,7 @@ if(PYTHONINTERP_FOUND)
endif(MSVC)
if(CMAKE_COMPILER_IS_GNUCXX)
set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-frounding-math', ")
+ set(GUDHI_PYBIND11_EXTRA_COMPILE_ARGS "${GUDHI_PYBIND11_EXTRA_COMPILE_ARGS}'-fvisibility=hidden', ")
endif(CMAKE_COMPILER_IS_GNUCXX)
if (CMAKE_CXX_COMPILER_ID MATCHES Intel)
set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-fp-model strict', ")
@@ -390,9 +395,9 @@ endif(CGAL_FOUND)
add_gudhi_py_test(test_reader_utils)
# Wasserstein
- if(OT_FOUND)
+ if(OT_FOUND AND PYBIND11_FOUND)
add_gudhi_py_test(test_wasserstein_distance)
- endif(OT_FOUND)
+ endif()
# Representations
if(SKLEARN_FOUND AND MATPLOTLIB_FOUND)
@@ -406,32 +411,37 @@ endif(CGAL_FOUND)
if(SCIPY_FOUND)
if(SKLEARN_FOUND)
if(OT_FOUND)
- if(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0)
- set (GUDHI_SPHINX_MESSAGE "Generating API documentation with Sphinx in ${CMAKE_CURRENT_BINARY_DIR}/sphinx/")
- # User warning - Sphinx is a static pages generator, and configured to work fine with user_version
- # Images and biblio warnings because not found on developper version
- if (GUDHI_PYTHON_PATH STREQUAL "src/python")
- set (GUDHI_SPHINX_MESSAGE "${GUDHI_SPHINX_MESSAGE} \n WARNING : Sphinx is configured for user version, you run it on developper version. Images and biblio will miss")
- endif()
- # sphinx target requires gudhi.so, because conf.py reads gudhi version from it
- add_custom_target(sphinx
- WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doc
- COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}"
- ${SPHINX_PATH} -b html ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/sphinx
- DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/gudhi.so"
- COMMENT "${GUDHI_SPHINX_MESSAGE}" VERBATIM)
+ if(PYBIND11_FOUND)
+ if(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0)
+ set (GUDHI_SPHINX_MESSAGE "Generating API documentation with Sphinx in ${CMAKE_CURRENT_BINARY_DIR}/sphinx/")
+ # User warning - Sphinx is a static pages generator, and configured to work fine with user_version
+ # Images and biblio warnings because not found on developper version
+ if (GUDHI_PYTHON_PATH STREQUAL "src/python")
+ set (GUDHI_SPHINX_MESSAGE "${GUDHI_SPHINX_MESSAGE} \n WARNING : Sphinx is configured for user version, you run it on developper version. Images and biblio will miss")
+ endif()
+ # sphinx target requires gudhi.so, because conf.py reads gudhi version from it
+ add_custom_target(sphinx
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doc
+ COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}"
+ ${SPHINX_PATH} -b html ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/sphinx
+ DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/gudhi.so"
+ COMMENT "${GUDHI_SPHINX_MESSAGE}" VERBATIM)
- add_test(NAME sphinx_py_test
- WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
- COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}"
- ${SPHINX_PATH} -b doctest ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/doctest)
+ add_test(NAME sphinx_py_test
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+ COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}"
+ ${SPHINX_PATH} -b doctest ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/doctest)
- # Set missing or not modules
- set(GUDHI_MODULES ${GUDHI_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MODULES")
- else(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0)
- message("++ Python documentation module will not be compiled because it requires a Eigen3 and CGAL version >= 4.11.0")
+ # Set missing or not modules
+ set(GUDHI_MODULES ${GUDHI_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MODULES")
+ else(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0)
+ message("++ Python documentation module will not be compiled because it requires a Eigen3 and CGAL version >= 4.11.0")
+ set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES")
+ endif(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0)
+ else(PYBIND11_FOUND)
+ message("++ Python documentation module will not be compiled because pybind11 was not found")
set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES")
- endif(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0)
+ endif(PYBIND11_FOUND)
else(OT_FOUND)
message("++ Python documentation module will not be compiled because POT was not found")
set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES")
diff --git a/src/python/doc/installation.rst b/src/python/doc/installation.rst
index 40f3f44b..d459145b 100644
--- a/src/python/doc/installation.rst
+++ b/src/python/doc/installation.rst
@@ -14,10 +14,11 @@ Compiling
*********
The library uses c++14 and requires `Boost <https://www.boost.org/>`_ ≥ 1.56.0,
`CMake <https://www.cmake.org/>`_ ≥ 3.1 to generate makefiles,
-`NumPy <http://numpy.org>`_ and `Cython <https://www.cython.org/>`_ to compile
+`NumPy <http://numpy.org>`_, `Cython <https://www.cython.org/>`_ and
+`pybind11 <https://github.com/pybind/pybind11>`_ to compile
the GUDHI Python module.
It is a multi-platform library and compiles on Linux, Mac OSX and Visual
-Studio 2015.
+Studio 2017.
On `Windows <https://wiki.python.org/moin/WindowsCompilers>`_ , only Python
≥ 3.5 are available because of the required Visual Studio version.
diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst
index 32999a0c..94b454e2 100644
--- a/src/python/doc/wasserstein_distance_user.rst
+++ b/src/python/doc/wasserstein_distance_user.rst
@@ -9,17 +9,26 @@ Definition
.. include:: wasserstein_distance_sum.inc
-This implementation is based on ideas from "Large Scale Computation of Means and Cluster for Persistence Diagrams via Optimal Transport".
+Functions
+---------
+This implementation uses the Python Optimal Transport library and is based on
+ideas from "Large Scale Computation of Means and Cluster for Persistence
+Diagrams via Optimal Transport" :cite:`10.5555/3327546.3327645`.
-Function
---------
.. autofunction:: gudhi.wasserstein.wasserstein_distance
+This other implementation comes from `Hera
+<https://bitbucket.org/grey_narn/hera/src/master/>`_ (BSD-3-Clause) which is
+based on "Geometry Helps to Compare Persistence Diagrams"
+:cite:`Kerber:2017:GHC:3047249.3064175` by Michael Kerber, Dmitriy
+Morozov, and Arnur Nigmetov.
+
+.. autofunction:: gudhi.hera.wasserstein_distance
Basic example
-------------
-This example computes the 1-Wasserstein distance from 2 persistence diagrams with euclidean ground metric.
+This example computes the 1-Wasserstein distance from 2 persistence diagrams with Euclidean ground metric.
Note that persistence diagrams must be submitted as (n x 2) numpy arrays and must not contain inf values.
.. testcode::
diff --git a/src/python/gudhi/hera.cc b/src/python/gudhi/hera.cc
new file mode 100644
index 00000000..0d562b4c
--- /dev/null
+++ b/src/python/gudhi/hera.cc
@@ -0,0 +1,71 @@
+/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ * Author(s): Marc Glisse
+ *
+ * Copyright (C) 2020 Inria
+ *
+ * Modification(s):
+ * - YYYY/MM Author: Description of the modification
+ */
+
+#include <pybind11/pybind11.h>
+#include <pybind11/numpy.h>
+
+#include <boost/range/iterator_range.hpp>
+
+#include <wasserstein.h> // Hera
+
+#include <array>
+
+namespace py = pybind11;
+typedef py::array_t<double, py::array::c_style | py::array::forcecast> Dgm;
+
+double wasserstein_distance(
+ Dgm d1, Dgm d2,
+ double wasserstein_power, double internal_p,
+ double delta)
+{
+ py::buffer_info buf1 = d1.request();
+ py::buffer_info buf2 = d2.request();
+ // shape (n,2) or (0) for empty
+ if((buf1.ndim!=2 || buf1.shape[1]!=2) && (buf1.ndim!=1 || buf1.shape[0]!=0))
+ throw std::runtime_error("Diagram 1 must be an array of size n x 2");
+ if((buf2.ndim!=2 || buf2.shape[1]!=2) && (buf2.ndim!=1 || buf2.shape[0]!=0))
+ throw std::runtime_error("Diagram 2 must be an array of size n x 2");
+ typedef std::array<double, 2> Point;
+ auto p1 = (Point*)buf1.ptr;
+ auto p2 = (Point*)buf2.ptr;
+ auto diag1 = boost::make_iterator_range(p1, p1+buf1.shape[0]);
+ auto diag2 = boost::make_iterator_range(p2, p2+buf2.shape[0]);
+
+ hera::AuctionParams<double> params;
+ params.wasserstein_power = wasserstein_power;
+ // hera encodes infinity as -1...
+ if(std::isinf(internal_p)) internal_p = hera::get_infinity<double>();
+ params.internal_p = internal_p;
+ params.delta = delta;
+ // The extra parameters are purposedly not exposed for now.
+ return hera::wasserstein_dist(diag1, diag2, params);
+}
+
+PYBIND11_MODULE(hera, m) {
+ m.def("wasserstein_distance", &wasserstein_distance,
+ py::arg("X"), py::arg("Y"),
+ py::arg("order") = 1,
+ py::arg("internal_p") = std::numeric_limits<double>::infinity(),
+ py::arg("delta") = .01,
+ R"pbdoc(
+ Compute the Wasserstein distance between two diagrams.
+ Points at infinity are supported.
+
+ Parameters:
+ X (n x 2 numpy array): First diagram
+ Y (n x 2 numpy array): Second diagram
+ order (float): Wasserstein exponent W_q
+ internal_p (float): Internal Minkowski norm L^p in R^2
+ delta (float): Relative error 1+delta
+
+ Returns:
+ float: Approximate Wasserstein distance W_q(X,Y)
+ )pbdoc");
+}
diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein.py
index db5ddff2..13102094 100644
--- a/src/python/gudhi/wasserstein.py
+++ b/src/python/gudhi/wasserstein.py
@@ -27,8 +27,8 @@ def _build_dist_matrix(X, Y, order=2., internal_p=2.):
'''
:param X: (n x 2) numpy.array encoding the (points of the) first diagram.
:param Y: (m x 2) numpy.array encoding the second diagram.
- :param internal_p: Ground metric (i.e. norm l_p).
:param order: exponent for the Wasserstein metric.
+ :param internal_p: Ground metric (i.e. norm L^p).
:returns: (n+1) x (m+1) np.array encoding the cost matrix C.
For 1 <= i <= n, 1 <= j <= m, C[i,j] encodes the distance between X[i] and Y[j], while C[i, m+1] (resp. C[n+1, j]) encodes the distance (to the p) between X[i] (resp Y[j]) and its orthogonal proj onto the diagonal.
note also that C[n+1, m+1] = 0 (it costs nothing to move from the diagonal to the diagonal).
@@ -54,8 +54,8 @@ def _build_dist_matrix(X, Y, order=2., internal_p=2.):
def _perstot(X, order, internal_p):
'''
:param X: (n x 2) numpy.array (points of a given diagram).
- :param internal_p: Ground metric on the (upper-half) plane (i.e. norm l_p in R^2); Default value is 2 (Euclidean norm).
:param order: exponent for Wasserstein. Default value is 2.
+ :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm).
:returns: float, the total persistence of the diagram (that is, its distance to the empty diagram).
'''
Xdiag = _proj_on_diag(X)
@@ -66,8 +66,8 @@ def wasserstein_distance(X, Y, order=2., internal_p=2.):
'''
:param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate).
:param Y: (m x 2) numpy.array encoding the second diagram.
- :param internal_p: Ground metric on the (upper-half) plane (i.e. norm l_p in R^2); Default value is 2 (euclidean norm).
:param order: exponent for Wasserstein; Default value is 2.
+ :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm).
:returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with respect to the internal_p-norm as ground metric.
:rtype: float
'''
diff --git a/src/python/setup.py.in b/src/python/setup.py.in
index bd7fb180..f968bd59 100644
--- a/src/python/setup.py.in
+++ b/src/python/setup.py.in
@@ -12,6 +12,7 @@ from setuptools import setup, Extension, find_packages
from Cython.Build import cythonize
from numpy import get_include as numpy_get_include
import sys
+import pybind11
__author__ = "Vincent Rouvreau"
__copyright__ = "Copyright (C) 2016 Inria"
@@ -42,6 +43,18 @@ for module in modules:
runtime_library_dirs=runtime_library_dirs,
cython_directives = {'language_level': str(sys.version_info[0])},))
+ext_modules = cythonize(ext_modules)
+
+ext_modules.append(Extension(
+ 'gudhi.hera',
+ sources = [source_dir + 'hera.cc'],
+ language = 'c++',
+ include_dirs = include_dirs +
+ ['@HERA_WASSERSTEIN_INCLUDE_DIR@',
+ pybind11.get_include(False), pybind11.get_include(True)],
+ extra_compile_args=extra_compile_args + [@GUDHI_PYBIND11_EXTRA_COMPILE_ARGS@],
+ ))
+
setup(
name = 'gudhi',
packages=find_packages(), # find_namespace_packages(include=["gudhi*"])
@@ -49,7 +62,7 @@ setup(
author_email='gudhi-contact@lists.gforge.inria.fr',
version='@GUDHI_VERSION@',
url='http://gudhi.gforge.inria.fr/',
- ext_modules = cythonize(ext_modules),
+ ext_modules = ext_modules,
install_requires = ['cython','numpy >= 1.9',],
- setup_requires = ['numpy >= 1.9',],
+ setup_requires = ['numpy >= 1.9','pybind11',],
)
diff --git a/src/python/test/test_wasserstein_distance.py b/src/python/test/test_wasserstein_distance.py
index 43dda77e..6a6b217b 100755
--- a/src/python/test/test_wasserstein_distance.py
+++ b/src/python/test/test_wasserstein_distance.py
@@ -1,6 +1,6 @@
""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
- Author(s): Theo Lacombe
+ Author(s): Theo Lacombe, Marc Glisse
Copyright (C) 2019 Inria
@@ -8,41 +8,66 @@
- YYYY/MM Author: Description of the modification
"""
-from gudhi.wasserstein import wasserstein_distance
+from gudhi.wasserstein import wasserstein_distance as pot
+from gudhi.hera import wasserstein_distance as hera
import numpy as np
+import pytest
__author__ = "Theo Lacombe"
__copyright__ = "Copyright (C) 2019 Inria"
__license__ = "MIT"
-
-def test_basic_wasserstein():
+def _basic_wasserstein(wasserstein_distance, delta, test_infinity=True):
diag1 = np.array([[2.7, 3.7], [9.6, 14.0], [34.2, 34.974]])
diag2 = np.array([[2.8, 4.45], [9.5, 14.1]])
diag3 = np.array([[0, 2], [4, 6]])
diag4 = np.array([[0, 3], [4, 8]])
- emptydiag = np.array([[]])
+ emptydiag = np.array([])
+
+ # We just need to handle positive numbers here
+ def approx(x):
+ return pytest.approx(x, rel=delta)
assert wasserstein_distance(emptydiag, emptydiag, internal_p=2., order=1.) == 0.
assert wasserstein_distance(emptydiag, emptydiag, internal_p=np.inf, order=1.) == 0.
assert wasserstein_distance(emptydiag, emptydiag, internal_p=np.inf, order=2.) == 0.
assert wasserstein_distance(emptydiag, emptydiag, internal_p=2., order=2.) == 0.
- assert wasserstein_distance(diag3, emptydiag, internal_p=np.inf, order=1.) == 2.
- assert wasserstein_distance(diag3, emptydiag, internal_p=1., order=1.) == 4.
+ assert wasserstein_distance(diag3, emptydiag, internal_p=np.inf, order=1.) == approx(2.)
+ assert wasserstein_distance(diag3, emptydiag, internal_p=1., order=1.) == approx(4.)
+
+ assert wasserstein_distance(diag4, emptydiag, internal_p=1., order=2.) == approx(5.) # thank you Pythagorician triplets
+ assert wasserstein_distance(diag4, emptydiag, internal_p=np.inf, order=2.) == approx(2.5)
+ assert wasserstein_distance(diag4, emptydiag, internal_p=2., order=2.) == approx(3.5355339059327378)
+
+ assert wasserstein_distance(diag1, diag2, internal_p=2., order=1.) == approx(1.4453593023967701)
+ assert wasserstein_distance(diag1, diag2, internal_p=2.35, order=1.74) == approx(0.9772734057168739)
+
+ assert wasserstein_distance(diag1, emptydiag, internal_p=2.35, order=1.7863) == approx(3.141592214572228)
+
+ assert wasserstein_distance(diag3, diag4, internal_p=1., order=1.) == approx(3.)
+ assert wasserstein_distance(diag3, diag4, internal_p=np.inf, order=1.) == approx(3.) # no diag matching here
+ assert wasserstein_distance(diag3, diag4, internal_p=np.inf, order=2.) == approx(np.sqrt(5))
+ assert wasserstein_distance(diag3, diag4, internal_p=1., order=2.) == approx(np.sqrt(5))
+ assert wasserstein_distance(diag3, diag4, internal_p=4.5, order=2.) == approx(np.sqrt(5))
+
+ if(not test_infinity):
+ return
- assert wasserstein_distance(diag4, emptydiag, internal_p=1., order=2.) == 5. # thank you Pythagorician triplets
- assert wasserstein_distance(diag4, emptydiag, internal_p=np.inf, order=2.) == 2.5
- assert wasserstein_distance(diag4, emptydiag, internal_p=2., order=2.) == 3.5355339059327378
+ diag5 = np.array([[0, 3], [4, np.inf]])
+ diag6 = np.array([[7, 8], [4, 6], [3, np.inf]])
- assert wasserstein_distance(diag1, diag2, internal_p=2., order=1.) == 1.4453593023967701
- assert wasserstein_distance(diag1, diag2, internal_p=2.35, order=1.74) == 0.9772734057168739
+ assert wasserstein_distance(diag4, diag5) == np.inf
+ assert wasserstein_distance(diag5, diag6, order=1, internal_p=np.inf) == approx(4.)
- assert wasserstein_distance(diag1, emptydiag, internal_p=2.35, order=1.7863) == 3.141592214572228
+def hera_wrap(delta):
+ def fun(*kargs,**kwargs):
+ return hera(*kargs,**kwargs,delta=delta)
+ return fun
- assert wasserstein_distance(diag3, diag4, internal_p=1., order=1.) == 3.
- assert wasserstein_distance(diag3, diag4, internal_p=np.inf, order=1.) == 3. # no diag matching here
- assert wasserstein_distance(diag3, diag4, internal_p=np.inf, order=2.) == np.sqrt(5)
- assert wasserstein_distance(diag3, diag4, internal_p=1., order=2.) == np.sqrt(5)
- assert wasserstein_distance(diag3, diag4, internal_p=4.5, order=2.) == np.sqrt(5)
+def test_wasserstein_distance_pot():
+ _basic_wasserstein(pot, 1e-15, test_infinity=False)
+def test_wasserstein_distance_hera():
+ _basic_wasserstein(hera_wrap(1e-12), 1e-12)
+ _basic_wasserstein(hera_wrap(.1), .1)