diff options
Diffstat (limited to 'src/python')
69 files changed, 1610 insertions, 512 deletions
diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 22af3ec9..055d5b23 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -78,6 +78,19 @@ if(PYTHONINTERP_FOUND) if(OT_FOUND) add_gudhi_debug_info("POT version ${OT_VERSION}") endif() + if(HNSWLIB_FOUND) + # Does not have a version number... + add_gudhi_debug_info("HNSWlib found") + endif() + if(TORCH_FOUND) + add_gudhi_debug_info("PyTorch version ${TORCH_VERSION}") + endif() + if(PYKEOPS_FOUND) + add_gudhi_debug_info("PyKeOps version ${PYKEOPS_VERSION}") + endif() + if(EAGERPY_FOUND) + add_gudhi_debug_info("EagerPy version ${EAGERPY_VERSION}") + endif() set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DBOOST_RESULT_OF_USE_DECLTYPE', ") set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DBOOST_ALL_NO_LIB', ") @@ -128,16 +141,6 @@ if(PYTHONINTERP_FOUND) endif () if(CGAL_FOUND) - can_cgal_use_cxx11_thread_local() - if (NOT CGAL_CAN_USE_CXX11_THREAD_LOCAL_RESULT) - if(CMAKE_BUILD_TYPE MATCHES Debug) - add_GUDHI_PYTHON_lib("${Boost_THREAD_LIBRARY_DEBUG}") - else() - add_GUDHI_PYTHON_lib("${Boost_THREAD_LIBRARY_RELEASE}") - endif() - message("** Add Boost ${Boost_LIBRARY_DIRS}") - set(GUDHI_PYTHON_LIBRARY_DIRS "${GUDHI_PYTHON_LIBRARY_DIRS}'${Boost_LIBRARY_DIRS}', ") - endif() # Add CGAL compilation args if(CGAL_HEADER_ONLY) add_gudhi_debug_info("CGAL header only version ${CGAL_VERSION}") @@ -226,7 +229,7 @@ if(PYTHONINTERP_FOUND) # Other .py files file(COPY "gudhi/persistence_graphical_tools.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") file(COPY "gudhi/representations" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi/") - file(COPY "gudhi/wasserstein.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") + file(COPY "gudhi/wasserstein" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") file(COPY "gudhi/point_cloud" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") add_custom_command( @@ -239,6 +242,71 @@ if(PYTHONINTERP_FOUND) install(CODE "execute_process(COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/setup.py install)") + # Documentation generation is available through sphinx - requires all modules + # Make it first as sphinx test is by far the longest test which is nice when testing in parallel + if(SPHINX_PATH) + if(MATPLOTLIB_FOUND) + if(NUMPY_FOUND) + if(SCIPY_FOUND) + if(SKLEARN_FOUND) + if(OT_FOUND) + if(PYBIND11_FOUND) + if(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) + set (GUDHI_SPHINX_MESSAGE "Generating API documentation with Sphinx in ${CMAKE_CURRENT_BINARY_DIR}/sphinx/") + # User warning - Sphinx is a static pages generator, and configured to work fine with user_version + # Images and biblio warnings because not found on developper version + if (GUDHI_PYTHON_PATH STREQUAL "src/python") + set (GUDHI_SPHINX_MESSAGE "${GUDHI_SPHINX_MESSAGE} \n WARNING : Sphinx is configured for user version, you run it on developper version. Images and biblio will miss") + endif() + # sphinx target requires gudhi.so, because conf.py reads gudhi version from it + add_custom_target(sphinx + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doc + COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}" + ${SPHINX_PATH} -b html ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/sphinx + DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/gudhi.so" + COMMENT "${GUDHI_SPHINX_MESSAGE}" VERBATIM) + + add_test(NAME sphinx_py_test + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}" + ${SPHINX_PATH} -b doctest ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/doctest) + + # Set missing or not modules + set(GUDHI_MODULES ${GUDHI_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MODULES") + else(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) + message("++ Python documentation module will not be compiled because it requires a Eigen3 and CGAL version >= 4.11.0") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) + else(PYBIND11_FOUND) + message("++ Python documentation module will not be compiled because pybind11 was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(PYBIND11_FOUND) + else(OT_FOUND) + message("++ Python documentation module will not be compiled because POT was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(OT_FOUND) + else(SKLEARN_FOUND) + message("++ Python documentation module will not be compiled because scikit-learn was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(SKLEARN_FOUND) + else(SCIPY_FOUND) + message("++ Python documentation module will not be compiled because scipy was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(SCIPY_FOUND) + else(NUMPY_FOUND) + message("++ Python documentation module will not be compiled because numpy was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(NUMPY_FOUND) + else(MATPLOTLIB_FOUND) + message("++ Python documentation module will not be compiled because matplotlib was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(MATPLOTLIB_FOUND) + else(SPHINX_PATH) + message("++ Python documentation module will not be compiled because sphinx and sphinxcontrib-bibtex were not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(SPHINX_PATH) + + # Test examples if (NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) # Bottleneck and Alpha @@ -399,6 +467,7 @@ if(PYTHONINTERP_FOUND) # Wasserstein if(OT_FOUND AND PYBIND11_FOUND) add_gudhi_py_test(test_wasserstein_distance) + add_gudhi_py_test(test_wasserstein_barycenter) endif() # Representations @@ -409,69 +478,11 @@ if(PYTHONINTERP_FOUND) # Time Delay add_gudhi_py_test(test_time_delay) - # Documentation generation is available through sphinx - requires all modules - if(SPHINX_PATH) - if(MATPLOTLIB_FOUND) - if(NUMPY_FOUND) - if(SCIPY_FOUND) - if(SKLEARN_FOUND) - if(OT_FOUND) - if(PYBIND11_FOUND) - if(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) - set (GUDHI_SPHINX_MESSAGE "Generating API documentation with Sphinx in ${CMAKE_CURRENT_BINARY_DIR}/sphinx/") - # User warning - Sphinx is a static pages generator, and configured to work fine with user_version - # Images and biblio warnings because not found on developper version - if (GUDHI_PYTHON_PATH STREQUAL "src/python") - set (GUDHI_SPHINX_MESSAGE "${GUDHI_SPHINX_MESSAGE} \n WARNING : Sphinx is configured for user version, you run it on developper version. Images and biblio will miss") - endif() - # sphinx target requires gudhi.so, because conf.py reads gudhi version from it - add_custom_target(sphinx - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doc - COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}" - ${SPHINX_PATH} -b html ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/sphinx - DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/gudhi.so" - COMMENT "${GUDHI_SPHINX_MESSAGE}" VERBATIM) - - add_test(NAME sphinx_py_test - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}" - ${SPHINX_PATH} -b doctest ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/doctest) - - # Set missing or not modules - set(GUDHI_MODULES ${GUDHI_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MODULES") - else(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) - message("++ Python documentation module will not be compiled because it requires a Eigen3 and CGAL version >= 4.11.0") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) - else(PYBIND11_FOUND) - message("++ Python documentation module will not be compiled because pybind11 was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(PYBIND11_FOUND) - else(OT_FOUND) - message("++ Python documentation module will not be compiled because POT was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(OT_FOUND) - else(SKLEARN_FOUND) - message("++ Python documentation module will not be compiled because scikit-learn was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(SKLEARN_FOUND) - else(SCIPY_FOUND) - message("++ Python documentation module will not be compiled because scipy was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(SCIPY_FOUND) - else(NUMPY_FOUND) - message("++ Python documentation module will not be compiled because numpy was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(NUMPY_FOUND) - else(MATPLOTLIB_FOUND) - message("++ Python documentation module will not be compiled because matplotlib was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(MATPLOTLIB_FOUND) - else(SPHINX_PATH) - message("++ Python documentation module will not be compiled because sphinx and sphinxcontrib-bibtex were not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(SPHINX_PATH) - + # DTM + if(SCIPY_FOUND AND SKLEARN_FOUND AND TORCH_FOUND AND HNSWLIB_FOUND AND PYKEOPS_FOUND AND EAGERPY_FOUND) + add_gudhi_py_test(test_knn) + add_gudhi_py_test(test_dtm) + endif() # Set missing or not modules set(GUDHI_MODULES ${GUDHI_MODULES} "python" CACHE INTERNAL "GUDHI_MODULES") diff --git a/src/python/doc/alpha_complex_sum.inc b/src/python/doc/alpha_complex_sum.inc index b5af0d27..9e6414d0 100644 --- a/src/python/doc/alpha_complex_sum.inc +++ b/src/python/doc/alpha_complex_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ | .. figure:: | Alpha complex is a simplicial complex constructed from the finite | :Author: Vincent Rouvreau | | ../../doc/Alpha_complex/alpha_complex_representation.png | cells of a Delaunay Triangulation. | | - | :alt: Alpha complex representation | | :Introduced in: GUDHI 2.0.0 | + | :alt: Alpha complex representation | | :Since: GUDHI 2.0.0 | | :figclass: align-center | The filtration value of each simplex is computed as the **square** of | | - | | the circumradius of the simplex if the circumsphere is empty (the | :Copyright: MIT (`GPL v3 </licensing/>`_) | + | | the circumradius of the simplex if the circumsphere is empty (the | :License: MIT (`GPL v3 </licensing/>`_) | | | simplex is then said to be Gabriel), and as the minimum of the | | | | filtration values of the codimension 1 cofaces that make it not | :Requires: `Eigen <installation.html#eigen>`__ :math:`\geq` 3.1.0 and `CGAL <installation.html#cgal>`__ :math:`\geq` 4.11.0 | | | Gabriel otherwise. | | diff --git a/src/python/doc/alpha_complex_user.rst b/src/python/doc/alpha_complex_user.rst index 60319e84..a3b35c10 100644 --- a/src/python/doc/alpha_complex_user.rst +++ b/src/python/doc/alpha_complex_user.rst @@ -10,7 +10,7 @@ Definition .. include:: alpha_complex_sum.inc `AlphaComplex` is constructing a :doc:`SimplexTree <simplex_tree_ref>` using -`Delaunay Triangulation <http://doc.cgal.org/latest/Triangulation/index.html#Chapter_Triangulations>`_ +`Delaunay Triangulation <http://doc.cgal.org/latest/Triangulation/index.html#Chapter_Triangulations>`_ :cite:`cgal:hdj-t-19b` from `CGAL <http://www.cgal.org/>`_ (the Computational Geometry Algorithms Library :cite:`cgal:eb-19b`). @@ -203,9 +203,3 @@ the program output is: [4, 5, 6] -> 22.74 [3, 6] -> 30.25 -CGAL citations -============== - -.. bibliography:: ../../biblio/how_to_cite_cgal.bib - :filter: docnames - :style: unsrt diff --git a/src/python/doc/bottleneck_distance_sum.inc b/src/python/doc/bottleneck_distance_sum.inc index 6eb0ac19..0de4625c 100644 --- a/src/python/doc/bottleneck_distance_sum.inc +++ b/src/python/doc/bottleneck_distance_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ | .. figure:: | Bottleneck distance measures the similarity between two persistence | :Author: François Godi | | ../../doc/Bottleneck_distance/perturb_pd.png | diagrams. It's the shortest distance b for which there exists a | | - | :figclass: align-center | perfect matching between the points of the two diagrams (+ all the | :Introduced in: GUDHI 2.0.0 | + | :figclass: align-center | perfect matching between the points of the two diagrams (+ all the | :Since: GUDHI 2.0.0 | | | diagonal points) such that any couple of matched points are at | | - | Bottleneck distance is the length of | distance at most b, where the distance between points is the sup | :Copyright: MIT (`GPL v3 </licensing/>`_) | + | Bottleneck distance is the length of | distance at most b, where the distance between points is the sup | :License: MIT (`GPL v3 </licensing/>`_) | | the longest edge | norm in :math:`\mathbb{R}^2`. | | | | | :Requires: `CGAL <installation.html#cgal>`__ :math:`\geq` 4.11.0 | +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ diff --git a/src/python/doc/bottleneck_distance_user.rst b/src/python/doc/bottleneck_distance_user.rst index 9435c7f1..89da89d3 100644 --- a/src/python/doc/bottleneck_distance_user.rst +++ b/src/python/doc/bottleneck_distance_user.rst @@ -65,3 +65,4 @@ The output is: Bottleneck distance approximation = 0.81 Bottleneck distance value = 0.75 + diff --git a/src/python/doc/cubical_complex_sum.inc b/src/python/doc/cubical_complex_sum.inc index f200e695..28bf8e94 100644 --- a/src/python/doc/cubical_complex_sum.inc +++ b/src/python/doc/cubical_complex_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +--------------------------------------------------------------------------+----------------------------------------------------------------------+-----------------------------+ | .. figure:: | The cubical complex is an example of a structured complex useful in | :Author: Pawel Dlotko | | ../../doc/Bitmap_cubical_complex/Cubical_complex_representation.png | computational mathematics (specially rigorous numerics) and image | | - | :alt: Cubical complex representation | analysis. | :Introduced in: GUDHI 2.0.0 | + | :alt: Cubical complex representation | analysis. | :Since: GUDHI 2.0.0 | | :figclass: align-center | | | - | | | :Copyright: MIT | + | | | :License: MIT | | | | | +--------------------------------------------------------------------------+----------------------------------------------------------------------+-----------------------------+ | * :doc:`cubical_complex_user` | * :doc:`cubical_complex_ref` | diff --git a/src/python/doc/cubical_complex_user.rst b/src/python/doc/cubical_complex_user.rst index 56cf0170..e4733653 100644 --- a/src/python/doc/cubical_complex_user.rst +++ b/src/python/doc/cubical_complex_user.rst @@ -8,7 +8,7 @@ Definition ---------- ===================================== ===================================== ===================================== -:Author: Pawel Dlotko :Introduced in: GUDHI PYTHON 2.0.0 :Copyright: GPL v3 +:Author: Pawel Dlotko :Since: GUDHI PYTHON 2.0.0 :License: GPL v3 ===================================== ===================================== ===================================== +---------------------------------------------+----------------------------------------------------------------------+ @@ -158,10 +158,3 @@ Examples. --------- End user programs are available in python/example/ folder. - -Bibliography -============ - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docnames - :style: unsrt diff --git a/src/python/doc/img/barycenter.png b/src/python/doc/img/barycenter.png Binary files differnew file mode 100644 index 00000000..cad6af70 --- /dev/null +++ b/src/python/doc/img/barycenter.png diff --git a/src/python/doc/index.rst b/src/python/doc/index.rst index 3387a64f..13e51047 100644 --- a/src/python/doc/index.rst +++ b/src/python/doc/index.rst @@ -71,6 +71,7 @@ Wasserstein distance .. include:: wasserstein_distance_sum.inc + Persistence representations =========================== @@ -85,10 +86,3 @@ Point cloud utilities ********************* .. include:: point_cloud_sum.inc - -Bibliography -************ - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docnames - :style: unsrt diff --git a/src/python/doc/installation.rst b/src/python/doc/installation.rst index d459145b..09a843d5 100644 --- a/src/python/doc/installation.rst +++ b/src/python/doc/installation.rst @@ -175,8 +175,8 @@ Documentation To build the documentation, `sphinx-doc <http://www.sphinx-doc.org>`_ and `sphinxcontrib-bibtex <https://sphinxcontrib-bibtex.readthedocs.io>`_ are required. As the documentation is auto-tested, `CGAL`_, `Eigen`_, -`Matplotlib`_, `NumPy`_ and `SciPy`_ are also mandatory to build the -documentation. +`Matplotlib`_, `NumPy`_, `POT`_, `Scikit-learn`_ and `SciPy`_ are +also mandatory to build the documentation. Run the following commands in a terminal: @@ -192,8 +192,8 @@ CGAL ==== Some GUDHI modules (cf. :doc:`modules list </index>`), and few examples -require CGAL, a C++ library that provides easy access to efficient and -reliable geometric algorithms. +require `CGAL <https://www.cgal.org/>`_, a C++ library that provides easy +access to efficient and reliable geometric algorithms. The procedure to install this library @@ -211,6 +211,14 @@ The following examples requires CGAL version ≥ 4.11.0: * :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>` * :download:`euclidean_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py>` +EagerPy +======= + +Some Python functions can handle automatic differentiation (possibly only when +a flag `enable_autodiff=True` is used). In order to reduce code duplication, we +use `EagerPy <https://eagerpy.jonasrauber.de/>`_ which wraps arrays from +PyTorch, TensorFlow and JAX in a common interface. + Eigen ===== @@ -229,6 +237,13 @@ The following examples require `Eigen <http://eigen.tuxfamily.org/>`_ version ≠* :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>` * :download:`euclidean_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py>` +Hnswlib +======= + +:class:`~gudhi.point_cloud.knn.KNearestNeighbors` can use the Python package +`Hnswlib <https://github.com/nmslib/hnswlib>`_ as a backend if explicitly +requested, to speed-up queries. + Matplotlib ========== @@ -251,6 +266,13 @@ The following examples require the `Matplotlib <http://matplotlib.org>`_: * :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>` * :download:`euclidean_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py>` +PyKeOps +======= + +:class:`~gudhi.point_cloud.knn.KNearestNeighbors` can use the Python package +`PyKeOps <https://www.kernel-operations.io/keops/python/>`_ as a backend if +explicitly requested, to speed-up queries using a GPU. + Python Optimal Transport ======================== @@ -258,6 +280,12 @@ The :doc:`Wasserstein distance </wasserstein_distance_user>` module requires `POT <https://pot.readthedocs.io/>`_, a library that provides several solvers for optimization problems related to Optimal Transport. +PyTorch +======= + +`PyTorch <https://pytorch.org/>`_ is currently only used as a dependency of +`PyKeOps`_, and in some tests. + Scikit-learn ============ diff --git a/src/python/doc/nerve_gic_complex_sum.inc b/src/python/doc/nerve_gic_complex_sum.inc index d633c4ff..7fe55aff 100644 --- a/src/python/doc/nerve_gic_complex_sum.inc +++ b/src/python/doc/nerve_gic_complex_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +----------------------------------------------------------------+------------------------------------------------------------------------+------------------------------------------------------------------+ | .. figure:: | Nerves and Graph Induced Complexes are cover complexes, i.e. | :Author: Mathieu Carrière | | ../../doc/Nerve_GIC/gicvisu.jpg | simplicial complexes that provably contain topological information | | - | :alt: Graph Induced Complex of a point cloud. | about the input data. They can be computed with a cover of the data, | :Introduced in: GUDHI 2.3.0 | + | :alt: Graph Induced Complex of a point cloud. | about the input data. They can be computed with a cover of the data, | :Since: GUDHI 2.3.0 | | :figclass: align-center | that comes i.e. from the preimage of a family of intervals covering | | - | | the image of a scalar-valued function defined on the data. | :Copyright: MIT (`GPL v3 </licensing/>`_) | + | | the image of a scalar-valued function defined on the data. | :License: MIT (`GPL v3 </licensing/>`_) | | | | | | | | :Requires: `CGAL <installation.html#cgal>`__ :math:`\geq` 4.11.0 | | | | | diff --git a/src/python/doc/persistence_graphical_tools_sum.inc b/src/python/doc/persistence_graphical_tools_sum.inc index ef376802..b68d3d7e 100644 --- a/src/python/doc/persistence_graphical_tools_sum.inc +++ b/src/python/doc/persistence_graphical_tools_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +-----------------------------------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------+ | .. figure:: | These graphical tools comes on top of persistence results and allows | :Author: Vincent Rouvreau, Theo Lacombe | | img/graphical_tools_representation.png | the user to display easily persistence barcode, diagram or density. | | - | | | :Introduced in: GUDHI 2.0.0 | + | | | :Since: GUDHI 2.0.0 | | | Note that these functions return the matplotlib axis, allowing | | - | | for further modifications (title, aspect, etc.) | :Copyright: MIT | + | | for further modifications (title, aspect, etc.) | :License: MIT | | | | | | | | :Requires: matplotlib, numpy and scipy | +-----------------------------------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------+ diff --git a/src/python/doc/persistent_cohomology_sum.inc b/src/python/doc/persistent_cohomology_sum.inc index 4d7b077e..0effb50f 100644 --- a/src/python/doc/persistent_cohomology_sum.inc +++ b/src/python/doc/persistent_cohomology_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +-----------------------------------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------+ | .. figure:: | The theory of homology consists in attaching to a topological space | :Author: Clément Maria | | ../../doc/Persistent_cohomology/3DTorus_poch.png | a sequence of (homology) groups, capturing global topological | | - | :figclass: align-center | features like connected components, holes, cavities, etc. Persistent | :Introduced in: GUDHI 2.0.0 | + | :figclass: align-center | features like connected components, holes, cavities, etc. Persistent | :Since: GUDHI 2.0.0 | | | homology studies the evolution -- birth, life and death -- of these | | - | Rips Persistent Cohomology on a 3D | features when the topological space is changing. Consequently, the | :Copyright: MIT | + | Rips Persistent Cohomology on a 3D | features when the topological space is changing. Consequently, the | :License: MIT | | Torus | theory is essentially composed of three elements: topological spaces, | | | | their homology groups and an evolution scheme. | | | | | | diff --git a/src/python/doc/persistent_cohomology_user.rst b/src/python/doc/persistent_cohomology_user.rst index de83cda1..4d743aac 100644 --- a/src/python/doc/persistent_cohomology_user.rst +++ b/src/python/doc/persistent_cohomology_user.rst @@ -7,7 +7,7 @@ Persistent cohomology user manual Definition ---------- ===================================== ===================================== ===================================== -:Author: Clément Maria :Introduced in: GUDHI PYTHON 2.0.0 :Copyright: GPL v3 +:Author: Clément Maria :Since: GUDHI PYTHON 2.0.0 :License: GPL v3 ===================================== ===================================== ===================================== +-----------------------------------------------------------------+-----------------------------------------------------------------------+ @@ -111,10 +111,3 @@ We provide several example files: run these examples with -h for details on thei * :download:`rips_complex_diagram_persistence_from_distance_matrix_file_example.py <../example/rips_complex_diagram_persistence_from_distance_matrix_file_example.py>` * :download:`random_cubical_complex_persistence_example.py <../example/random_cubical_complex_persistence_example.py>` * :download:`tangential_complex_plain_homology_from_off_file_example.py <../example/tangential_complex_plain_homology_from_off_file_example.py>` - -Bibliography -============ - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docnames - :style: unsrt diff --git a/src/python/doc/point_cloud.rst b/src/python/doc/point_cloud.rst index c0d4b303..192f70db 100644 --- a/src/python/doc/point_cloud.rst +++ b/src/python/doc/point_cloud.rst @@ -21,10 +21,25 @@ Subsampling :special-members: :show-inheritance: -TimeDelayEmbedding ------------------- +Time Delay Embedding +-------------------- .. autoclass:: gudhi.point_cloud.timedelay.TimeDelayEmbedding :members: :special-members: __call__ +K nearest neighbors +------------------- + +.. automodule:: gudhi.point_cloud.knn + :members: + :undoc-members: + :special-members: __init__ + +Distance to measure +------------------- + +.. automodule:: gudhi.point_cloud.dtm + :members: + :undoc-members: + :special-members: __init__ diff --git a/src/python/doc/point_cloud_sum.inc b/src/python/doc/point_cloud_sum.inc index 85d52de7..d4761aba 100644 --- a/src/python/doc/point_cloud_sum.inc +++ b/src/python/doc/point_cloud_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ - | | :math:`(x_1, x_2, \ldots, x_d)` | Utilities to process point clouds: read from file, subsample, etc. | :Author: Vincent Rouvreau | - | | :math:`(y_1, y_2, \ldots, y_d)` | | | - | | | :Introduced in: GUDHI 2.0.0 | + | | :math:`(x_1, x_2, \ldots, x_d)` | Utilities to process point clouds: read from file, subsample, | :Authors: Vincent Rouvreau, Marc Glisse, Masatoshi Takenouchi | + | | :math:`(y_1, y_2, \ldots, y_d)` | find neighbors, embed time series in higher dimension, etc. | | + | | | :Since: GUDHI 2.0.0 | | | | | - | | | :Copyright: MIT (`GPL v3 </licensing/>`_) | + | | | :License: MIT (`GPL v3 </licensing/>`_, BSD-3-Clause, Apache-2.0) | | | Parts of this package require CGAL. | | | | | :Requires: `Eigen <installation.html#eigen>`__ :math:`\geq` 3.1.0 and `CGAL <installation.html#cgal>`__ :math:`\geq` 4.11.0 | | | | | diff --git a/src/python/doc/representations_sum.inc b/src/python/doc/representations_sum.inc index 700828f1..eac89b9d 100644 --- a/src/python/doc/representations_sum.inc +++ b/src/python/doc/representations_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +------------------------------------------------------------------+----------------------------------------------------------------+-----------------------------------------------+ | .. figure:: | Vectorizations, distances and kernels that work on persistence | :Author: Mathieu Carrière | | img/sklearn-tda.png | diagrams, compatible with scikit-learn. | | - | | | :Introduced in: GUDHI 3.1.0 | + | | | :Since: GUDHI 3.1.0 | | | | | - | | | :Copyright: MIT | + | | | :License: MIT | | | | | | | | :Requires: scikit-learn | +------------------------------------------------------------------+----------------------------------------------------------------+-----------------------------------------------+ diff --git a/src/python/doc/rips_complex_sum.inc b/src/python/doc/rips_complex_sum.inc index 857c6893..6feb74cd 100644 --- a/src/python/doc/rips_complex_sum.inc +++ b/src/python/doc/rips_complex_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +----------------------------------------------------------------+------------------------------------------------------------------------+----------------------------------------------------------------------+ | .. figure:: | Rips complex is a simplicial complex constructed from a one skeleton | :Authors: Clément Maria, Pawel Dlotko, Vincent Rouvreau, Marc Glisse | | ../../doc/Rips_complex/rips_complex_representation.png | graph. | | - | :figclass: align-center | | :Introduced in: GUDHI 2.0.0 | + | :figclass: align-center | | :Since: GUDHI 2.0.0 | | | The filtration value of each edge is computed from a user-given | | - | | distance function and is inserted until a user-given threshold | :Copyright: MIT | + | | distance function and is inserted until a user-given threshold | :License: MIT | | | value. | | | | | | | | This complex can be built from a point cloud and a distance function, | | diff --git a/src/python/doc/rips_complex_user.rst b/src/python/doc/rips_complex_user.rst index a27573e8..8efb12e6 100644 --- a/src/python/doc/rips_complex_user.rst +++ b/src/python/doc/rips_complex_user.rst @@ -8,7 +8,7 @@ Definition ---------- ==================================================================== ================================ ====================== -:Authors: Clément Maria, Pawel Dlotko, Vincent Rouvreau, Marc Glisse :Introduced in: GUDHI 2.0.0 :Copyright: GPL v3 +:Authors: Clément Maria, Pawel Dlotko, Vincent Rouvreau, Marc Glisse :Since: GUDHI 2.0.0 :License: GPL v3 ==================================================================== ================================ ====================== +-------------------------------------------+----------------------------------------------------------------------+ diff --git a/src/python/doc/simplex_tree_ref.rst b/src/python/doc/simplex_tree_ref.rst index 9eb8c199..46b2c1e5 100644 --- a/src/python/doc/simplex_tree_ref.rst +++ b/src/python/doc/simplex_tree_ref.rst @@ -8,7 +8,6 @@ Simplex tree reference manual .. autoclass:: gudhi.SimplexTree :members: - :undoc-members: :show-inheritance: .. automethod:: gudhi.SimplexTree.__init__ diff --git a/src/python/doc/simplex_tree_sum.inc b/src/python/doc/simplex_tree_sum.inc index 5ba58d2b..a8858f16 100644 --- a/src/python/doc/simplex_tree_sum.inc +++ b/src/python/doc/simplex_tree_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------+ | .. figure:: | The simplex tree is an efficient and flexible data structure for | :Author: Clément Maria | | ../../doc/Simplex_tree/Simplex_tree_representation.png | representing general (filtered) simplicial complexes. | | - | :alt: Simplex tree representation | | :Introduced in: GUDHI 2.0.0 | + | :alt: Simplex tree representation | | :Since: GUDHI 2.0.0 | | :figclass: align-center | The data structure is described in | | - | | :cite:`boissonnatmariasimplextreealgorithmica` | :Copyright: MIT | + | | :cite:`boissonnatmariasimplextreealgorithmica` | :License: MIT | | | | | +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------+ | * :doc:`simplex_tree_user` | * :doc:`simplex_tree_ref` | diff --git a/src/python/doc/tangential_complex_sum.inc b/src/python/doc/tangential_complex_sum.inc index d84aa433..45ce2a66 100644 --- a/src/python/doc/tangential_complex_sum.inc +++ b/src/python/doc/tangential_complex_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ | .. figure:: | A Tangential Delaunay complex is a simplicial complex designed to | :Author: Clément Jamin | | ../../doc/Tangential_complex/tc_examples.png | reconstruct a :math:`k`-dimensional manifold embedded in :math:`d`- | | - | :figclass: align-center | dimensional Euclidean space. The input is a point sample coming from | :Introduced in: GUDHI 2.0.0 | + | :figclass: align-center | dimensional Euclidean space. The input is a point sample coming from | :Since: GUDHI 2.0.0 | | | an unknown manifold. The running time depends only linearly on the | | - | | extrinsic dimension :math:`d` and exponentially on the intrinsic | :Copyright: MIT (`GPL v3 </licensing/>`_) | + | | extrinsic dimension :math:`d` and exponentially on the intrinsic | :License: MIT (`GPL v3 </licensing/>`_) | | | dimension :math:`k`. | | | | | :Requires: `Eigen <installation.html#eigen>`__ :math:`\geq` 3.1.0 and `CGAL <installation.html#cgal>`__ :math:`\geq` 4.11.0 | +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/tangential_complex_user.rst b/src/python/doc/tangential_complex_user.rst index 852cf5b6..3d45473b 100644 --- a/src/python/doc/tangential_complex_user.rst +++ b/src/python/doc/tangential_complex_user.rst @@ -194,11 +194,3 @@ The output is: Tangential contains 4 vertices. Inconsistencies has been fixed. - - -Bibliography -============ - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docnames - :style: unsrt diff --git a/src/python/doc/wasserstein_distance_sum.inc b/src/python/doc/wasserstein_distance_sum.inc index a97f428d..f9308e5e 100644 --- a/src/python/doc/wasserstein_distance_sum.inc +++ b/src/python/doc/wasserstein_distance_sum.inc @@ -1,13 +1,13 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ | .. figure:: | The q-Wasserstein distance measures the similarity between two | :Author: Theo Lacombe | - | ../../doc/Bottleneck_distance/perturb_pd.png | persistence diagrams. It's the minimum value c that can be achieved | | - | :figclass: align-center | by a perfect matching between the points of the two diagrams (+ all | :Introduced in: GUDHI 3.1.0 | - | | diagonal points), where the value of a matching is defined as the | | - | Wasserstein distance is the q-th root of the sum of the | q-th root of the sum of all edge lengths to the power q. Edge lengths| :Copyright: MIT | - | edge lengths to the power q. | are measured in norm p, for :math:`1 \leq p \leq \infty`. | | + | ../../doc/Bottleneck_distance/perturb_pd.png | persistence diagrams using the sum of all edges lengths (instead of | | + | :figclass: align-center | the maximum). It allows to define sophisticated objects such as | :Since: GUDHI 3.1.0 | + | | barycenters of a family of persistence diagrams. | | + | | | :License: MIT | + | | | | | | | :Requires: Python Optimal Transport (POT) :math:`\geq` 0.5.1 | +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ | * :doc:`wasserstein_distance_user` | | diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst index a9b21fa5..c443bab5 100644 --- a/src/python/doc/wasserstein_distance_user.rst +++ b/src/python/doc/wasserstein_distance_user.rst @@ -9,10 +9,16 @@ Definition .. include:: wasserstein_distance_sum.inc -Functions ---------- -This implementation uses the Python Optimal Transport library and is based on -ideas from "Large Scale Computation of Means and Cluster for Persistence +The q-Wasserstein distance is defined as the minimal value achieved +by a perfect matching between the points of the two diagrams (+ all +diagonal points), where the value of a matching is defined as the +q-th root of the sum of all edge lengths to the power q. Edge lengths +are measured in norm p, for :math:`1 \leq p \leq \infty`. + +Distance Functions +------------------ +This first implementation uses the Python Optimal Transport library and is based +on ideas from "Large Scale Computation of Means and Cluster for Persistence Diagrams via Optimal Transport" :cite:`10.5555/3327546.3327645`. .. autofunction:: gudhi.wasserstein.wasserstein_distance @@ -26,7 +32,7 @@ Morozov, and Arnur Nigmetov. .. autofunction:: gudhi.hera.wasserstein_distance Basic example -------------- +************* This example computes the 1-Wasserstein distance from 2 persistence diagrams with Euclidean ground metric. Note that persistence diagrams must be submitted as (n x 2) numpy arrays and must not contain inf values. @@ -48,9 +54,9 @@ The output is: Wasserstein distance value = 1.45 -We can also have access to the optimal matching by letting `matching=True`. +We can also have access to the optimal matching by letting `matching=True`. It is encoded as a list of indices (i,j), meaning that the i-th point in X -is mapped to the j-th point in Y. +is mapped to the j-th point in Y. An index of -1 represents the diagonal. .. testcode:: @@ -78,9 +84,83 @@ An index of -1 represents the diagonal. The output is: .. testoutput:: - + Wasserstein distance value = 2.15 point 0 in dgm1 is matched to point 0 in dgm2 point 1 in dgm1 is matched to point 2 in dgm2 point 2 in dgm1 is matched to the diagonal point 1 in dgm2 is matched to the diagonal + +Barycenters +----------- + +A Frechet mean (or barycenter) is a generalization of the arithmetic +mean in a non linear space such as the one of persistence diagrams. +Given a set of persistence diagrams :math:`\mu_1 \dots \mu_n`, it is +defined as a minimizer of the variance functional, that is of +:math:`\mu \mapsto \sum_{i=1}^n d_2(\mu,\mu_i)^2`. +where :math:`d_2` denotes the Wasserstein-2 distance between +persistence diagrams. +It is known to exist and is generically unique. However, an exact +computation is in general untractable. Current implementation +available is based on (Turner et al., 2014), +:cite:`turner2014frechet` +and uses an EM-scheme to +provide a local minimum of the variance functional (somewhat similar +to the Lloyd algorithm to estimate a solution to the k-means +problem). The local minimum returned depends on the initialization of +the barycenter. +The combinatorial structure of the algorithm limits its +performances on large scale problems (thousands of diagrams and of points +per diagram). + +.. figure:: + ./img/barycenter.png + :figclass: align-center + + Illustration of Frechet mean between persistence + diagrams. + + +.. autofunction:: gudhi.wasserstein.barycenter.lagrangian_barycenter + +Basic example +************* + +This example estimates the Frechet mean (aka Wasserstein barycenter) between +four persistence diagrams. +It is initialized on the 4th diagram. +As the algorithm is not convex, its output depends on the initialization and +is only a local minimum of the objective function. +Initialization can be either given as an integer (in which case the i-th +diagram of the list is used as initial estimate) or as a diagram. +If None, it will randomly select one of the diagrams of the list +as initial estimate. +Note that persistence diagrams must be submitted as +(n x 2) numpy arrays and must not contain inf values. + + +.. testcode:: + + from gudhi.wasserstein.barycenter import lagrangian_barycenter + import numpy as np + + dg1 = np.array([[0.2, 0.5]]) + dg2 = np.array([[0.2, 0.7]]) + dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]]) + dg4 = np.array([]) + pdiagset = [dg1, dg2, dg3, dg4] + bary = lagrangian_barycenter(pdiagset=pdiagset,init=3) + + message = "Wasserstein barycenter estimated:" + print(message) + print(bary) + +The output is: + +.. testoutput:: + + Wasserstein barycenter estimated: + [[0.27916667 0.55416667] + [0.7375 0.7625 ] + [0.2375 0.2625 ]] diff --git a/src/python/doc/witness_complex_sum.inc b/src/python/doc/witness_complex_sum.inc index 71b65a71..34d4df4a 100644 --- a/src/python/doc/witness_complex_sum.inc +++ b/src/python/doc/witness_complex_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +-------------------------------------------------------------------+----------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ | .. figure:: | Witness complex :math:`Wit(W,L)` is a simplicial complex defined on | :Author: Siargey Kachanovich | | ../../doc/Witness_complex/Witness_complex_representation.png | two sets of points in :math:`\mathbb{R}^D`. | | - | :alt: Witness complex representation | | :Introduced in: GUDHI 2.0.0 | + | :alt: Witness complex representation | | :Since: GUDHI 2.0.0 | | :figclass: align-center | The data structure is described in | | - | | :cite:`boissonnatmariasimplextreealgorithmica`. | :Copyright: MIT (`GPL v3 </licensing/>`_ for Euclidean versions only) | + | | :cite:`boissonnatmariasimplextreealgorithmica`. | :License: MIT (`GPL v3 </licensing/>`_ for Euclidean versions only) | | | | | | | | :Requires: `Eigen <installation.html#eigen>`__ :math:`\geq` 3.1.0 and `CGAL <installation.html#cgal>`__ :math:`\geq` 4.11.0 for Euclidean versions only | +-------------------------------------------------------------------+----------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/witness_complex_user.rst b/src/python/doc/witness_complex_user.rst index 7087fa98..08dcd288 100644 --- a/src/python/doc/witness_complex_user.rst +++ b/src/python/doc/witness_complex_user.rst @@ -126,10 +126,3 @@ Example2: Computing persistence using strong relaxed witness complex Here is an example of constructing a strong witness complex filtration and computing persistence on it: * :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>` - -Bibliography -============ - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docnames - :style: unsrt diff --git a/src/python/doc/zbibliography.rst b/src/python/doc/zbibliography.rst new file mode 100644 index 00000000..e23fcf25 --- /dev/null +++ b/src/python/doc/zbibliography.rst @@ -0,0 +1,10 @@ +:orphan: + +.. To get rid of WARNING: document isn't included in any toctree + +Bibliography +------------ + +.. bibliography:: ../../biblio/bibliography.bib + :style: plain + diff --git a/src/python/example/alpha_complex_diagram_persistence_from_off_file_example.py b/src/python/example/alpha_complex_diagram_persistence_from_off_file_example.py index 4079a469..727af4fa 100755 --- a/src/python/example/alpha_complex_diagram_persistence_from_off_file_example.py +++ b/src/python/example/alpha_complex_diagram_persistence_from_off_file_example.py @@ -1,11 +1,15 @@ #!/usr/bin/env python import argparse +import errno +import os import matplotlib.pyplot as plot import gudhi -""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. - See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - + which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full + license details. Author(s): Vincent Rouvreau Copyright (C) 2016 Inria @@ -41,7 +45,7 @@ args = parser.parse_args() with open(args.file, "r") as f: first_line = f.readline() if (first_line == "OFF\n") or (first_line == "nOFF\n"): - print("#####################################################################") + print("##############################################################") print("AlphaComplex creation from points read in a OFF file") message = "AlphaComplex with max_edge_length=" + repr(args.max_alpha_square) @@ -64,6 +68,7 @@ with open(args.file, "r") as f: gudhi.plot_persistence_diagram(diag, band=args.band) plot.show() else: - print(args.file, "is not a valid OFF file") + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), + args.file) f.close() diff --git a/src/python/example/alpha_complex_from_points_example.py b/src/python/example/alpha_complex_from_points_example.py index 73faf17c..465632eb 100755 --- a/src/python/example/alpha_complex_from_points_example.py +++ b/src/python/example/alpha_complex_from_points_example.py @@ -46,9 +46,6 @@ if simplex_tree.find([4]): else: print("[4] Not found...") -# Some insertions, simplex_tree needs to initialize filtrations -simplex_tree.initialize_filtration() - print("dimension=", simplex_tree.dimension()) print("filtrations=") for simplex_with_filtration in simplex_tree.get_filtration(): diff --git a/src/python/example/alpha_rips_persistence_bottleneck_distance.py b/src/python/example/alpha_rips_persistence_bottleneck_distance.py index d5c33ec8..3e12b0d5 100755 --- a/src/python/example/alpha_rips_persistence_bottleneck_distance.py +++ b/src/python/example/alpha_rips_persistence_bottleneck_distance.py @@ -3,9 +3,14 @@ import gudhi import argparse import math - -""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. - See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +import errno +import os +import numpy as np + +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - + which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full + license details. Author(s): Vincent Rouvreau Copyright (C) 2016 Inria @@ -36,7 +41,7 @@ with open(args.file, "r") as f: first_line = f.readline() if (first_line == "OFF\n") or (first_line == "nOFF\n"): point_cloud = gudhi.read_points_from_off_file(off_file=args.file) - print("#####################################################################") + print("##############################################################") print("RipsComplex creation from points read in a OFF file") message = "RipsComplex with max_edge_length=" + repr(args.threshold) @@ -46,14 +51,15 @@ with open(args.file, "r") as f: points=point_cloud, max_edge_length=args.threshold ) - rips_stree = rips_complex.create_simplex_tree(max_dimension=args.max_dimension) + rips_stree = rips_complex.create_simplex_tree( + max_dimension=args.max_dimension) message = "Number of simplices=" + repr(rips_stree.num_simplices()) print(message) - rips_diag = rips_stree.persistence() + rips_stree.compute_persistence() - print("#####################################################################") + print("##############################################################") print("AlphaComplex creation from points read in a OFF file") message = "AlphaComplex with max_edge_length=" + repr(args.threshold) @@ -67,18 +73,13 @@ with open(args.file, "r") as f: message = "Number of simplices=" + repr(alpha_stree.num_simplices()) print(message) - alpha_diag = alpha_stree.persistence() + alpha_stree.compute_persistence() max_b_distance = 0.0 for dim in range(args.max_dimension): # Alpha persistence values needs to be transform because filtration # values are alpha square values - funcs = [math.sqrt, math.sqrt] - alpha_intervals = [] - for interval in alpha_stree.persistence_intervals_in_dimension(dim): - alpha_intervals.append( - map(lambda func, value: func(value), funcs, interval) - ) + alpha_intervals = np.sqrt(alpha_stree.persistence_intervals_in_dimension(dim)) rips_intervals = rips_stree.persistence_intervals_in_dimension(dim) bottleneck_distance = gudhi.bottleneck_distance( @@ -93,13 +94,13 @@ with open(args.file, "r") as f: print(message) max_b_distance = max(bottleneck_distance, max_b_distance) - print( - "================================================================================" - ) + print("==============================================================") message = "Bottleneck distance is " + repr(max_b_distance) print(message) else: - print(args.file, "is not a valid OFF file") + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), + args.file) + f.close() diff --git a/src/python/example/diagram_vectorizations_distances_kernels.py b/src/python/example/diagram_vectorizations_distances_kernels.py index 507ead7c..de22d9e7 100755 --- a/src/python/example/diagram_vectorizations_distances_kernels.py +++ b/src/python/example/diagram_vectorizations_distances_kernels.py @@ -9,7 +9,7 @@ from gudhi.representations import DiagramSelector, Clamping, Landscape, Silhouet TopologicalVector, DiagramScaler, BirthPersistenceTransform,\ PersistenceImage, PersistenceWeightedGaussianKernel, Entropy, \ PersistenceScaleSpaceKernel, SlicedWassersteinDistance,\ - SlicedWassersteinKernel, BottleneckDistance, WassersteinDistance, PersistenceFisherKernel + SlicedWassersteinKernel, BottleneckDistance, PersistenceFisherKernel D = np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.], [0., np.inf], [5., np.inf]]) diags = [D] diff --git a/src/python/example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py b/src/python/example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py index 4903667e..e1e572df 100755 --- a/src/python/example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py +++ b/src/python/example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py @@ -1,11 +1,15 @@ #!/usr/bin/env python import argparse +import errno +import os import matplotlib.pyplot as plot import gudhi -""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. - See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - + which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full + license details. Author(s): Vincent Rouvreau Copyright (C) 2016 Inria @@ -44,8 +48,9 @@ args = parser.parse_args() with open(args.file, "r") as f: first_line = f.readline() if (first_line == "OFF\n") or (first_line == "nOFF\n"): - print("#####################################################################") - print("EuclideanStrongWitnessComplex creation from points read in a OFF file") + print("##############################################################") + print("EuclideanStrongWitnessComplex creation from points read "\ + "in a OFF file") witnesses = gudhi.read_points_from_off_file(off_file=args.file) landmarks = gudhi.pick_n_random_points( @@ -64,7 +69,8 @@ with open(args.file, "r") as f: witnesses=witnesses, landmarks=landmarks ) simplex_tree = witness_complex.create_simplex_tree( - max_alpha_square=args.max_alpha_square, limit_dimension=args.limit_dimension + max_alpha_square=args.max_alpha_square, + limit_dimension=args.limit_dimension ) message = "Number of simplices=" + repr(simplex_tree.num_simplices()) @@ -79,6 +85,7 @@ with open(args.file, "r") as f: gudhi.plot_persistence_diagram(diag, band=args.band) plot.show() else: - print(args.file, "is not a valid OFF file") + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), + args.file) f.close() diff --git a/src/python/example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py b/src/python/example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py index 339a8577..58cb2bb5 100755 --- a/src/python/example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py +++ b/src/python/example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py @@ -1,11 +1,15 @@ #!/usr/bin/env python import argparse +import errno +import os import matplotlib.pyplot as plot import gudhi -""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. - See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - + which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full + license details. Author(s): Vincent Rouvreau Copyright (C) 2016 Inria @@ -78,6 +82,7 @@ with open(args.file, "r") as f: gudhi.plot_persistence_diagram(diag, band=args.band) plot.show() else: - print(args.file, "is not a valid OFF file") + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), + args.file) f.close() diff --git a/src/python/example/periodic_cubical_complex_barcode_persistence_from_perseus_file_example.py b/src/python/example/periodic_cubical_complex_barcode_persistence_from_perseus_file_example.py index c692e66f..499171df 100755 --- a/src/python/example/periodic_cubical_complex_barcode_persistence_from_perseus_file_example.py +++ b/src/python/example/periodic_cubical_complex_barcode_persistence_from_perseus_file_example.py @@ -2,10 +2,14 @@ import argparse import matplotlib.pyplot as plot +import errno +import os import gudhi -""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. - See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - + which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full + license details. Author(s): Vincent Rouvreau Copyright (C) 2016 Inria @@ -57,9 +61,10 @@ parser.add_argument( args = parser.parse_args() if is_file_perseus(args.file): - print("#####################################################################") + print("##################################################################") print("PeriodicCubicalComplex creation") - periodic_cubical_complex = gudhi.PeriodicCubicalComplex(perseus_file=args.file) + periodic_cubical_complex = gudhi.PeriodicCubicalComplex( + perseus_file=args.file) print("persistence(homology_coeff_field=3, min_persistence=0)=") diag = periodic_cubical_complex.persistence( @@ -73,4 +78,5 @@ if is_file_perseus(args.file): gudhi.plot_persistence_barcode(diag) plot.show() else: - print(args.file, "is not a valid perseus style file") + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), + args.file) diff --git a/src/python/example/rips_complex_diagram_persistence_from_off_file_example.py b/src/python/example/rips_complex_diagram_persistence_from_off_file_example.py index c757aca7..6f992508 100755 --- a/src/python/example/rips_complex_diagram_persistence_from_off_file_example.py +++ b/src/python/example/rips_complex_diagram_persistence_from_off_file_example.py @@ -1,11 +1,15 @@ #!/usr/bin/env python import argparse +import errno +import os import matplotlib.pyplot as plot import gudhi -""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. - See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - + which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full + license details. Author(s): Vincent Rouvreau Copyright (C) 2016 Inria @@ -42,10 +46,11 @@ args = parser.parse_args() with open(args.file, "r") as f: first_line = f.readline() if (first_line == "OFF\n") or (first_line == "nOFF\n"): - print("#####################################################################") + print("##############################################################") print("RipsComplex creation from points read in a OFF file") - message = "RipsComplex with max_edge_length=" + repr(args.max_edge_length) + message = "RipsComplex with max_edge_length=" + \ + repr(args.max_edge_length) print(message) point_cloud = gudhi.read_points_from_off_file(off_file=args.file) @@ -68,6 +73,7 @@ with open(args.file, "r") as f: gudhi.plot_persistence_diagram(diag, band=args.band) plot.show() else: - print(args.file, "is not a valid OFF file") + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), + args.file) f.close() diff --git a/src/python/example/simplex_tree_example.py b/src/python/example/simplex_tree_example.py index 34833899..c4635dc5 100755 --- a/src/python/example/simplex_tree_example.py +++ b/src/python/example/simplex_tree_example.py @@ -42,7 +42,6 @@ print("simplices=") for simplex_with_filtration in st.get_simplices(): print("(%s, %.2f)" % tuple(simplex_with_filtration)) -st.initialize_filtration() print("filtration=") for simplex_with_filtration in st.get_filtration(): print("(%s, %.2f)" % tuple(simplex_with_filtration)) diff --git a/src/python/example/tangential_complex_plain_homology_from_off_file_example.py b/src/python/example/tangential_complex_plain_homology_from_off_file_example.py index f0df2189..85bade4a 100755 --- a/src/python/example/tangential_complex_plain_homology_from_off_file_example.py +++ b/src/python/example/tangential_complex_plain_homology_from_off_file_example.py @@ -1,11 +1,15 @@ #!/usr/bin/env python import argparse +import errno +import os import matplotlib.pyplot as plot import gudhi -""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. - See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - + which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full + license details. Author(s): Vincent Rouvreau Copyright (C) 2016 Inria @@ -19,7 +23,7 @@ __copyright__ = "Copyright (C) 2016 Inria" __license__ = "MIT" parser = argparse.ArgumentParser( - description="TangentialComplex creation from " "points read in a OFF file.", + description="TangentialComplex creation from points read in a OFF file.", epilog="Example: " "example/tangential_complex_plain_homology_from_off_file_example.py " "-f ../data/points/tore3D_300.off -i 3" @@ -41,10 +45,11 @@ args = parser.parse_args() with open(args.file, "r") as f: first_line = f.readline() if (first_line == "OFF\n") or (first_line == "nOFF\n"): - print("#####################################################################") + print("##############################################################") print("TangentialComplex creation from points read in a OFF file") - tc = gudhi.TangentialComplex(intrisic_dim=args.intrisic_dim, off_file=args.file) + tc = gudhi.TangentialComplex(intrisic_dim=args.intrisic_dim, + off_file=args.file) tc.compute_tangential_complex() st = tc.create_simplex_tree() @@ -60,6 +65,7 @@ with open(args.file, "r") as f: gudhi.plot_persistence_diagram(diag, band=args.band) plot.show() else: - print(args.file, "is not a valid OFF file") + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), + args.file) f.close() diff --git a/src/python/gudhi/alpha_complex.pyx b/src/python/gudhi/alpha_complex.pyx index fff3e920..e04dc652 100644 --- a/src/python/gudhi/alpha_complex.pyx +++ b/src/python/gudhi/alpha_complex.pyx @@ -1,5 +1,7 @@ -# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. -# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - +# which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full +# license details. # Author(s): Vincent Rouvreau # # Copyright (C) 2016 Inria @@ -7,6 +9,7 @@ # Modification(s): # - YYYY/MM Author: Description of the modification +from __future__ import print_function from cython cimport numeric from libcpp.vector cimport vector from libcpp.utility cimport pair @@ -69,7 +72,8 @@ cdef class AlphaComplex: def __cinit__(self, points = None, off_file = ''): if off_file: if os.path.isfile(off_file): - self.thisptr = new Alpha_complex_interface(off_file.encode('utf-8'), True) + self.thisptr = new Alpha_complex_interface( + off_file.encode('utf-8'), True) else: print("file " + off_file + " not found.") else: diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index cbeda014..007abcb6 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -1,5 +1,7 @@ -# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. -# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - +# which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full +# license details. # Author(s): Vincent Rouvreau # # Copyright (C) 2016 Inria @@ -7,12 +9,15 @@ # Modification(s): # - YYYY/MM Author: Description of the modification +from __future__ import print_function from cython cimport numeric from libcpp.vector cimport vector from libcpp.utility cimport pair from libcpp.string cimport string from libcpp cimport bool +import errno import os +import sys import numpy as np @@ -30,7 +35,8 @@ cdef extern from "Cubical_complex_interface.h" namespace "Gudhi": cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Cubical_complex_persistence_interface "Gudhi::Persistent_cohomology_interface<Gudhi::Cubical_complex::Cubical_complex_interface<>>": Cubical_complex_persistence_interface(Bitmap_cubical_complex_base_interface * st, bool persistence_dim_max) - vector[pair[int, pair[double, double]]] get_persistence(int homology_coeff_field, double min_persistence) + void compute_persistence(int homology_coeff_field, double min_persistence) + vector[pair[int, pair[double, double]]] get_persistence() vector[int] betti_numbers() vector[int] persistent_betti_numbers(double from_value, double to_value) vector[pair[double,double]] intervals_in_dimension(int dimension) @@ -87,10 +93,12 @@ cdef class CubicalComplex: if os.path.isfile(perseus_file): self.thisptr = new Bitmap_cubical_complex_base_interface(perseus_file.encode('utf-8')) else: - print("file " + perseus_file + " not found.") + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), + perseus_file) else: print("CubicalComplex can be constructed from dimensions and " - "top_dimensional_cells or from a Perseus-style file name.") + "top_dimensional_cells or from a Perseus-style file name.", + file=sys.stderr) def __dealloc__(self): if self.thisptr != NULL: @@ -122,8 +130,31 @@ cdef class CubicalComplex: """ return self.thisptr.dimension() + def compute_persistence(self, homology_coeff_field=11, min_persistence=0): + """This function computes the persistence of the complex, so it can be + accessed through :func:`persistent_betti_numbers`, + :func:`persistence_intervals_in_dimension`, etc. This function is + equivalent to :func:`persistence` when you do not want the list + :func:`persistence` returns. + + :param homology_coeff_field: The homology coefficient field. Must be a + prime number + :type homology_coeff_field: int. + :param min_persistence: The minimum persistence value to take into + account (strictly greater than min_persistence). Default value is + 0.0. + Sets min_persistence to -1.0 to see all values. + :type min_persistence: float. + :returns: Nothing. + """ + if self.pcohptr != NULL: + del self.pcohptr + assert self.__is_defined() + self.pcohptr = new Cubical_complex_persistence_interface(self.thisptr, True) + self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) + def persistence(self, homology_coeff_field=11, min_persistence=0): - """This function returns the persistence of the complex. + """This function computes and returns the persistence of the complex. :param homology_coeff_field: The homology coefficient field. Must be a prime number @@ -136,30 +167,22 @@ cdef class CubicalComplex: :returns: list of pairs(dimension, pair(birth, death)) -- the persistence of the complex. """ - if self.pcohptr != NULL: - del self.pcohptr - if self.thisptr != NULL: - self.pcohptr = new Cubical_complex_persistence_interface(self.thisptr, True) - cdef vector[pair[int, pair[double, double]]] persistence_result - if self.pcohptr != NULL: - persistence_result = self.pcohptr.get_persistence(homology_coeff_field, min_persistence) - return persistence_result + self.compute_persistence(homology_coeff_field, min_persistence) + return self.pcohptr.get_persistence() def betti_numbers(self): """This function returns the Betti numbers of the complex. :returns: list of int -- The Betti numbers ([B0, B1, ..., Bn]). - :note: betti_numbers function requires persistence function to be + :note: betti_numbers function requires :func:`compute_persistence` function to be launched first. :note: betti_numbers function always returns [1, 0, 0, ...] as infinity filtration cubes are not removed from the complex. """ - cdef vector[int] bn_result - if self.pcohptr != NULL: - bn_result = self.pcohptr.betti_numbers() - return bn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before betti_numbers()" + return self.pcohptr.betti_numbers() def persistent_betti_numbers(self, from_value, to_value): """This function returns the persistent Betti numbers of the complex. @@ -174,13 +197,11 @@ cdef class CubicalComplex: :returns: list of int -- The persistent Betti numbers ([B0, B1, ..., Bn]). - :note: persistent_betti_numbers function requires persistence + :note: persistent_betti_numbers function requires :func:`compute_persistence` function to be launched first. """ - cdef vector[int] pbn_result - if self.pcohptr != NULL: - pbn_result = self.pcohptr.persistent_betti_numbers(<double>from_value, <double>to_value) - return pbn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before persistent_betti_numbers()" + return self.pcohptr.persistent_betti_numbers(<double>from_value, <double>to_value) def persistence_intervals_in_dimension(self, dimension): """This function returns the persistence intervals of the complex in a @@ -191,13 +212,8 @@ cdef class CubicalComplex: :returns: The persistence intervals. :rtype: numpy array of dimension 2 - :note: intervals_in_dim function requires persistence function to be + :note: intervals_in_dim function requires :func:`compute_persistence` function to be launched first. """ - cdef vector[pair[double,double]] intervals_result - if self.pcohptr != NULL: - intervals_result = self.pcohptr.intervals_in_dimension(dimension) - else: - print("intervals_in_dim function requires persistence function" - " to be launched first.") - return np.array(intervals_result) + assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()" + return np.array(self.pcohptr.intervals_in_dimension(dimension)) diff --git a/src/python/gudhi/nerve_gic.pyx b/src/python/gudhi/nerve_gic.pyx index 45cc8eba..9c89b239 100644 --- a/src/python/gudhi/nerve_gic.pyx +++ b/src/python/gudhi/nerve_gic.pyx @@ -1,5 +1,7 @@ -# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. -# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - +# which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full +# license details. # Author(s): Vincent Rouvreau # # Copyright (C) 2018 Inria @@ -7,11 +9,13 @@ # Modification(s): # - YYYY/MM Author: Description of the modification +from __future__ import print_function from cython cimport numeric from libcpp.vector cimport vector from libcpp.utility cimport pair from libcpp.string cimport string from libcpp cimport bool +import errno import os from libc.stdint cimport intptr_t @@ -96,7 +100,8 @@ cdef class CoverComplex: return self.thisptr != NULL def set_point_cloud_from_range(self, cloud): - """ Reads and stores the input point cloud from a vector stored in memory. + """ Reads and stores the input point cloud from a vector stored in + memory. :param cloud: Input vector containing the point cloud. :type cloud: vector[vector[double]] @@ -104,7 +109,8 @@ cdef class CoverComplex: return self.thisptr.set_point_cloud_from_range(cloud) def set_distances_from_range(self, distance_matrix): - """ Reads and stores the input distance matrix from a vector stored in memory. + """ Reads and stores the input distance matrix from a vector stored in + memory. :param distance_matrix: Input vector containing the distance matrix. :type distance_matrix: vector[vector[double]] @@ -163,7 +169,8 @@ cdef class CoverComplex: """ stree = SimplexTree() cdef intptr_t stree_int_ptr=stree.thisptr - self.thisptr.create_simplex_tree(<Simplex_tree_interface_full_featured*>stree_int_ptr) + self.thisptr.create_simplex_tree( + <Simplex_tree_interface_full_featured*>stree_int_ptr) return stree def find_simplices(self): @@ -182,8 +189,8 @@ cdef class CoverComplex: if os.path.isfile(off_file): return self.thisptr.read_point_cloud(off_file.encode('utf-8')) else: - print("file " + off_file + " not found.") - return False + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), + off_file) def set_automatic_resolution(self): """Computes the optimal length of intervals (i.e. the smallest interval @@ -214,7 +221,8 @@ cdef class CoverComplex: if os.path.isfile(color_file_name): self.thisptr.set_color_from_file(color_file_name.encode('utf-8')) else: - print("file " + color_file_name + " not found.") + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), + color_file_name) def set_color_from_range(self, color): """Computes the function used to color the nodes of the simplicial @@ -235,7 +243,8 @@ cdef class CoverComplex: if os.path.isfile(cover_file_name): self.thisptr.set_cover_from_file(cover_file_name.encode('utf-8')) else: - print("file " + cover_file_name + " not found.") + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), + cover_file_name) def set_cover_from_function(self): """Creates a cover C from the preimages of the function f. @@ -268,7 +277,8 @@ cdef class CoverComplex: if os.path.isfile(func_file_name): self.thisptr.set_function_from_file(func_file_name.encode('utf-8')) else: - print("file " + func_file_name + " not found.") + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), + func_file_name) def set_function_from_range(self, function): """Creates the function f from a vector stored in memory. @@ -302,14 +312,15 @@ cdef class CoverComplex: """Creates a graph G from a file containing the edges. :param graph_file_name: Name of the input graph file. The graph file - contains one edge per line, each edge being represented by the IDs of - its two nodes. + contains one edge per line, each edge being represented by the IDs + of its two nodes. :type graph_file_name: string """ if os.path.isfile(graph_file_name): self.thisptr.set_graph_from_file(graph_file_name.encode('utf-8')) else: - print("file " + graph_file_name + " not found.") + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), + graph_file_name) def set_graph_from_OFF(self): """Creates a graph G from the triangulation given by the input OFF diff --git a/src/python/gudhi/off_reader.pyx b/src/python/gudhi/off_reader.pyx index 7e6d9d80..a3200704 100644 --- a/src/python/gudhi/off_reader.pyx +++ b/src/python/gudhi/off_reader.pyx @@ -1,5 +1,7 @@ -# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. -# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - +# which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full +# license details. # Author(s): Vincent Rouvreau # # Copyright (C) 2016 Inria @@ -7,9 +9,11 @@ # Modification(s): # - YYYY/MM Author: Description of the modification +from __future__ import print_function from cython cimport numeric from libcpp.vector cimport vector from libcpp.string cimport string +import errno import os __author__ = "Vincent Rouvreau" @@ -32,6 +36,6 @@ def read_points_from_off_file(off_file=''): if os.path.isfile(off_file): return read_points_from_OFF_file(off_file.encode('utf-8')) else: - print("file " + off_file + " not found.") - return [] + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), + off_file) diff --git a/src/python/gudhi/periodic_cubical_complex.pyx b/src/python/gudhi/periodic_cubical_complex.pyx index 37f76201..246a3a02 100644 --- a/src/python/gudhi/periodic_cubical_complex.pyx +++ b/src/python/gudhi/periodic_cubical_complex.pyx @@ -7,11 +7,13 @@ # Modification(s): # - YYYY/MM Author: Description of the modification +from __future__ import print_function from cython cimport numeric from libcpp.vector cimport vector from libcpp.utility cimport pair from libcpp.string cimport string from libcpp cimport bool +import sys import os import numpy as np @@ -30,7 +32,8 @@ cdef extern from "Cubical_complex_interface.h" namespace "Gudhi": cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Periodic_cubical_complex_persistence_interface "Gudhi::Persistent_cohomology_interface<Gudhi::Cubical_complex::Cubical_complex_interface<Gudhi::cubical_complex::Bitmap_cubical_complex_periodic_boundary_conditions_base<double>>>": Periodic_cubical_complex_persistence_interface(Periodic_cubical_complex_base_interface * st, bool persistence_dim_max) - vector[pair[int, pair[double, double]]] get_persistence(int homology_coeff_field, double min_persistence) + void compute_persistence(int homology_coeff_field, double min_persistence) + vector[pair[int, pair[double, double]]] get_persistence() vector[int] betti_numbers() vector[int] persistent_betti_numbers(double from_value, double to_value) vector[pair[double,double]] intervals_in_dimension(int dimension) @@ -95,12 +98,12 @@ cdef class PeriodicCubicalComplex: if os.path.isfile(perseus_file): self.thisptr = new Periodic_cubical_complex_base_interface(perseus_file.encode('utf-8')) else: - print("file " + perseus_file + " not found.") + print("file " + perseus_file + " not found.", file=sys.stderr) else: print("CubicalComplex can be constructed from dimensions, " "top_dimensional_cells and periodic_dimensions, or from " "top_dimensional_cells and periodic_dimensions or from " - "a Perseus-style file name.") + "a Perseus-style file name.", file=sys.stderr) def __dealloc__(self): if self.thisptr != NULL: @@ -132,8 +135,31 @@ cdef class PeriodicCubicalComplex: """ return self.thisptr.dimension() + def compute_persistence(self, homology_coeff_field=11, min_persistence=0): + """This function computes the persistence of the complex, so it can be + accessed through :func:`persistent_betti_numbers`, + :func:`persistence_intervals_in_dimension`, etc. This function is + equivalent to :func:`persistence` when you do not want the list + :func:`persistence` returns. + + :param homology_coeff_field: The homology coefficient field. Must be a + prime number + :type homology_coeff_field: int. + :param min_persistence: The minimum persistence value to take into + account (strictly greater than min_persistence). Default value is + 0.0. + Sets min_persistence to -1.0 to see all values. + :type min_persistence: float. + :returns: Nothing. + """ + if self.pcohptr != NULL: + del self.pcohptr + assert self.__is_defined() + self.pcohptr = new Periodic_cubical_complex_persistence_interface(self.thisptr, True) + self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) + def persistence(self, homology_coeff_field=11, min_persistence=0): - """This function returns the persistence of the complex. + """This function computes and returns the persistence of the complex. :param homology_coeff_field: The homology coefficient field. Must be a prime number @@ -146,30 +172,22 @@ cdef class PeriodicCubicalComplex: :returns: list of pairs(dimension, pair(birth, death)) -- the persistence of the complex. """ - if self.pcohptr != NULL: - del self.pcohptr - if self.thisptr != NULL: - self.pcohptr = new Periodic_cubical_complex_persistence_interface(self.thisptr, True) - cdef vector[pair[int, pair[double, double]]] persistence_result - if self.pcohptr != NULL: - persistence_result = self.pcohptr.get_persistence(homology_coeff_field, min_persistence) - return persistence_result + self.compute_persistence(homology_coeff_field, min_persistence) + return self.pcohptr.get_persistence() def betti_numbers(self): """This function returns the Betti numbers of the complex. :returns: list of int -- The Betti numbers ([B0, B1, ..., Bn]). - :note: betti_numbers function requires persistence function to be + :note: betti_numbers function requires :func:`compute_persistence` function to be launched first. - :note: betti_numbers function always returns [1, 0, 0, ...] as infinity + :note: This function always returns the Betti numbers of a torus as infinity filtration cubes are not removed from the complex. """ - cdef vector[int] bn_result - if self.pcohptr != NULL: - bn_result = self.pcohptr.betti_numbers() - return bn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before betti_numbers()" + return self.pcohptr.betti_numbers() def persistent_betti_numbers(self, from_value, to_value): """This function returns the persistent Betti numbers of the complex. @@ -184,13 +202,11 @@ cdef class PeriodicCubicalComplex: :returns: list of int -- The persistent Betti numbers ([B0, B1, ..., Bn]). - :note: persistent_betti_numbers function requires persistence + :note: persistent_betti_numbers function requires :func:`compute_persistence` function to be launched first. """ - cdef vector[int] pbn_result - if self.pcohptr != NULL: - pbn_result = self.pcohptr.persistent_betti_numbers(<double>from_value, <double>to_value) - return pbn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before persistent_betti_numbers()" + return self.pcohptr.persistent_betti_numbers(<double>from_value, <double>to_value) def persistence_intervals_in_dimension(self, dimension): """This function returns the persistence intervals of the complex in a @@ -201,13 +217,8 @@ cdef class PeriodicCubicalComplex: :returns: The persistence intervals. :rtype: numpy array of dimension 2 - :note: intervals_in_dim function requires persistence function to be + :note: intervals_in_dim function requires :func:`compute_persistence` function to be launched first. """ - cdef vector[pair[double,double]] intervals_result - if self.pcohptr != NULL: - intervals_result = self.pcohptr.intervals_in_dimension(dimension) - else: - print("intervals_in_dim function requires persistence function" - " to be launched first.") - return np.array(intervals_result) + assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()" + return np.array(self.pcohptr.intervals_in_dimension(dimension)) diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py new file mode 100644 index 00000000..13e16d24 --- /dev/null +++ b/src/python/gudhi/point_cloud/dtm.py @@ -0,0 +1,70 @@ +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +# Author(s): Marc Glisse +# +# Copyright (C) 2020 Inria +# +# Modification(s): +# - YYYY/MM Author: Description of the modification + +from .knn import KNearestNeighbors + +__author__ = "Marc Glisse" +__copyright__ = "Copyright (C) 2020 Inria" +__license__ = "MIT" + + +class DistanceToMeasure: + """ + Class to compute the distance to the empirical measure defined by a point set, as introduced in :cite:`dtm`. + """ + + def __init__(self, k, q=2, **kwargs): + """ + Args: + k (int): number of neighbors (possibly including the point itself). + q (float): order used to compute the distance to measure. Defaults to 2. + kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNearestNeighbors`, except that + metric="neighbors" means that :func:`transform` expects an array with the distances + to the k nearest neighbors. + """ + self.k = k + self.q = q + self.params = kwargs + + def fit_transform(self, X, y=None): + return self.fit(X).transform(X) + + def fit(self, X, y=None): + """ + Args: + X (numpy.array): coordinates for mass points. + """ + if self.params.setdefault("metric", "euclidean") != "neighbors": + self.knn = KNearestNeighbors( + self.k, return_index=False, return_distance=True, sort_results=False, **self.params + ) + self.knn.fit(X) + return self + + def transform(self, X): + """ + Args: + X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed", + or distances to the k nearest neighbors if metric is "neighbors" (if the array has more + than k columns, the remaining ones are ignored). + + Returns: + numpy.array: a 1-d array with, for each point of X, its distance to the measure defined + by the argument of :func:`fit`. + """ + if self.params["metric"] == "neighbors": + distances = X[:, : self.k] + else: + distances = self.knn.transform(X) + distances = distances ** self.q + dtm = distances.sum(-1) / self.k + dtm = dtm ** (1.0 / self.q) + # We compute too many powers, 1/p in knn then q in dtm, 1/q in dtm then q or some log in the caller. + # Add option to skip the final root? + return dtm diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py new file mode 100644 index 00000000..07553d6d --- /dev/null +++ b/src/python/gudhi/point_cloud/knn.py @@ -0,0 +1,324 @@ +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +# Author(s): Marc Glisse +# +# Copyright (C) 2020 Inria +# +# Modification(s): +# - YYYY/MM Author: Description of the modification + +import numpy + +# TODO: https://github.com/facebookresearch/faiss + +__author__ = "Marc Glisse" +__copyright__ = "Copyright (C) 2020 Inria" +__license__ = "MIT" + + +class KNearestNeighbors: + """ + Class wrapping several implementations for computing the k nearest neighbors in a point set. + """ + + def __init__(self, k, return_index=True, return_distance=False, metric="euclidean", **kwargs): + """ + Args: + k (int): number of neighbors (possibly including the point itself). + return_index (bool): if True, return the index of each neighbor. + return_distance (bool): if True, return the distance to each neighbor. + implementation (str): choice of the library that does the real work. + + * 'keops' for a brute-force, CUDA implementation through pykeops. Useful when the dimension becomes large (10+) but the number of points remains low (less than a million). Only "minkowski" and its aliases are supported. + * 'ckdtree' for scipy's cKDTree. Only "minkowski" and its aliases are supported. + * 'sklearn' for scikit-learn's NearestNeighbors. Note that this provides in particular an option algorithm="brute". + * 'hnsw' for hnswlib.Index. It can be very fast but does not provide guarantees. Only supports "euclidean" for now. + * None will try to select a sensible one (scipy if possible, scikit-learn otherwise). + metric (str): see `sklearn.neighbors.NearestNeighbors`. + eps (float): relative error when computing nearest neighbors with the cKDTree. + p (float): norm L^p on input points (including numpy.inf) if metric is "minkowski". Defaults to 2. + n_jobs (int): number of jobs to schedule for parallel processing of nearest neighbors on the CPU. + If -1 is given all processors are used. Default: 1. + sort_results (bool): if True, then distances and indices of each point are + sorted on return, so that the first column contains the closest points. + Otherwise, neighbors are returned in an arbitrary order. Defaults to True. + enable_autodiff (bool): if the input is a torch.tensor, jax.numpy.ndarray or tensorflow.Tensor, this + instructs the function to compute distances in a way that works with automatic differentiation. + This is experimental, not supported for all metrics, and requires the package EagerPy. + Defaults to False. + kwargs: additional parameters are forwarded to the backends. + """ + self.k = k + self.return_index = return_index + self.return_distance = return_distance + self.metric = metric + self.params = kwargs + # canonicalize + if metric == "euclidean": + self.params["p"] = 2 + self.metric = "minkowski" + elif metric == "manhattan": + self.params["p"] = 1 + self.metric = "minkowski" + elif metric == "chebyshev": + self.params["p"] = numpy.inf + self.metric = "minkowski" + elif metric == "minkowski": + self.params["p"] = kwargs.get("p", 2) + if self.params.get("implementation") in {"keops", "ckdtree"}: + assert self.metric == "minkowski" + if self.params.get("implementation") == "hnsw": + assert self.metric == "minkowski" and self.params["p"] == 2 + if not self.params.get("implementation"): + if self.metric == "minkowski": + self.params["implementation"] = "ckdtree" + else: + self.params["implementation"] = "sklearn" + if not return_distance: + self.params["enable_autodiff"] = False + + def fit_transform(self, X, y=None): + return self.fit(X).transform(X) + + def fit(self, X, y=None): + """ + Args: + X (numpy.array): coordinates for reference points. + """ + self.ref_points = X + if self.params.get("enable_autodiff", False): + import eagerpy as ep + + X = ep.astensor(X) + if self.params["implementation"] != "keops" or not isinstance(X, ep.PyTorchTensor): + # I don't know a clever way to reuse a GPU tensor from tensorflow in pytorch + # without copying to/from the CPU. + X = X.numpy() + if self.params["implementation"] == "ckdtree": + # sklearn could handle this, but it is much slower + from scipy.spatial import cKDTree + + self.kdtree = cKDTree(X) + + if self.params["implementation"] == "sklearn" and self.metric != "precomputed": + # FIXME: sklearn badly handles "precomputed" + from sklearn.neighbors import NearestNeighbors + + nargs = { + k: v for k, v in self.params.items() if k in {"p", "n_jobs", "metric_params", "algorithm", "leaf_size"} + } + self.nn = NearestNeighbors(self.k, metric=self.metric, **nargs) + self.nn.fit(X) + + if self.params["implementation"] == "hnsw": + import hnswlib + + self.graph = hnswlib.Index("l2", len(X[0])) # Actually returns squared distances + self.graph.init_index( + len(X), **{k: v for k, v in self.params.items() if k in {"ef_construction", "M", "random_seed"}} + ) + n = self.params.get("num_threads") + if n is None: + n = self.params.get("n_jobs", 1) + self.params["num_threads"] = n + self.graph.add_items(X, num_threads=n) + + return self + + def transform(self, X): + """ + Args: + X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed". + + Returns: + numpy.array: if return_index, an array of shape (len(X), k) with the indices (in the argument + of :func:`fit`) of the k nearest neighbors to the points of X. If return_distance, an array of the + same shape with the distances to those neighbors. If both, a tuple with the two arrays, in this order. + """ + if self.params.get("enable_autodiff", False): + # pykeops does not support autodiff for kmin yet, but when it does in the future, + # we may want a special path. + import eagerpy as ep + + save_return_index = self.return_index + self.return_index = True + self.return_distance = False + self.params["enable_autodiff"] = False + try: + newX = ep.astensor(X) + if self.params["implementation"] != "keops" or ( + not isinstance(newX, ep.PyTorchTensor) and not isinstance(newX, ep.NumPyTensor) + ): + newX = newX.numpy() + else: + newX = newX.raw + neighbors = self.transform(newX) + finally: + self.return_index = save_return_index + self.return_distance = True + self.params["enable_autodiff"] = True + # We can implement more later as needed + assert self.metric == "minkowski" + p = self.params["p"] + Y = ep.astensor(self.ref_points) + neighbor_pts = Y[ + neighbors, + ] + diff = neighbor_pts - X[:, None, :] + if isinstance(diff, ep.PyTorchTensor): + # https://github.com/jonasrauber/eagerpy/issues/6 + distances = ep.astensor(diff.raw.norm(p, -1)) + else: + distances = diff.norms.lp(p, -1) + if self.return_index: + return neighbors, distances.raw + else: + return distances.raw + + metric = self.metric + k = self.k + + if metric == "precomputed": + # scikit-learn could handle that, but they insist on calling fit() with an unused square array, which is too unnatural. + if self.return_index: + n_jobs = self.params.get("n_jobs", 1) + # Supposedly numpy can be compiled with OpenMP and handle this, but nobody does that?! + if n_jobs == 1: + neighbors = numpy.argpartition(X, k - 1)[:, 0:k] + if self.params.get("sort_results", True): + X = numpy.take_along_axis(X, neighbors, axis=-1) + ngb_order = numpy.argsort(X, axis=-1) + neighbors = numpy.take_along_axis(neighbors, ngb_order, axis=-1) + else: + ngb_order = neighbors + if self.return_distance: + distances = numpy.take_along_axis(X, ngb_order, axis=-1) + return neighbors, distances + else: + return neighbors + else: + from joblib import Parallel, delayed, effective_n_jobs + from sklearn.utils import gen_even_slices + + slices = gen_even_slices(len(X), effective_n_jobs(-1)) + parallel = Parallel(backend="threading", n_jobs=-1) + if self.params.get("sort_results", True): + + def func(M): + neighbors = numpy.argpartition(M, k - 1)[:, 0:k] + Y = numpy.take_along_axis(M, neighbors, axis=-1) + ngb_order = numpy.argsort(Y, axis=-1) + return numpy.take_along_axis(neighbors, ngb_order, axis=-1) + + else: + + def func(M): + return numpy.argpartition(M, k - 1)[:, 0:k] + + neighbors = numpy.concatenate(parallel(delayed(func)(X[s]) for s in slices)) + if self.return_distance: + distances = numpy.take_along_axis(X, neighbors, axis=-1) + return neighbors, distances + else: + return neighbors + if self.return_distance: + n_jobs = self.params.get("n_jobs", 1) + if n_jobs == 1: + distances = numpy.partition(X, k - 1)[:, 0:k] + if self.params.get("sort_results"): + # partition is not guaranteed to sort the lower half, although it often does + distances.sort(axis=-1) + else: + from joblib import Parallel, delayed, effective_n_jobs + from sklearn.utils import gen_even_slices + + if self.params.get("sort_results"): + + def func(M): + # Not partitioning in place, because we should not modify the user's array? + r = numpy.partition(M, k - 1)[:, 0:k] + r.sort(axis=-1) + return r + + else: + func = lambda M: numpy.partition(M, k - 1)[:, 0:k] + slices = gen_even_slices(len(X), effective_n_jobs(-1)) + parallel = Parallel(backend="threading", n_jobs=-1) + distances = numpy.concatenate(parallel(delayed(func)(X[s]) for s in slices)) + return distances + return None + + if self.params["implementation"] == "hnsw": + ef = self.params.get("ef") + if ef is not None: + self.graph.set_ef(ef) + neighbors, distances = self.graph.knn_query(X, k, num_threads=self.params["num_threads"]) + # The k nearest neighbors are always sorted. I couldn't find it in the doc, but the code calls searchKnn, + # which returns a priority_queue, and then fills the return array backwards with top/pop on the queue. + if self.return_index: + if self.return_distance: + return neighbors, numpy.sqrt(distances) + else: + return neighbors + if self.return_distance: + return numpy.sqrt(distances) + return None + + if self.params["implementation"] == "keops": + import torch + from pykeops.torch import LazyTensor + + # 'float64' is slow except on super expensive GPUs. Allow it with some param? + XX = torch.as_tensor(X, dtype=torch.float32) + if X is self.ref_points: + YY = XX + else: + YY = torch.as_tensor(self.ref_points, dtype=torch.float32) + p = self.params["p"] + if p == numpy.inf: + # Requires pykeops 1.4 or later + mat = (LazyTensor(XX[:, None, :]) - LazyTensor(YY[None, :, :])).abs().max(-1) + elif p == 2: # Any even integer? + mat = ((LazyTensor(XX[:, None, :]) - LazyTensor(YY[None, :, :])) ** p).sum(-1) + else: + mat = ((LazyTensor(XX[:, None, :]) - LazyTensor(YY[None, :, :])).abs() ** p).sum(-1) + + if self.return_index: + if self.return_distance: + distances, neighbors = mat.Kmin_argKmin(k, dim=1) + if p != numpy.inf: + distances = distances ** (1.0 / p) + return neighbors, distances + else: + neighbors = mat.argKmin(k, dim=1) + return neighbors + if self.return_distance: + distances = mat.Kmin(k, dim=1) + if p != numpy.inf: + distances = distances ** (1.0 / p) + return distances + return None + + if self.params["implementation"] == "ckdtree": + qargs = {key: val for key, val in self.params.items() if key in {"p", "eps", "n_jobs"}} + distances, neighbors = self.kdtree.query(X, k=self.k, **qargs) + if self.return_index: + if self.return_distance: + return neighbors, distances + else: + return neighbors + if self.return_distance: + return distances + return None + + assert self.params["implementation"] == "sklearn" + if self.return_distance: + distances, neighbors = self.nn.kneighbors(X, return_distance=True) + if self.return_index: + return neighbors, distances + else: + return distances + if self.return_index: + neighbors = self.nn.kneighbors(X, return_distance=False) + return neighbors + return None diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py index e2c30f8c..59440b1a 100644 --- a/src/python/gudhi/representations/metrics.py +++ b/src/python/gudhi/representations/metrics.py @@ -246,27 +246,23 @@ class BottleneckDistance(BaseEstimator, TransformerMixin): Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric="bottleneck", e=self.epsilon) return Xfit -class WassersteinDistance(BaseEstimator, TransformerMixin): +class PersistenceFisherDistance(BaseEstimator, TransformerMixin): """ - This is a class for computing the Wasserstein distance matrix from a list of persistence diagrams. + This is a class for computing the persistence Fisher distance matrix from a list of persistence diagrams. The persistence Fisher distance is obtained by computing the original Fisher distance between the probability distributions associated to the persistence diagrams given by convolving them with a Gaussian kernel. See http://papers.nips.cc/paper/8205-persistence-fisher-kernel-a-riemannian-manifold-kernel-for-persistence-diagrams for more details. """ - def __init__(self, order=2, internal_p=2, mode="pot", delta=0.01): + def __init__(self, bandwidth=1., kernel_approx=None): """ - Constructor for the WassersteinDistance class. + Constructor for the PersistenceFisherDistance class. Parameters: - order (int): exponent for Wasserstein, default value is 2., see :func:`gudhi.wasserstein.wasserstein_distance`. - internal_p (int): ground metric on the (upper-half) plane (i.e. norm l_p in R^2), default value is 2 (euclidean norm), see :func:`gudhi.wasserstein.wasserstein_distance`. - mode (str): method for computing Wasserstein distance. Either "pot" or "hera". - delta (float): relative error 1+delta. Used only if mode == "hera". + bandwidth (double): bandwidth of the Gaussian kernel used to turn persistence diagrams into probability distributions (default 1.). + kernel_approx (class): kernel approximation class used to speed up computation (default None). Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). """ - self.order, self.internal_p, self.mode = order, internal_p, mode - self.metric = "pot_wasserstein" if mode == "pot" else "hera_wasserstein" - self.delta = delta + self.bandwidth, self.kernel_approx = bandwidth, kernel_approx def fit(self, X, y=None): """ - Fit the WassersteinDistance class on a list of persistence diagrams: persistence diagrams are stored in a numpy array called **diagrams**. + Fit the PersistenceFisherDistance class on a list of persistence diagrams: persistence diagrams are stored in a numpy array called **diagrams** and the kernel approximation class (if not None) is applied on them. Parameters: X (list of n x 2 numpy arrays): input persistence diagrams. @@ -277,37 +273,37 @@ class WassersteinDistance(BaseEstimator, TransformerMixin): def transform(self, X): """ - Compute all Wasserstein distances between the persistence diagrams that were stored after calling the fit() method, and a given list of (possibly different) persistence diagrams. + Compute all persistence Fisher distances between the persistence diagrams that were stored after calling the fit() method, and a given list of (possibly different) persistence diagrams. Parameters: X (list of n x 2 numpy arrays): input persistence diagrams. Returns: - numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise Wasserstein distances. + numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise persistence Fisher distances. """ - if self.metric == "hera_wasserstein": - Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric=self.metric, order=self.order, internal_p=self.internal_p, delta=self.delta) - else: - Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric=self.metric, order=self.order, internal_p=self.internal_p) - return Xfit + return pairwise_persistence_diagram_distances(X, self.diagrams_, metric="persistence_fisher", bandwidth=self.bandwidth, kernel_approx=self.kernel_approx) -class PersistenceFisherDistance(BaseEstimator, TransformerMixin): +class WassersteinDistance(BaseEstimator, TransformerMixin): """ - This is a class for computing the persistence Fisher distance matrix from a list of persistence diagrams. The persistence Fisher distance is obtained by computing the original Fisher distance between the probability distributions associated to the persistence diagrams given by convolving them with a Gaussian kernel. See http://papers.nips.cc/paper/8205-persistence-fisher-kernel-a-riemannian-manifold-kernel-for-persistence-diagrams for more details. + This is a class for computing the Wasserstein distance matrix from a list of persistence diagrams. """ - def __init__(self, bandwidth=1., kernel_approx=None): + def __init__(self, order=2, internal_p=2, mode="pot", delta=0.01): """ - Constructor for the PersistenceFisherDistance class. + Constructor for the WassersteinDistance class. Parameters: - bandwidth (double): bandwidth of the Gaussian kernel used to turn persistence diagrams into probability distributions (default 1.). - kernel_approx (class): kernel approximation class used to speed up computation (default None). Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). + order (int): exponent for Wasserstein, default value is 2., see :func:`gudhi.wasserstein.wasserstein_distance`. + internal_p (int): ground metric on the (upper-half) plane (i.e. norm l_p in R^2), default value is 2 (euclidean norm), see :func:`gudhi.wasserstein.wasserstein_distance`. + mode (str): method for computing Wasserstein distance. Either "pot" or "hera". + delta (float): relative error 1+delta. Used only if mode == "hera". """ - self.bandwidth, self.kernel_approx = bandwidth, kernel_approx + self.order, self.internal_p, self.mode = order, internal_p, mode + self.metric = "pot_wasserstein" if mode == "pot" else "hera_wasserstein" + self.delta = delta def fit(self, X, y=None): """ - Fit the PersistenceFisherDistance class on a list of persistence diagrams: persistence diagrams are stored in a numpy array called **diagrams** and the kernel approximation class (if not None) is applied on them. + Fit the WassersteinDistance class on a list of persistence diagrams: persistence diagrams are stored in a numpy array called **diagrams**. Parameters: X (list of n x 2 numpy arrays): input persistence diagrams. @@ -318,12 +314,16 @@ class PersistenceFisherDistance(BaseEstimator, TransformerMixin): def transform(self, X): """ - Compute all persistence Fisher distances between the persistence diagrams that were stored after calling the fit() method, and a given list of (possibly different) persistence diagrams. + Compute all Wasserstein distances between the persistence diagrams that were stored after calling the fit() method, and a given list of (possibly different) persistence diagrams. Parameters: X (list of n x 2 numpy arrays): input persistence diagrams. Returns: - numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise persistence Fisher distances. + numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise Wasserstein distances. """ - return pairwise_persistence_diagram_distances(X, self.diagrams_, metric="persistence_fisher", bandwidth=self.bandwidth, kernel_approx=self.kernel_approx) + if self.metric == "hera_wasserstein": + Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric=self.metric, order=self.order, internal_p=self.internal_p, delta=self.delta) + else: + Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric=self.metric, order=self.order, internal_p=self.internal_p) + return Xfit diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd index 82f155de..5dea2449 100644 --- a/src/python/gudhi/simplex_tree.pxd +++ b/src/python/gudhi/simplex_tree.pxd @@ -48,8 +48,7 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi": int dimension() int upper_bound_dimension() bool find_simplex(vector[int] simplex) - bool insert_simplex_and_subfaces(vector[int] simplex, - double filtration) + bool insert(vector[int] simplex, double filtration) vector[pair[vector[int], double]] get_star(vector[int] simplex) vector[pair[vector[int], double]] get_cofaces(vector[int] simplex, int dimension) @@ -57,6 +56,8 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi": void remove_maximal_simplex(vector[int] simplex) bool prune_above_filtration(double filtration) bool make_filtration_non_decreasing() + void compute_extended_filtration() + vector[vector[pair[int, pair[double, double]]]] compute_extended_persistence_subdiagrams(vector[pair[int, pair[double, double]]] dgm, double min_persistence) # Iterators over Simplex tree pair[vector[int], double] get_simplex_and_filtration(Simplex_tree_simplex_handle f_simplex) Simplex_tree_simplices_iterator get_simplices_iterator_begin() @@ -69,9 +70,10 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi": cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Simplex_tree_persistence_interface "Gudhi::Persistent_cohomology_interface<Gudhi::Simplex_tree<Gudhi::Simplex_tree_options_full_featured>>": Simplex_tree_persistence_interface(Simplex_tree_interface_full_featured * st, bool persistence_dim_max) - vector[pair[int, pair[double, double]]] get_persistence(int homology_coeff_field, double min_persistence) + void compute_persistence(int homology_coeff_field, double min_persistence) + vector[pair[int, pair[double, double]]] get_persistence() vector[int] betti_numbers() vector[int] persistent_betti_numbers(double from_value, double to_value) vector[pair[double,double]] intervals_in_dimension(int dimension) - void write_output_diagram(string diagram_file_name) + void write_output_diagram(string diagram_file_name) except + vector[pair[vector[int], vector[int]]] persistence_pairs() diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index c01cc905..9479118a 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -90,7 +90,7 @@ cdef class SimplexTree: (with more :meth:`assign_filtration` or :meth:`make_filtration_non_decreasing` for instance) before calling any function that relies on the filtration property, like - :meth:`initialize_filtration`. + :meth:`persistence`. """ self.get_ptr().assign_simplex_filtration(simplex, filtration) @@ -98,16 +98,7 @@ cdef class SimplexTree: """This function initializes and sorts the simplicial complex filtration vector. - .. note:: - - This function must be launched before - :func:`persistence()<gudhi.SimplexTree.persistence>`, - :func:`betti_numbers()<gudhi.SimplexTree.betti_numbers>`, - :func:`persistent_betti_numbers()<gudhi.SimplexTree.persistent_betti_numbers>`, - or :func:`get_filtration()<gudhi.SimplexTree.get_filtration>` - after :func:`inserting<gudhi.SimplexTree.insert>` or - :func:`removing<gudhi.SimplexTree.remove_maximal_simplex>` - simplices. + .. deprecated:: 3.2.0 """ self.get_ptr().initialize_filtration() @@ -139,9 +130,9 @@ cdef class SimplexTree: This function is not constant time because it can recompute dimension if required (can be triggered by - :func:`remove_maximal_simplex()<gudhi.SimplexTree.remove_maximal_simplex>` + :func:`remove_maximal_simplex` or - :func:`prune_above_filtration()<gudhi.SimplexTree.prune_above_filtration>` + :func:`prune_above_filtration` methods). """ return self.get_ptr().dimension() @@ -166,9 +157,9 @@ cdef class SimplexTree: This function must be used with caution because it disables dimension recomputation when required (this recomputation can be triggered by - :func:`remove_maximal_simplex()<gudhi.SimplexTree.remove_maximal_simplex>` + :func:`remove_maximal_simplex` or - :func:`prune_above_filtration()<gudhi.SimplexTree.prune_above_filtration>` + :func:`prune_above_filtration` ). """ self.get_ptr().set_dimension(<int>dimension) @@ -182,10 +173,7 @@ cdef class SimplexTree: :returns: true if the simplex was found, false otherwise. :rtype: bool """ - cdef vector[int] csimplex - for i in simplex: - csimplex.push_back(i) - return self.get_ptr().find_simplex(csimplex) + return self.get_ptr().find_simplex(simplex) def insert(self, simplex, filtration=0.0): """This function inserts the given N-simplex and its subfaces with the @@ -202,11 +190,7 @@ cdef class SimplexTree: otherwise (whatever its original filtration value). :rtype: bool """ - cdef vector[int] csimplex - for i in simplex: - csimplex.push_back(i) - return self.get_ptr().insert_simplex_and_subfaces(csimplex, - <double>filtration) + return self.get_ptr().insert(simplex, <double>filtration) def get_simplices(self): """This function returns a generator with simplices and their given @@ -308,17 +292,12 @@ cdef class SimplexTree: .. note:: - Be aware that removing is shifting data in a flat_map - (:func:`initialize_filtration()<gudhi.SimplexTree.initialize_filtration>` to be done). - - .. note:: - The dimension of the simplicial complex may be lower after calling remove_maximal_simplex than it was before. However, - :func:`upper_bound_dimension()<gudhi.SimplexTree.upper_bound_dimension>` + :func:`upper_bound_dimension` method will return the old value, which remains a valid upper bound. If you care, you can call - :func:`dimension()<gudhi.SimplexTree.dimension>` + :func:`dimension` to recompute the exact dimension. """ self.get_ptr().remove_maximal_simplex(simplex) @@ -334,24 +313,14 @@ cdef class SimplexTree: .. note:: - Some simplex tree functions require the filtration to be valid. - prune_above_filtration function is not launching - :func:`initialize_filtration()<gudhi.SimplexTree.initialize_filtration>` - but returns the filtration modification - information. If the complex has changed , please call - :func:`initialize_filtration()<gudhi.SimplexTree.initialize_filtration>` - to recompute it. - - .. note:: - Note that the dimension of the simplicial complex may be lower after calling - :func:`prune_above_filtration()<gudhi.SimplexTree.prune_above_filtration>` + :func:`prune_above_filtration` than it was before. However, - :func:`upper_bound_dimension()<gudhi.SimplexTree.upper_bound_dimension>` + :func:`upper_bound_dimension` will return the old value, which remains a valid upper bound. If you care, you can call - :func:`dimension()<gudhi.SimplexTree.dimension>` + :func:`dimension` method to recompute the exact dimension. """ return self.get_ptr().prune_above_filtration(filtration) @@ -382,22 +351,63 @@ cdef class SimplexTree: :returns: True if any filtration value was modified, False if the filtration was already non-decreasing. :rtype: bool + """ + return self.get_ptr().make_filtration_non_decreasing() + def extend_filtration(self): + """ Extend filtration for computing extended persistence. This function only uses the + filtration values at the 0-dimensional simplices, and computes the extended persistence + diagram induced by the lower-star filtration computed with these values. .. note:: - Some simplex tree functions require the filtration to be valid. - make_filtration_non_decreasing function is not launching - :func:`initialize_filtration()<gudhi.SimplexTree.initialize_filtration>` - but returns the filtration modification - information. If the complex has changed , please call - :func:`initialize_filtration()<gudhi.SimplexTree.initialize_filtration>` - to recompute it. + Note that after calling this function, the filtration + values are actually modified within the Simplex_tree. + The function :func:`extended_persistence` + retrieves the original values. + + .. note:: + + Note that this code creates an extra vertex internally, so you should make sure that + the Simplex_tree does not contain a vertex with the largest possible value (i.e., 4294967295). """ - return self.get_ptr().make_filtration_non_decreasing() + self.get_ptr().compute_extended_filtration() + + def extended_persistence(self, homology_coeff_field=11, min_persistence=0): + """This function retrieves good values for extended persistence, and separate the diagrams + into the Ordinary, Relative, Extended+ and Extended- subdiagrams. + + :param homology_coeff_field: The homology coefficient field. Must be a + prime number. Default value is 11. + :type homology_coeff_field: int. + :param min_persistence: The minimum persistence value (i.e., the absolute value of the difference between the persistence diagram point coordinates) to take into + account (strictly greater than min_persistence). Default value is + 0.0. + Sets min_persistence to -1.0 to see all values. + :type min_persistence: float. + :returns: A list of four persistence diagrams in the format described in :func:`persistence`. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. See https://link.springer.com/article/10.1007/s10208-008-9027-z and/or section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes. + + .. note:: + + This function should be called only if :func:`extend_filtration` has been called first! + + .. note:: + + The coordinates of the persistence diagram points might be a little different than the + original filtration values due to the internal transformation (scaling to [-2,-1]) that is + performed on these values during the computation of extended persistence. + """ + cdef vector[pair[int, pair[double, double]]] persistence_result + if self.pcohptr != NULL: + del self.pcohptr + self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), False) + self.pcohptr.compute_persistence(homology_coeff_field, -1.) + persistence_result = self.pcohptr.get_persistence() + return self.get_ptr().compute_extended_persistence_subdiagrams(persistence_result, min_persistence) + def persistence(self, homology_coeff_field=11, min_persistence=0, persistence_dim_max = False): - """This function returns the persistence of the simplicial complex. + """This function computes and returns the persistence of the simplicial complex. :param homology_coeff_field: The homology coefficient field. Must be a prime number. Default value is 11. @@ -414,13 +424,32 @@ cdef class SimplexTree: :returns: The persistence of the simplicial complex. :rtype: list of pairs(dimension, pair(birth, death)) """ + self.compute_persistence(homology_coeff_field, min_persistence, persistence_dim_max) + return self.pcohptr.get_persistence() + + def compute_persistence(self, homology_coeff_field=11, min_persistence=0, persistence_dim_max = False): + """This function computes the persistence of the simplicial complex, so it can be accessed through + :func:`persistent_betti_numbers`, :func:`persistence_pairs`, etc. This function is equivalent to :func:`persistence` + when you do not want the list :func:`persistence` returns. + + :param homology_coeff_field: The homology coefficient field. Must be a + prime number. Default value is 11. + :type homology_coeff_field: int. + :param min_persistence: The minimum persistence value to take into + account (strictly greater than min_persistence). Default value is + 0.0. + Sets min_persistence to -1.0 to see all values. + :type min_persistence: float. + :param persistence_dim_max: If true, the persistent homology for the + maximal dimension in the complex is computed. If false, it is + ignored. Default is false. + :type persistence_dim_max: bool + :returns: Nothing. + """ if self.pcohptr != NULL: del self.pcohptr self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), persistence_dim_max) - cdef vector[pair[int, pair[double, double]]] persistence_result - if self.pcohptr != NULL: - persistence_result = self.pcohptr.get_persistence(homology_coeff_field, min_persistence) - return persistence_result + self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) def betti_numbers(self): """This function returns the Betti numbers of the simplicial complex. @@ -429,16 +458,11 @@ cdef class SimplexTree: :rtype: list of int :note: betti_numbers function requires - :func:`persistence()<gudhi.SimplexTree.persistence>` + :func:`compute_persistence` function to be launched first. """ - cdef vector[int] bn_result - if self.pcohptr != NULL: - bn_result = self.pcohptr.betti_numbers() - else: - print("betti_numbers function requires persistence function" - " to be launched first.") - return bn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before betti_numbers()" + return self.pcohptr.betti_numbers() def persistent_betti_numbers(self, from_value, to_value): """This function returns the persistent Betti numbers of the @@ -455,16 +479,11 @@ cdef class SimplexTree: :rtype: list of int :note: persistent_betti_numbers function requires - :func:`persistence()<gudhi.SimplexTree.persistence>` + :func:`compute_persistence` function to be launched first. """ - cdef vector[int] pbn_result - if self.pcohptr != NULL: - pbn_result = self.pcohptr.persistent_betti_numbers(<double>from_value, <double>to_value) - else: - print("persistent_betti_numbers function requires persistence function" - " to be launched first.") - return pbn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before persistent_betti_numbers()" + return self.pcohptr.persistent_betti_numbers(<double>from_value, <double>to_value) def persistence_intervals_in_dimension(self, dimension): """This function returns the persistence intervals of the simplicial @@ -476,16 +495,11 @@ cdef class SimplexTree: :rtype: numpy array of dimension 2 :note: intervals_in_dim function requires - :func:`persistence()<gudhi.SimplexTree.persistence>` + :func:`compute_persistence` function to be launched first. """ - cdef vector[pair[double,double]] intervals_result - if self.pcohptr != NULL: - intervals_result = self.pcohptr.intervals_in_dimension(dimension) - else: - print("intervals_in_dim function requires persistence function" - " to be launched first.") - return np_array(intervals_result) + assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()" + return np_array(self.pcohptr.intervals_in_dimension(dimension)) def persistence_pairs(self): """This function returns a list of persistence birth and death simplices pairs. @@ -494,33 +508,22 @@ cdef class SimplexTree: :rtype: list of pair of list of int :note: persistence_pairs function requires - :func:`persistence()<gudhi.SimplexTree.persistence>` + :func:`compute_persistence` function to be launched first. """ - cdef vector[pair[vector[int],vector[int]]] persistence_pairs_result - if self.pcohptr != NULL: - persistence_pairs_result = self.pcohptr.persistence_pairs() - else: - print("persistence_pairs function requires persistence function" - " to be launched first.") - return persistence_pairs_result + assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_pairs()" + return self.pcohptr.persistence_pairs() - def write_persistence_diagram(self, persistence_file=''): + def write_persistence_diagram(self, persistence_file): """This function writes the persistence intervals of the simplicial complex in a user given file name. - :param persistence_file: The specific dimension. + :param persistence_file: Name of the file. :type persistence_file: string. :note: intervals_in_dim function requires - :func:`persistence()<gudhi.SimplexTree.persistence>` + :func:`compute_persistence` function to be launched first. """ - if self.pcohptr != NULL: - if persistence_file != '': - self.pcohptr.write_output_diagram(persistence_file.encode('utf-8')) - else: - print("persistence_file must be specified") - else: - print("intervals_in_dim function requires persistence function" - " to be launched first.") + assert self.pcohptr != NULL, "compute_persistence() must be called before write_persistence_diagram()" + self.pcohptr.write_output_diagram(persistence_file.encode('utf-8')) diff --git a/src/python/gudhi/wasserstein/__init__.py b/src/python/gudhi/wasserstein/__init__.py new file mode 100644 index 00000000..ed225ba4 --- /dev/null +++ b/src/python/gudhi/wasserstein/__init__.py @@ -0,0 +1 @@ +from .wasserstein import wasserstein_distance diff --git a/src/python/gudhi/wasserstein/barycenter.py b/src/python/gudhi/wasserstein/barycenter.py new file mode 100644 index 00000000..de7aea81 --- /dev/null +++ b/src/python/gudhi/wasserstein/barycenter.py @@ -0,0 +1,159 @@ +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +# Author(s): Theo Lacombe +# +# Copyright (C) 2019 Inria +# +# Modification(s): +# - YYYY/MM Author: Description of the modification + + +import ot +import numpy as np +import scipy.spatial.distance as sc + +from gudhi.wasserstein import wasserstein_distance + + +def _mean(x, m): + ''' + :param x: a list of 2D-points, off diagonal, x_0... x_{k-1} + :param m: total amount of points taken into account, + that is we have (m-k) copies of diagonal + :returns: the weighted mean of x with (m-k) copies of the diagonal + ''' + k = len(x) + if k > 0: + w = np.mean(x, axis=0) + w_delta = (w[0] + w[1]) / 2 * np.ones(2) + return (k * w + (m-k) * w_delta) / m + else: + return np.array([0, 0]) + + +def lagrangian_barycenter(pdiagset, init=None, verbose=False): + ''' + :param pdiagset: a list of ``numpy.array`` of shape `(n x 2)` + (`n` can variate), encoding a set of + persistence diagrams with only finite coordinates. + :param init: The initial value for barycenter estimate. + If ``None``, init is made on a random diagram from the dataset. + Otherwise, it can be an ``int`` + (then initialization is made on ``pdiagset[init]``) + or a `(n x 2)` ``numpy.array`` enconding + a persistence diagram with `n` points. + :type init: ``int``, or (n x 2) ``np.array`` + :param verbose: if ``True``, returns additional information about the + barycenter. + :type verbose: boolean + :returns: If not verbose (default), a ``numpy.array`` encoding + the barycenter estimate of pdiagset + (local minimum of the energy function). + If ``pdiagset`` is empty, returns ``None``. + If verbose, returns a couple ``(Y, log)`` + where ``Y`` is the barycenter estimate, + and ``log`` is a ``dict`` that contains additional informations: + + - `"groupings"`, a list of list of pairs ``(i,j)``. + Namely, ``G[k] = [...(i, j)...]``, where ``(i,j)`` indicates + that ``pdiagset[k][i]`` is matched to ``Y[j]`` + if ``i = -1`` or ``j = -1``, it means they + represent the diagonal. + + - `"energy"`, ``float`` representing the Frechet energy value obtained. + It is the mean of squared distances of observations to the output. + + - `"nb_iter"`, ``int`` number of iterations performed before convergence of the algorithm. + ''' + X = pdiagset # to shorten notations, not a copy + m = len(X) # number of diagrams we are averaging + if m == 0: + print("Warning: computing barycenter of empty diag set. Returns None") + return None + + # store the number of off-diagonal point for each of the X_i + nb_off_diag = np.array([len(X_i) for X_i in X]) + # Initialisation of barycenter + if init is None: + i0 = np.random.randint(m) # Index of first state for the barycenter + Y = X[i0].copy() + else: + if type(init)==int: + Y = X[init].copy() + else: + Y = init.copy() + + nb_iter = 0 + + converged = False # stoping criterion + while not converged: + nb_iter += 1 + K = len(Y) # current nb of points in Y (some might be on diagonal) + G = np.full((K, m), -1, dtype=int) # will store for each j, the (index) + # point matched in each other diagram + #(might be the diagonal). + # that is G[j, i] = k <=> y_j is matched to + # x_k in the diagram i-th diagram X[i] + updated_points = np.zeros((K, 2)) # will store the new positions of + # the points of Y. + # If points disappear, there thrown + # on [0,0] by default. + new_created_points = [] # will store potential new points. + + # Step 1 : compute optimal matching (Y, X_i) for each X_i + # and create new points in Y if needed + for i in range(m): + _, indices = wasserstein_distance(Y, X[i], matching=True, order=2., internal_p=2.) + for y_j, x_i_j in indices: + if y_j >= 0: # we matched an off diagonal point to x_i_j... + if x_i_j >= 0: # ...which is also an off-diagonal point. + G[y_j, i] = x_i_j + else: # ...which is a diagonal point + G[y_j, i] = -1 # -1 stands for the diagonal (mask) + else: # We matched a diagonal point to x_i_j... + if x_i_j >= 0: # which is a off-diag point ! + # need to create new point in Y + new_y = _mean(np.array([X[i][x_i_j]]), m) + # Average this point with (m-1) copies of Delta + new_created_points.append(new_y) + + # Step 2 : Update current point position thanks to groupings computed + to_delete = [] + for j in range(K): + matched_points = [X[i][G[j, i]] for i in range(m) if G[j, i] > -1] + new_y_j = _mean(matched_points, m) + if not np.array_equal(new_y_j, np.array([0,0])): + updated_points[j] = new_y_j + else: # this points is no longer of any use. + to_delete.append(j) + # we remove the point to be deleted now. + updated_points = np.delete(updated_points, to_delete, axis=0) + + # we cannot converge if there have been new created points. + if new_created_points: + Y = np.concatenate((updated_points, new_created_points)) + else: + # Step 3 : we check convergence + if np.array_equal(updated_points, Y): + converged = True + Y = updated_points + + + if verbose: + groupings = [] + energy = 0 + log = {} + n_y = len(Y) + for i in range(m): + cost, edges = wasserstein_distance(Y, X[i], matching=True, order=2., internal_p=2.) + groupings.append(edges) + energy += cost + log["groupings"] = groupings + energy = energy/m + log["energy"] = energy + log["nb_iter"] = nb_iter + + return Y, log + else: + return Y + diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py index 3dd993f9..efc851a0 100644 --- a/src/python/gudhi/wasserstein.py +++ b/src/python/gudhi/wasserstein/wasserstein.py @@ -9,11 +9,14 @@ import numpy as np import scipy.spatial.distance as sc + try: import ot except ImportError: print("POT (Python Optimal Transport) package is not installed. Try to run $ conda install -c conda-forge pot ; or $ pip install POT") + +# Currently unused, but Théo says it is likely to be used again. def _proj_on_diag(X): ''' :param X: (n x 2) array encoding the points of a persistent diagram. @@ -23,28 +26,36 @@ def _proj_on_diag(X): return np.array([Z , Z]).T -def _build_dist_matrix(X, Y, order=2., internal_p=2.): +def _dist_to_diag(X, internal_p): + ''' + :param X: (n x 2) array encoding the points of a persistent diagram. + :param internal_p: Ground metric (i.e. norm L^p). + :returns: (n) array encoding the (respective orthogonal) distances of the points to the diagonal + + .. note:: + Assumes that the points are above the diagonal. + ''' + return (X[:, 1] - X[:, 0]) * 2 ** (1.0 / internal_p - 1) + + +def _build_dist_matrix(X, Y, order, internal_p): ''' :param X: (n x 2) numpy.array encoding the (points of the) first diagram. :param Y: (m x 2) numpy.array encoding the second diagram. :param order: exponent for the Wasserstein metric. :param internal_p: Ground metric (i.e. norm L^p). - :returns: (n+1) x (m+1) np.array encoding the cost matrix C. - For 0 <= i < n, 0 <= j < m, C[i,j] encodes the distance between X[i] and Y[j], - while C[i, m] (resp. C[n, j]) encodes the distance (to the p) between X[i] (resp Y[j]) + :returns: (n+1) x (m+1) np.array encoding the cost matrix C. + For 0 <= i < n, 0 <= j < m, C[i,j] encodes the distance between X[i] and Y[j], + while C[i, m] (resp. C[n, j]) encodes the distance (to the p) between X[i] (resp Y[j]) and its orthogonal projection onto the diagonal. note also that C[n, m] = 0 (it costs nothing to move from the diagonal to the diagonal). ''' - Xdiag = _proj_on_diag(X) - Ydiag = _proj_on_diag(Y) + Cxd = _dist_to_diag(X, internal_p)**order + Cdy = _dist_to_diag(Y, internal_p)**order if np.isinf(internal_p): C = sc.cdist(X,Y, metric='chebyshev')**order - Cxd = np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order - Cdy = np.linalg.norm(Y - Ydiag, ord=internal_p, axis=1)**order else: C = sc.cdist(X,Y, metric='minkowski', p=internal_p)**order - Cxd = np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order - Cdy = np.linalg.norm(Y - Ydiag, ord=internal_p, axis=1)**order Cf = np.hstack((C, Cxd[:,None])) Cdy = np.append(Cdy, 0) @@ -58,24 +69,23 @@ def _perstot(X, order, internal_p): :param X: (n x 2) numpy.array (points of a given diagram). :param order: exponent for Wasserstein. Default value is 2. :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm). - :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram). + :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram). ''' - Xdiag = _proj_on_diag(X) - return (np.sum(np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order))**(1./order) + return np.linalg.norm(_dist_to_diag(X, internal_p), ord=order) def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2.): ''' - :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points + :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate). :param Y: (m x 2) numpy.array encoding the second diagram. :param matching: if True, computes and returns the optimal matching between X and Y, encoded as a (n x 2) np.array [...[i,j]...], meaning the i-th point in X is matched to the j-th point in Y, with the convention (-1) represents the diagonal. :param order: exponent for Wasserstein; Default value is 2. - :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); + :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm). - :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with + :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with respect to the internal_p-norm as ground metric. If matching is set to True, also returns the optimal matching between X and Y. ''' diff --git a/src/python/include/Alpha_complex_interface.h b/src/python/include/Alpha_complex_interface.h index 8614eee3..40de88f3 100644 --- a/src/python/include/Alpha_complex_interface.h +++ b/src/python/include/Alpha_complex_interface.h @@ -58,7 +58,6 @@ class Alpha_complex_interface { void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square) { alpha_complex_->create_complex(*simplex_tree, max_alpha_square); - simplex_tree->initialize_filtration(); } private: diff --git a/src/python/include/Euclidean_strong_witness_complex_interface.h b/src/python/include/Euclidean_strong_witness_complex_interface.h index c1c72737..f94c51ef 100644 --- a/src/python/include/Euclidean_strong_witness_complex_interface.h +++ b/src/python/include/Euclidean_strong_witness_complex_interface.h @@ -50,12 +50,10 @@ class Euclidean_strong_witness_complex_interface { void create_simplex_tree(Gudhi::Simplex_tree<>* simplex_tree, double max_alpha_square, std::size_t limit_dimension) { witness_complex_->create_complex(*simplex_tree, max_alpha_square, limit_dimension); - simplex_tree->initialize_filtration(); } void create_simplex_tree(Gudhi::Simplex_tree<>* simplex_tree, double max_alpha_square) { witness_complex_->create_complex(*simplex_tree, max_alpha_square); - simplex_tree->initialize_filtration(); } std::vector<double> get_point(unsigned vh) { diff --git a/src/python/include/Euclidean_witness_complex_interface.h b/src/python/include/Euclidean_witness_complex_interface.h index 5d7dbdc2..4411ae79 100644 --- a/src/python/include/Euclidean_witness_complex_interface.h +++ b/src/python/include/Euclidean_witness_complex_interface.h @@ -49,12 +49,10 @@ class Euclidean_witness_complex_interface { void create_simplex_tree(Gudhi::Simplex_tree<>* simplex_tree, double max_alpha_square, std::size_t limit_dimension) { witness_complex_->create_complex(*simplex_tree, max_alpha_square, limit_dimension); - simplex_tree->initialize_filtration(); } void create_simplex_tree(Gudhi::Simplex_tree<>* simplex_tree, double max_alpha_square) { witness_complex_->create_complex(*simplex_tree, max_alpha_square); - simplex_tree->initialize_filtration(); } std::vector<double> get_point(unsigned vh) { diff --git a/src/python/include/Nerve_gic_interface.h b/src/python/include/Nerve_gic_interface.h index 5e7f8ae6..ab14c318 100644 --- a/src/python/include/Nerve_gic_interface.h +++ b/src/python/include/Nerve_gic_interface.h @@ -29,7 +29,6 @@ class Nerve_gic_interface : public Cover_complex<std::vector<double>> { public: void create_simplex_tree(Simplex_tree_interface<>* simplex_tree) { create_complex(*simplex_tree); - simplex_tree->initialize_filtration(); } void set_cover_from_Euclidean_Voronoi(int m) { set_cover_from_Voronoi(Gudhi::Euclidean_distance(), m); diff --git a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h index 8c79e6f3..e2b69a52 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -23,6 +23,7 @@ template<class FilteredComplex> class Persistent_cohomology_interface : public persistent_cohomology::Persistent_cohomology<FilteredComplex, persistent_cohomology::Field_Zp> { private: + typedef persistent_cohomology::Persistent_cohomology<FilteredComplex, persistent_cohomology::Field_Zp> Base; /* * Compare two intervals by dimension, then by length. */ @@ -42,26 +43,20 @@ persistent_cohomology::Persistent_cohomology<FilteredComplex, persistent_cohomol }; public: - Persistent_cohomology_interface(FilteredComplex* stptr) - : persistent_cohomology::Persistent_cohomology<FilteredComplex, persistent_cohomology::Field_Zp>(*stptr), - stptr_(stptr) { } - - Persistent_cohomology_interface(FilteredComplex* stptr, bool persistence_dim_max) - : persistent_cohomology::Persistent_cohomology<FilteredComplex, - persistent_cohomology::Field_Zp>(*stptr, persistence_dim_max), + Persistent_cohomology_interface(FilteredComplex* stptr, bool persistence_dim_max=false) + : Base(*stptr, persistence_dim_max), stptr_(stptr) { } - std::vector<std::pair<int, std::pair<double, double>>> get_persistence(int homology_coeff_field, - double min_persistence) { - persistent_cohomology::Persistent_cohomology<FilteredComplex, - persistent_cohomology::Field_Zp>::init_coefficients(homology_coeff_field); - persistent_cohomology::Persistent_cohomology<FilteredComplex, - persistent_cohomology::Field_Zp>::compute_persistent_cohomology(min_persistence); + // TODO: move to the constructors? + void compute_persistence(int homology_coeff_field, double min_persistence) { + Base::init_coefficients(homology_coeff_field); + Base::compute_persistent_cohomology(min_persistence); + } + std::vector<std::pair<int, std::pair<double, double>>> get_persistence() { // Custom sort and output persistence cmp_intervals_by_dim_then_length cmp(stptr_); - auto persistent_pairs = persistent_cohomology::Persistent_cohomology<FilteredComplex, - persistent_cohomology::Field_Zp>::get_persistent_pairs(); + auto persistent_pairs = Base::get_persistent_pairs(); std::sort(std::begin(persistent_pairs), std::end(persistent_pairs), cmp); std::vector<std::pair<int, std::pair<double, double>>> persistence; @@ -74,8 +69,7 @@ persistent_cohomology::Persistent_cohomology<FilteredComplex, persistent_cohomol } std::vector<std::pair<std::vector<int>, std::vector<int>>> persistence_pairs() { - auto pairs = persistent_cohomology::Persistent_cohomology<FilteredComplex, - persistent_cohomology::Field_Zp>::get_persistent_pairs(); + auto pairs = Base::get_persistent_pairs(); std::vector<std::pair<std::vector<int>, std::vector<int>>> persistence_pairs; persistence_pairs.reserve(pairs.size()); diff --git a/src/python/include/Rips_complex_interface.h b/src/python/include/Rips_complex_interface.h index a66b0e5b..d98b0226 100644 --- a/src/python/include/Rips_complex_interface.h +++ b/src/python/include/Rips_complex_interface.h @@ -53,7 +53,6 @@ class Rips_complex_interface { rips_complex_->create_complex(*simplex_tree, dim_max); else sparse_rips_complex_->create_complex(*simplex_tree, dim_max); - simplex_tree->initialize_filtration(); } private: diff --git a/src/python/include/Simplex_tree_interface.h b/src/python/include/Simplex_tree_interface.h index 4a7062d6..56d7c41d 100644 --- a/src/python/include/Simplex_tree_interface.h +++ b/src/python/include/Simplex_tree_interface.h @@ -16,8 +16,6 @@ #include <gudhi/Simplex_tree.h> #include <gudhi/Points_off_io.h> -#include "Persistent_cohomology_interface.h" - #include <iostream> #include <vector> #include <utility> // std::pair @@ -37,18 +35,25 @@ class Simplex_tree_interface : public Simplex_tree<SimplexTreeOptions> { using Filtered_simplices = std::vector<Simplex_and_filtration>; using Skeleton_simplex_iterator = typename Base::Skeleton_simplex_iterator; using Complex_simplex_iterator = typename Base::Complex_simplex_iterator; + using Extended_filtration_data = typename Base::Extended_filtration_data; public: - bool find_simplex(const Simplex& vh) { - return (Base::find(vh) != Base::null_simplex()); + + Extended_filtration_data efd; + + bool find_simplex(const Simplex& simplex) { + return (Base::find(simplex) != Base::null_simplex()); } - void assign_simplex_filtration(const Simplex& vh, Filtration_value filtration) { - Base::assign_filtration(Base::find(vh), filtration); + void assign_simplex_filtration(const Simplex& simplex, Filtration_value filtration) { + Base::assign_filtration(Base::find(simplex), filtration); + Base::clear_filtration(); } bool insert(const Simplex& simplex, Filtration_value filtration = 0) { Insertion_result result = Base::insert_simplex_and_subfaces(simplex, filtration); + if (result.first != Base::null_simplex()) + Base::clear_filtration(); return (result.second); } @@ -82,7 +87,7 @@ class Simplex_tree_interface : public Simplex_tree<SimplexTreeOptions> { void remove_maximal_simplex(const Simplex& simplex) { Base::remove_maximal_simplex(Base::find(simplex)); - Base::initialize_filtration(); + Base::clear_filtration(); } Simplex_and_filtration get_simplex_and_filtration(Simplex_handle f_simplex) { @@ -117,9 +122,39 @@ class Simplex_tree_interface : public Simplex_tree<SimplexTreeOptions> { return cofaces; } - void create_persistence(Gudhi::Persistent_cohomology_interface<Base>* pcoh) { - Base::initialize_filtration(); - pcoh = new Gudhi::Persistent_cohomology_interface<Base>(*this); + void compute_extended_filtration() { + this->efd = this->extend_filtration(); + return; + } + + std::vector<std::vector<std::pair<int, std::pair<Filtration_value, Filtration_value>>>> compute_extended_persistence_subdiagrams(const std::vector<std::pair<int, std::pair<Filtration_value, Filtration_value>>>& dgm, Filtration_value min_persistence){ + std::vector<std::vector<std::pair<int, std::pair<Filtration_value, Filtration_value>>>> new_dgm(4); + for (unsigned int i = 0; i < dgm.size(); i++){ + std::pair<Filtration_value, Extended_simplex_type> px = this->decode_extended_filtration(dgm[i].second.first, this->efd); + std::pair<Filtration_value, Extended_simplex_type> py = this->decode_extended_filtration(dgm[i].second.second, this->efd); + std::pair<int, std::pair<Filtration_value, Filtration_value>> pd_point = std::make_pair(dgm[i].first, std::make_pair(px.first, py.first)); + if(std::abs(px.first - py.first) > min_persistence){ + //Ordinary + if (px.second == Extended_simplex_type::UP && py.second == Extended_simplex_type::UP){ + new_dgm[0].push_back(pd_point); + } + // Relative + else if (px.second == Extended_simplex_type::DOWN && py.second == Extended_simplex_type::DOWN){ + new_dgm[1].push_back(pd_point); + } + else{ + // Extended+ + if (px.first < py.first){ + new_dgm[2].push_back(pd_point); + } + //Extended- + else{ + new_dgm[3].push_back(pd_point); + } + } + } + } + return new_dgm; } // Iterator over the simplex tree diff --git a/src/python/include/Strong_witness_complex_interface.h b/src/python/include/Strong_witness_complex_interface.h index cda5b514..e9ab0c7b 100644 --- a/src/python/include/Strong_witness_complex_interface.h +++ b/src/python/include/Strong_witness_complex_interface.h @@ -41,13 +41,11 @@ class Strong_witness_complex_interface { void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square, std::size_t limit_dimension) { witness_complex_->create_complex(*simplex_tree, max_alpha_square, limit_dimension); - simplex_tree->initialize_filtration(); } void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square) { witness_complex_->create_complex(*simplex_tree, max_alpha_square); - simplex_tree->initialize_filtration(); } private: diff --git a/src/python/include/Tangential_complex_interface.h b/src/python/include/Tangential_complex_interface.h index 698226cc..b1afce94 100644 --- a/src/python/include/Tangential_complex_interface.h +++ b/src/python/include/Tangential_complex_interface.h @@ -90,7 +90,6 @@ class Tangential_complex_interface { void create_simplex_tree(Simplex_tree<>* simplex_tree) { tangential_complex_->create_complex<Gudhi::Simplex_tree<Gudhi::Simplex_tree_options_full_featured>>(*simplex_tree); - simplex_tree->initialize_filtration(); } void set_max_squared_edge_length(double max_squared_edge_length) { diff --git a/src/python/include/Witness_complex_interface.h b/src/python/include/Witness_complex_interface.h index 45e14253..76947e53 100644 --- a/src/python/include/Witness_complex_interface.h +++ b/src/python/include/Witness_complex_interface.h @@ -41,13 +41,11 @@ class Witness_complex_interface { void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square, std::size_t limit_dimension) { witness_complex_->create_complex(*simplex_tree, max_alpha_square, limit_dimension); - simplex_tree->initialize_filtration(); } void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square) { witness_complex_->create_complex(*simplex_tree, max_alpha_square); - simplex_tree->initialize_filtration(); } private: diff --git a/src/python/test/test_cover_complex.py b/src/python/test/test_cover_complex.py index 32bc5a26..260f6a5c 100755 --- a/src/python/test/test_cover_complex.py +++ b/src/python/test/test_cover_complex.py @@ -9,6 +9,7 @@ """ from gudhi import CoverComplex +import pytest __author__ = "Vincent Rouvreau" __copyright__ = "Copyright (C) 2018 Inria" @@ -24,7 +25,8 @@ def test_empty_constructor(): def test_non_existing_file_read(): # Try to open a non existing file cover = CoverComplex() - assert cover.read_point_cloud("pouetpouettralala.toubiloubabdou") == False + with pytest.raises(FileNotFoundError): + cover.read_point_cloud("pouetpouettralala.toubiloubabdou") def test_files_creation(): diff --git a/src/python/test/test_cubical_complex.py b/src/python/test/test_cubical_complex.py index 8c1b2600..fce4875c 100755 --- a/src/python/test/test_cubical_complex.py +++ b/src/python/test/test_cubical_complex.py @@ -10,6 +10,7 @@ from gudhi import CubicalComplex, PeriodicCubicalComplex import numpy as np +import pytest __author__ = "Vincent Rouvreau" __copyright__ = "Copyright (C) 2016 Inria" @@ -25,9 +26,8 @@ def test_empty_constructor(): def test_non_existing_perseus_file_constructor(): # Try to open a non existing file - cub = CubicalComplex(perseus_file="pouetpouettralala.toubiloubabdou") - assert cub.__is_defined() == False - assert cub.__is_persistence_defined() == False + with pytest.raises(FileNotFoundError): + cub = CubicalComplex(perseus_file="pouetpouettralala.toubiloubabdou") def test_dimension_or_perseus_file_constructor(): diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py new file mode 100755 index 00000000..bff4c267 --- /dev/null +++ b/src/python/test/test_dtm.py @@ -0,0 +1,68 @@ +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. + Author(s): Marc Glisse + + Copyright (C) 2020 Inria + + Modification(s): + - YYYY/MM Author: Description of the modification +""" + +from gudhi.point_cloud.dtm import DistanceToMeasure +import numpy +import pytest +import torch + + +def test_dtm_compare_euclidean(): + pts = numpy.random.rand(1000, 4) + k = 6 + dtm = DistanceToMeasure(k, implementation="ckdtree") + r0 = dtm.fit_transform(pts) + dtm = DistanceToMeasure(k, implementation="sklearn") + r1 = dtm.fit_transform(pts) + assert r1 == pytest.approx(r0) + dtm = DistanceToMeasure(k, implementation="sklearn", algorithm="brute") + r2 = dtm.fit_transform(pts) + assert r2 == pytest.approx(r0) + dtm = DistanceToMeasure(k, implementation="hnsw") + r3 = dtm.fit_transform(pts) + assert r3 == pytest.approx(r0, rel=0.1) + from scipy.spatial.distance import cdist + + d = cdist(pts, pts) + dtm = DistanceToMeasure(k, metric="precomputed") + r4 = dtm.fit_transform(d) + assert r4 == pytest.approx(r0) + dtm = DistanceToMeasure(k, metric="precomputed", n_jobs=2) + r4b = dtm.fit_transform(d) + assert r4b == pytest.approx(r0) + dtm = DistanceToMeasure(k, implementation="keops") + r5 = dtm.fit_transform(pts) + assert r5 == pytest.approx(r0) + pts2 = torch.tensor(pts, requires_grad=True) + assert pts2.grad is None + dtm = DistanceToMeasure(k, implementation="keops", enable_autodiff=True) + r6 = dtm.fit_transform(pts2) + assert r6.detach().numpy() == pytest.approx(r0) + r6.sum().backward() + assert not torch.isnan(pts2.grad).any() + pts2 = torch.tensor(pts, requires_grad=True) + assert pts2.grad is None + dtm = DistanceToMeasure(k, implementation="ckdtree", enable_autodiff=True) + r7 = dtm.fit_transform(pts2) + assert r7.detach().numpy() == pytest.approx(r0) + r7.sum().backward() + assert not torch.isnan(pts2.grad).any() + + +def test_dtm_precomputed(): + dist = numpy.array([[1.0, 3, 8], [1, 5, 5], [0, 2, 3]]) + dtm = DistanceToMeasure(2, q=1, metric="neighbors") + r = dtm.fit_transform(dist) + assert r == pytest.approx([2.0, 3, 1]) + + dist = numpy.array([[2.0, 2], [0, 1], [3, 4]]) + dtm = DistanceToMeasure(2, q=2, metric="neighbors") + r = dtm.fit_transform(dist) + assert r == pytest.approx([2.0, 0.707, 3.5355], rel=0.01) diff --git a/src/python/test/test_knn.py b/src/python/test/test_knn.py new file mode 100755 index 00000000..a87ec212 --- /dev/null +++ b/src/python/test/test_knn.py @@ -0,0 +1,130 @@ +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. + Author(s): Marc Glisse + + Copyright (C) 2020 Inria + + Modification(s): + - YYYY/MM Author: Description of the modification +""" + +from gudhi.point_cloud.knn import KNearestNeighbors +import numpy as np +import pytest + + +def test_knn_explicit(): + base = np.array([[1.0, 1], [1, 2], [4, 2], [4, 3]]) + query = np.array([[1.0, 1], [2, 2], [4, 4]]) + knn = KNearestNeighbors(2, metric="manhattan", return_distance=True, return_index=True) + knn.fit(base) + r = knn.transform(query) + assert r[0] == pytest.approx(np.array([[0, 1], [1, 0], [3, 2]])) + assert r[1] == pytest.approx(np.array([[0.0, 1], [1, 2], [1, 2]])) + + knn = KNearestNeighbors(2, metric="chebyshev", return_distance=True, return_index=False) + knn.fit(base) + r = knn.transform(query) + assert r == pytest.approx(np.array([[0.0, 1], [1, 1], [1, 2]])) + r = ( + KNearestNeighbors(2, metric="chebyshev", return_distance=True, return_index=False, implementation="keops") + .fit(base) + .transform(query) + ) + assert r == pytest.approx(np.array([[0.0, 1], [1, 1], [1, 2]])) + r = ( + KNearestNeighbors(2, metric="chebyshev", return_distance=True, return_index=False, implementation="keops", enable_autodiff=True) + .fit(base) + .transform(query) + ) + assert r == pytest.approx(np.array([[0.0, 1], [1, 1], [1, 2]])) + + knn = KNearestNeighbors(2, metric="minkowski", p=3, return_distance=False, return_index=True) + knn.fit(base) + r = knn.transform(query) + assert np.array_equal(r, [[0, 1], [1, 0], [3, 2]]) + r = ( + KNearestNeighbors(2, metric="minkowski", p=3, return_distance=False, return_index=True, implementation="keops") + .fit(base) + .transform(query) + ) + assert np.array_equal(r, [[0, 1], [1, 0], [3, 2]]) + + dist = np.array([[0.0, 3, 8], [1, 0, 5], [1, 2, 0]]) + knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=False) + r = knn.fit_transform(dist) + assert np.array_equal(r, [[0, 1], [1, 0], [2, 0]]) + knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=True, sort_results=True) + r = knn.fit_transform(dist) + assert np.array_equal(r[0], [[0, 1], [1, 0], [2, 0]]) + assert np.array_equal(r[1], [[0, 3], [0, 1], [0, 1]]) + # Second time in parallel + knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=False, n_jobs=2, sort_results=True) + r = knn.fit_transform(dist) + assert np.array_equal(r, [[0, 1], [1, 0], [2, 0]]) + knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=True, n_jobs=2) + r = knn.fit_transform(dist) + assert np.array_equal(r[0], [[0, 1], [1, 0], [2, 0]]) + assert np.array_equal(r[1], [[0, 3], [0, 1], [0, 1]]) + + +def test_knn_compare(): + base = np.array([[1.0, 1], [1, 2], [4, 2], [4, 3]]) + query = np.array([[1.0, 1], [2, 2], [4, 4]]) + r0 = ( + KNearestNeighbors(2, implementation="ckdtree", return_index=True, return_distance=False) + .fit(base) + .transform(query) + ) + r1 = ( + KNearestNeighbors(2, implementation="sklearn", return_index=True, return_distance=False) + .fit(base) + .transform(query) + ) + r2 = ( + KNearestNeighbors(2, implementation="hnsw", return_index=True, return_distance=False).fit(base).transform(query) + ) + r3 = ( + KNearestNeighbors(2, implementation="keops", return_index=True, return_distance=False) + .fit(base) + .transform(query) + ) + assert np.array_equal(r0, r1) and np.array_equal(r0, r2) and np.array_equal(r0, r3) + + r0 = ( + KNearestNeighbors(2, implementation="ckdtree", return_index=True, return_distance=True) + .fit(base) + .transform(query) + ) + r1 = ( + KNearestNeighbors(2, implementation="sklearn", return_index=True, return_distance=True) + .fit(base) + .transform(query) + ) + r2 = KNearestNeighbors(2, implementation="hnsw", return_index=True, return_distance=True).fit(base).transform(query) + r3 = ( + KNearestNeighbors(2, implementation="keops", return_index=True, return_distance=True).fit(base).transform(query) + ) + assert np.array_equal(r0[0], r1[0]) and np.array_equal(r0[0], r2[0]) and np.array_equal(r0[0], r3[0]) + d0 = pytest.approx(r0[1]) + assert r1[1] == d0 and r2[1] == d0 and r3[1] == d0 + + +def test_knn_nop(): + # This doesn't look super useful... + p = np.array([[0.0]]) + assert None is KNearestNeighbors( + k=1, return_index=False, return_distance=False, implementation="sklearn" + ).fit_transform(p) + assert None is KNearestNeighbors( + k=1, return_index=False, return_distance=False, implementation="ckdtree" + ).fit_transform(p) + assert None is KNearestNeighbors( + k=1, return_index=False, return_distance=False, implementation="hnsw", ef=5 + ).fit_transform(p) + assert None is KNearestNeighbors( + k=1, return_index=False, return_distance=False, implementation="keops" + ).fit_transform(p) + assert None is KNearestNeighbors( + k=1, return_index=False, return_distance=False, metric="precomputed" + ).fit_transform(p) diff --git a/src/python/test/test_simplex_tree.py b/src/python/test/test_simplex_tree.py index f7848379..2137d822 100755 --- a/src/python/test/test_simplex_tree.py +++ b/src/python/test/test_simplex_tree.py @@ -9,6 +9,7 @@ """ from gudhi import SimplexTree +import pytest __author__ = "Vincent Rouvreau" __copyright__ = "Copyright (C) 2016 Inria" @@ -45,7 +46,6 @@ def test_insertion(): assert st.find([2, 3]) == False # filtration test - st.initialize_filtration() assert st.filtration([0, 1, 2]) == 4.0 assert st.filtration([0, 2]) == 4.0 assert st.filtration([1, 2]) == 4.0 @@ -92,7 +92,6 @@ def test_insertion(): assert st.find([1]) == True assert st.find([2]) == True - st.initialize_filtration() assert st.persistence(persistence_dim_max=True) == [ (1, (4.0, float("inf"))), (0, (0.0, float("inf"))), @@ -150,7 +149,6 @@ def test_expansion(): st.expansion(3) assert st.num_vertices() == 7 assert st.num_simplices() == 22 - st.initialize_filtration() assert list(st.get_filtration()) == [ ([2], 0.1), @@ -250,6 +248,87 @@ def test_make_filtration_non_decreasing(): assert st.filtration([3, 4]) == 2.0 assert st.filtration([4, 5]) == 2.0 +def test_extend_filtration(): + + # Inserted simplex: + # 5 4 + # o o + # / \ / + # o o + # /2\ /3 + # o o + # 1 0 + + st = SimplexTree() + st.insert([0,2]) + st.insert([1,2]) + st.insert([0,3]) + st.insert([2,5]) + st.insert([3,4]) + st.insert([3,5]) + st.assign_filtration([0], 1.) + st.assign_filtration([1], 2.) + st.assign_filtration([2], 3.) + st.assign_filtration([3], 4.) + st.assign_filtration([4], 5.) + st.assign_filtration([5], 6.) + + assert list(st.get_filtration()) == [ + ([0, 2], 0.0), + ([1, 2], 0.0), + ([0, 3], 0.0), + ([3, 4], 0.0), + ([2, 5], 0.0), + ([3, 5], 0.0), + ([0], 1.0), + ([1], 2.0), + ([2], 3.0), + ([3], 4.0), + ([4], 5.0), + ([5], 6.0) + ] + + st.extend_filtration() + + assert list(st.get_filtration()) == [ + ([6], -3.0), + ([0], -2.0), + ([1], -1.8), + ([2], -1.6), + ([0, 2], -1.6), + ([1, 2], -1.6), + ([3], -1.4), + ([0, 3], -1.4), + ([4], -1.2), + ([3, 4], -1.2), + ([5], -1.0), + ([2, 5], -1.0), + ([3, 5], -1.0), + ([5, 6], 1.0), + ([4, 6], 1.2), + ([3, 6], 1.4), + ([3, 4, 6], 1.4), + ([3, 5, 6], 1.4), + ([2, 6], 1.6), + ([2, 5, 6], 1.6), + ([1, 6], 1.8), + ([1, 2, 6], 1.8), + ([0, 6], 2.0), + ([0, 2, 6], 2.0), + ([0, 3, 6], 2.0) + ] + + dgms = st.extended_persistence(min_persistence=-1.) + + assert dgms[0][0][1][0] == pytest.approx(2.) + assert dgms[0][0][1][1] == pytest.approx(3.) + assert dgms[1][0][1][0] == pytest.approx(5.) + assert dgms[1][0][1][1] == pytest.approx(4.) + assert dgms[2][0][1][0] == pytest.approx(1.) + assert dgms[2][0][1][1] == pytest.approx(6.) + assert dgms[3][0][1][0] == pytest.approx(6.) + assert dgms[3][0][1][1] == pytest.approx(1.) + def test_simplices_iterator(): st = SimplexTree() diff --git a/src/python/test/test_subsampling.py b/src/python/test/test_subsampling.py index fe0985fa..31f64e32 100755 --- a/src/python/test/test_subsampling.py +++ b/src/python/test/test_subsampling.py @@ -120,7 +120,6 @@ def test_simple_pick_n_random_points(): # Go furter than point set on purpose for iter in range(1, 10): sub_set = gudhi.pick_n_random_points(points=point_set, nb_points=iter) - print(5) for sub in sub_set: found = False for point in point_set: diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py new file mode 100755 index 00000000..f68c748e --- /dev/null +++ b/src/python/test/test_wasserstein_barycenter.py @@ -0,0 +1,46 @@ +from gudhi.wasserstein.barycenter import lagrangian_barycenter +import numpy as np + +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. + Author(s): Theo Lacombe + + Copyright (C) 2019 Inria + + Modification(s): + - YYYY/MM Author: Description of the modification +""" + +__author__ = "Theo Lacombe" +__copyright__ = "Copyright (C) 2019 Inria" +__license__ = "MIT" + + +def test_lagrangian_barycenter(): + + dg1 = np.array([[0.2, 0.5]]) + dg2 = np.array([[0.2, 0.7]]) + dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]]) + dg4 = np.array([]) + dg5 = np.array([]) + dg6 = np.array([]) + res = np.array([[0.27916667, 0.55416667], [0.7375, 0.7625], [0.2375, 0.2625]]) + + dg7 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]]) + dg8 = np.array([[0., 4.], [4, 8]]) + + # error crit. + eps = 1e-7 + + + assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < eps + assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), np.empty(shape=(0,2))) + assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < eps + Y, log = lagrangian_barycenter(pdiagset=[dg4, dg8], verbose=True) + assert np.linalg.norm(Y - np.array([[1,3], [5, 7]])) < eps + assert np.abs(log["energy"] - 2) < eps + assert np.array_equal(log["groupings"][0] , np.array([[0, -1], [1, -1]])) + assert np.array_equal(log["groupings"][1] , np.array([[0, 0], [1, 1]])) + assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg8, dg4], init=np.array([[0.2, 0.6], [0.5, 0.7]]), verbose=False) - np.array([[1, 3], [5, 7]])) < eps + assert lagrangian_barycenter(pdiagset = []) is None + diff --git a/src/python/test/test_wasserstein_distance.py b/src/python/test/test_wasserstein_distance.py index 0d70e11a..1a4acc1d 100755 --- a/src/python/test/test_wasserstein_distance.py +++ b/src/python/test/test_wasserstein_distance.py @@ -8,6 +8,7 @@ - YYYY/MM Author: Description of the modification """ +from gudhi.wasserstein.wasserstein import _proj_on_diag from gudhi.wasserstein import wasserstein_distance as pot from gudhi.hera import wasserstein_distance as hera import numpy as np @@ -17,6 +18,12 @@ __author__ = "Theo Lacombe" __copyright__ = "Copyright (C) 2019 Inria" __license__ = "MIT" +def test_proj_on_diag(): + dgm = np.array([[1., 1.], [1., 2.], [3., 5.]]) + assert np.array_equal(_proj_on_diag(dgm), [[1., 1.], [1.5, 1.5], [4., 4.]]) + empty = np.empty((0, 2)) + assert np.array_equal(_proj_on_diag(empty), empty) + def _basic_wasserstein(wasserstein_distance, delta, test_infinity=True, test_matching=True): diag1 = np.array([[2.7, 3.7], [9.6, 14.0], [34.2, 34.974]]) diag2 = np.array([[2.8, 4.45], [9.5, 14.1]]) @@ -70,7 +77,7 @@ def _basic_wasserstein(wasserstein_distance, delta, test_infinity=True, test_mat assert np.array_equal(match , [[0, -1], [1, -1]]) match = wasserstein_distance(diag1, diag2, matching=True, internal_p=2., order=2.)[1] assert np.array_equal(match, [[0, 0], [1, 1], [2, -1]]) - + def hera_wrap(delta): |