author    Gard Spreemann <gspr@nonempty.org>  2020-08-11 13:55:57 +0200
committer Gard Spreemann <gspr@nonempty.org>  2020-08-11 13:55:57 +0200
commit    1c05c20d7cf92c96b5036620cc892cb956c96785 (patch)
tree      8ae9a9396ea2b97f617915b8730632917cf786ec /src/python
parent    9b3079646ee3f6a494b83e864b3e10b8a93597d0 (diff)
parent    92fe082aed387ef050d5077157daea9ee3a7c7f4 (diff)
Merge tag 'tags/gudhi-release-3.3.0' into dfsg/latest
Diffstat (limited to 'src/python')
-rw-r--r--  src/python/CMakeLists.txt | 57
-rw-r--r--  src/python/doc/_templates/layout.html | 53
-rw-r--r--  src/python/doc/alpha_complex_sum.inc | 14
-rw-r--r--  src/python/doc/alpha_complex_user.rst | 71
-rw-r--r--  src/python/doc/bottleneck_distance_user.rst | 19
-rw-r--r--  src/python/doc/clustering.inc | 12
-rw-r--r--  src/python/doc/clustering.rst | 73
-rw-r--r--  src/python/doc/cubical_complex_sum.inc | 6
-rw-r--r--  src/python/doc/examples.rst | 33
-rw-r--r--  src/python/doc/img/spiral-color.png | bin 0 -> 222425 bytes
-rw-r--r--  src/python/doc/index.rst | 9
-rw-r--r--  src/python/doc/installation.rst | 73
-rw-r--r--  src/python/doc/nerve_gic_complex_user.rst | 5
-rw-r--r--  src/python/doc/persistence_graphical_tools_user.rst | 11
-rw-r--r--  src/python/doc/persistent_cohomology_sum.inc | 4
-rw-r--r--  src/python/doc/point_cloud_sum.inc | 4
-rwxr-xr-x  src/python/doc/python3-sphinx-build.py | 11
-rw-r--r--  src/python/doc/representations_sum.inc | 2
-rw-r--r--  src/python/doc/rips_complex_ref.rst | 13
-rw-r--r--  src/python/doc/rips_complex_sum.inc | 15
-rw-r--r--  src/python/doc/rips_complex_user.rst | 22
-rw-r--r--  src/python/doc/tangential_complex_sum.inc | 8
-rwxr-xr-x  src/python/example/rips_complex_edge_collapse_example.py | 62
-rw-r--r--  src/python/gudhi/alpha_complex.pyx | 62
-rw-r--r--  src/python/gudhi/bottleneck.cc | 15
-rw-r--r--  src/python/gudhi/clustering/__init__.py | 0
-rw-r--r--  src/python/gudhi/clustering/_tomato.cc | 277
-rw-r--r--  src/python/gudhi/clustering/tomato.py | 321
-rw-r--r--  src/python/gudhi/cubical_complex.pyx | 50
-rw-r--r--  src/python/gudhi/dtm_rips_complex.py | 51
-rw-r--r--  src/python/gudhi/hera/__init__.py | 7
-rw-r--r--  src/python/gudhi/hera/bottleneck.cc | 54
-rw-r--r--  src/python/gudhi/hera/wasserstein.cc (renamed from src/python/gudhi/hera.cc) | 2
-rw-r--r--  src/python/gudhi/periodic_cubical_complex.pyx | 86
-rw-r--r--  src/python/gudhi/persistence_graphical_tools.py | 37
-rw-r--r--  src/python/gudhi/point_cloud/dtm.py | 109
-rw-r--r--  src/python/gudhi/point_cloud/knn.py | 6
-rw-r--r--  src/python/gudhi/representations/kernel_methods.py | 41
-rw-r--r--  src/python/gudhi/representations/metrics.py | 71
-rw-r--r--  src/python/gudhi/representations/vector_methods.py | 144
-rw-r--r--  src/python/gudhi/simplex_tree.pxd | 1
-rw-r--r--  src/python/gudhi/simplex_tree.pyx | 71
-rw-r--r--  src/python/gudhi/wasserstein/wasserstein.py | 16
-rw-r--r--  src/python/include/Alpha_complex_factory.h | 139
-rw-r--r--  src/python/include/Alpha_complex_interface.h | 69
-rw-r--r--  src/python/include/Simplex_tree_interface.h | 31
-rw-r--r--  src/python/include/pybind11_diagram_utils.h | 8
-rw-r--r--  src/python/introduction.rst | 24
-rw-r--r--  src/python/setup.py.in | 27
-rwxr-xr-x  src/python/test/test_alpha_complex.py | 133
-rwxr-xr-x  src/python/test/test_bottleneck_distance.py | 6
-rwxr-xr-x  src/python/test/test_cubical_complex.py | 17
-rwxr-xr-x  src/python/test/test_dtm.py | 23
-rw-r--r--  src/python/test/test_dtm_rips_complex.py | 32
-rwxr-xr-x  src/python/test/test_representations.py | 50
-rwxr-xr-x  src/python/test/test_simplex_tree.py | 18
-rwxr-xr-x  src/python/test/test_tomato.py | 65
-rw-r--r--  src/python/test/test_weighted_rips_complex.py (renamed from src/python/test/test_weighted_rips.py) | 0
58 files changed, 2264 insertions(+), 376 deletions(-)
diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt
index ab08cd6d..4f26481e 100644
--- a/src/python/CMakeLists.txt
+++ b/src/python/CMakeLists.txt
@@ -14,6 +14,15 @@ function( add_GUDHI_PYTHON_lib THE_LIB )
endif(EXISTS ${THE_LIB})
endfunction( add_GUDHI_PYTHON_lib )
+function( add_GUDHI_PYTHON_lib_dir THE_LIB_DIR )
+ # Deals with the case when it is not set, which otherwise raises an error on Windows
+ if(EXISTS ${THE_LIB_DIR})
+ set(GUDHI_PYTHON_LIBRARY_DIRS "${GUDHI_PYTHON_LIBRARY_DIRS}'${THE_LIB_DIR}', " PARENT_SCOPE)
+ else()
+ message("add_GUDHI_PYTHON_lib_dir - '${THE_LIB_DIR}' does not exist")
+ endif()
+endfunction( add_GUDHI_PYTHON_lib_dir )
+
# THE_TEST is the python test file name (without .py extension) containing tests functions
function( add_gudhi_py_test THE_TEST )
if(PYTEST_FOUND)
@@ -36,6 +45,7 @@ if(PYTHONINTERP_FOUND)
add_gudhi_debug_info("Pybind11 version ${PYBIND11_VERSION}")
set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'bottleneck', ")
set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'hera', ")
+ set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'clustering', ")
endif()
if(CYTHON_FOUND)
set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'off_reader', ")
@@ -58,6 +68,7 @@ if(PYTHONINTERP_FOUND)
set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'wasserstein', ")
set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'point_cloud', ")
set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'weighted_rips_complex', ")
+ set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'dtm_rips_complex', ")
add_gudhi_debug_info("Python version ${PYTHON_VERSION_STRING}")
add_gudhi_debug_info("Cython version ${CYTHON_VERSION}")
@@ -129,7 +140,9 @@ if(PYTHONINTERP_FOUND)
set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'reader_utils', ")
set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'witness_complex', ")
set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'strong_witness_complex', ")
- set(GUDHI_PYBIND11_MODULES "${GUDHI_PYBIND11_MODULES}'hera', ")
+ set(GUDHI_PYBIND11_MODULES "${GUDHI_PYBIND11_MODULES}'clustering/_tomato', ")
+ set(GUDHI_PYBIND11_MODULES "${GUDHI_PYBIND11_MODULES}'hera/wasserstein', ")
+ set(GUDHI_PYBIND11_MODULES "${GUDHI_PYBIND11_MODULES}'hera/bottleneck', ")
if (NOT CGAL_VERSION VERSION_LESS 4.11.0)
set(GUDHI_PYBIND11_MODULES "${GUDHI_PYBIND11_MODULES}'bottleneck', ")
set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'nerve_gic', ")
@@ -150,7 +163,7 @@ if(PYTHONINTERP_FOUND)
else(CGAL_HEADER_ONLY)
add_gudhi_debug_info("CGAL version ${CGAL_VERSION}")
add_GUDHI_PYTHON_lib("${CGAL_LIBRARY}")
- set(GUDHI_PYTHON_LIBRARY_DIRS "${GUDHI_PYTHON_LIBRARY_DIRS}'${CGAL_LIBRARIES_DIR}', ")
+ add_GUDHI_PYTHON_lib_dir("${CGAL_LIBRARIES_DIR}")
message("** Add CGAL ${CGAL_LIBRARIES_DIR}")
# If CGAL is not header only, CGAL library may link with boost system,
if(CMAKE_BUILD_TYPE MATCHES Debug)
@@ -158,7 +171,7 @@ if(PYTHONINTERP_FOUND)
else()
add_GUDHI_PYTHON_lib("${Boost_SYSTEM_LIBRARY_RELEASE}")
endif()
- set(GUDHI_PYTHON_LIBRARY_DIRS "${GUDHI_PYTHON_LIBRARY_DIRS}'${Boost_LIBRARY_DIRS}', ")
+ add_GUDHI_PYTHON_lib_dir("${Boost_LIBRARY_DIRS}")
message("** Add Boost ${Boost_LIBRARY_DIRS}")
endif(CGAL_HEADER_ONLY)
# GMP and GMPXX are not required, but if present, CGAL will link with them.
@@ -166,13 +179,17 @@ if(PYTHONINTERP_FOUND)
add_gudhi_debug_info("GMP_LIBRARIES = ${GMP_LIBRARIES}")
set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DCGAL_USE_GMP', ")
add_GUDHI_PYTHON_lib("${GMP_LIBRARIES}")
- set(GUDHI_PYTHON_LIBRARY_DIRS "${GUDHI_PYTHON_LIBRARY_DIRS}'${GMP_LIBRARIES_DIR}', ")
+ if(NOT GMP_LIBRARIES_DIR)
+ get_filename_component(GMP_LIBRARIES_DIR ${GMP_LIBRARIES} PATH)
+ message("GMP_LIBRARIES_DIR from GMP_LIBRARIES set to ${GMP_LIBRARIES_DIR}")
+ endif(NOT GMP_LIBRARIES_DIR)
+ add_GUDHI_PYTHON_lib_dir("${GMP_LIBRARIES_DIR}")
message("** Add gmp ${GMP_LIBRARIES_DIR}")
if(GMPXX_FOUND)
add_gudhi_debug_info("GMPXX_LIBRARIES = ${GMPXX_LIBRARIES}")
set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DCGAL_USE_GMPXX', ")
add_GUDHI_PYTHON_lib("${GMPXX_LIBRARIES}")
- set(GUDHI_PYTHON_LIBRARY_DIRS "${GUDHI_PYTHON_LIBRARY_DIRS}'${GMPXX_LIBRARIES_DIR}', ")
+ add_GUDHI_PYTHON_lib_dir("${GMPXX_LIBRARIES_DIR}")
message("** Add gmpxx ${GMPXX_LIBRARIES_DIR}")
endif(GMPXX_FOUND)
endif(GMP_FOUND)
@@ -183,9 +200,10 @@ if(PYTHONINTERP_FOUND)
# In case CGAL is not header only, all MPFR variables are set except MPFR_LIBRARIES_DIR - Just set it
if(NOT MPFR_LIBRARIES_DIR)
get_filename_component(MPFR_LIBRARIES_DIR ${MPFR_LIBRARIES} PATH)
+ message("MPFR_LIBRARIES_DIR from MPFR_LIBRARIES set to ${MPFR_LIBRARIES_DIR}")
endif(NOT MPFR_LIBRARIES_DIR)
- set(GUDHI_PYTHON_LIBRARY_DIRS "${GUDHI_PYTHON_LIBRARY_DIRS}'${MPFR_LIBRARIES_DIR}', ")
- message("** Add mpfr ${MPFR_LIBRARIES}")
+ add_GUDHI_PYTHON_lib_dir("${MPFR_LIBRARIES_DIR}")
+ message("** Add mpfr ${MPFR_LIBRARIES_DIR}")
endif(MPFR_FOUND)
endif(CGAL_FOUND)
@@ -212,7 +230,7 @@ if(PYTHONINTERP_FOUND)
add_GUDHI_PYTHON_lib("${TBB_RELEASE_LIBRARY}")
add_GUDHI_PYTHON_lib("${TBB_MALLOC_RELEASE_LIBRARY}")
endif()
- set(GUDHI_PYTHON_LIBRARY_DIRS "${GUDHI_PYTHON_LIBRARY_DIRS}'${TBB_LIBRARY_DIRS}', ")
+ add_GUDHI_PYTHON_lib_dir("${TBB_LIBRARY_DIRS}")
message("** Add tbb ${TBB_LIBRARY_DIRS}")
set(GUDHI_PYTHON_INCLUDE_DIRS "${GUDHI_PYTHON_INCLUDE_DIRS}'${TBB_INCLUDE_DIRS}', ")
endif()
@@ -233,7 +251,13 @@ if(PYTHONINTERP_FOUND)
file(COPY "gudhi/representations" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi/")
file(COPY "gudhi/wasserstein" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
file(COPY "gudhi/point_cloud" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
+ file(COPY "gudhi/clustering" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi" FILES_MATCHING PATTERN "*.py")
file(COPY "gudhi/weighted_rips_complex.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
+ file(COPY "gudhi/dtm_rips_complex.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
+ file(COPY "gudhi/hera/__init__.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi/hera")
+
+ # Some files for pip package
+ file(COPY "introduction.rst" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/")
add_custom_command(
OUTPUT gudhi.so
@@ -353,7 +377,9 @@ if(PYTHONINTERP_FOUND)
COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}"
${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/example/bottleneck_basic_example.py")
- add_gudhi_py_test(test_bottleneck_distance)
+ if (PYBIND11_FOUND)
+ add_gudhi_py_test(test_bottleneck_distance)
+ endif()
# Cover complex
file(COPY ${CMAKE_SOURCE_DIR}/data/points/human.off DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/)
@@ -490,11 +516,22 @@ if(PYTHONINTERP_FOUND)
add_gudhi_py_test(test_dtm)
endif()
+ # Tomato
+ if(SCIPY_FOUND AND SKLEARN_FOUND AND PYBIND11_FOUND)
+ add_gudhi_py_test(test_tomato)
+ endif()
+
# Weighted Rips
if(SCIPY_FOUND)
- add_gudhi_py_test(test_weighted_rips)
+ add_gudhi_py_test(test_weighted_rips_complex)
endif()
+ # DTM Rips
+ if(SCIPY_FOUND)
+ add_gudhi_py_test(test_dtm_rips_complex)
+ endif()
+
+
# Set missing or not modules
set(GUDHI_MODULES ${GUDHI_MODULES} "python" CACHE INTERNAL "GUDHI_MODULES")
else(CYTHON_FOUND)
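For context, the `'${THE_LIB_DIR}', ` fragments accumulated by `add_GUDHI_PYTHON_lib_dir` above are spliced into a Python list in `setup.py.in`. The sketch below is a simplified, hypothetical rendering of how such a list is consumed by setuptools (the extension name and paths are illustrative, not the verbatim GUDHI template):

.. code-block:: python

    from setuptools import Extension

    # CMake substitutes the accumulated "'dir1', 'dir2', " fragments into a
    # list literal, which is why each call appends a quoted path plus ", ".
    library_dirs = ['/usr/lib/x86_64-linux-gnu', ]  # stands in for @GUDHI_PYTHON_LIBRARY_DIRS@

    ext = Extension(
        "gudhi.bottleneck",
        sources=["gudhi/bottleneck.cc"],
        # Only directories that actually exist are appended, avoiding link errors on Windows.
        library_dirs=library_dirs,
    )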
diff --git a/src/python/doc/_templates/layout.html b/src/python/doc/_templates/layout.html
index a672a281..cd40a51b 100644
--- a/src/python/doc/_templates/layout.html
+++ b/src/python/doc/_templates/layout.html
@@ -175,58 +175,59 @@
<h1 class="show-for-small-only"><a href="" class="icon-tree"> GUDHI library</a></h1>
</li>
<!-- Remove the class "menu-icon" to get rid of menu icon. Take out "Menu" to just have icon alone -->
- <li class="toggle-topbar menu-icon"><a href="#"><span>Navigation</span></a></li>
+ <li class="toggle-topbar menu-icon"><a href="#"><span>Nav</span></a></li>
</ul>
<section class="top-bar-section">
<ul class="right">
<li class="divider"></li>
- <li><a href="/contact/">Contact</a></li>
+ <li><a href="/contact/">Contact</a></li>
</ul>
<ul class="left">
- <li><a href="/"> <img src="/assets/img/home.png" alt="&nbsp;&nbsp;GUDHI">&nbsp;&nbsp;GUDHI </a></li>
+ <li><a href="/"> <img src="/assets/img/home.png" alt=" GUDHI"> GUDHI </a></li>
<li class="divider"></li>
<li class="has-dropdown">
- <a href="#">Project</a>
+ <a href="#">Project</a>
<ul class="dropdown">
- <li><a href="/people/">People</a></li>
- <li><a href="/keepintouch/">Keep in touch</a></li>
- <li><a href="/partners/">Partners and Funding</a></li>
- <li><a href="/relatedprojects/">Related projects</a></li>
- <li><a href="/theyaretalkingaboutus/">They are talking about us</a></li>
- <li><a href="/inaction/">GUDHI in action</a></li>
+ <li><a href="/people/">People</a></li>
+ <li><a href="/keepintouch/">Keep in touch</a></li>
+ <li><a href="/partners/">Partners and Funding</a></li>
+ <li><a href="/relatedprojects/">Related projects</a></li>
+ <li><a href="/theyaretalkingaboutus/">They are talking about us</a></li>
+ <li><a href="/inaction/">GUDHI in action</a></li>
</ul>
</li>
<li class="divider"></li>
<li class="has-dropdown">
- <a href="#">Download</a>
+ <a href="#">Download</a>
<ul class="dropdown">
- <li><a href="/licensing/">Licensing</a></li>
- <li><a href="https://github.com/GUDHI/gudhi-devel/releases/latest" target="_blank">Get the latest sources</a></li>
- <li><a href="/conda/">Conda package</a></li>
- <li><a href="/dockerfile/">Dockerfile</a></li>
+ <li><a href="/licensing/">Licensing</a></li>
+ <li><a href="https://github.com/GUDHI/gudhi-devel/releases/latest" target="_blank">Get the latest sources</a></li>
+ <li><a href="/conda/">Conda package</a></li>
+ <li><a href="https://pypi.org/project/gudhi/" target="_blank">Pip package</a></li>
+ <li><a href="/dockerfile/">Dockerfile</a></li>
</ul>
</li>
<li class="divider"></li>
<li class="has-dropdown">
- <a href="#">Documentation</a>
+ <a href="#">Documentation</a>
<ul class="dropdown">
- <li><a href="/introduction/">Introduction</a></li>
- <li><a href="https://gudhi.inria.fr/doc/latest/installation.html">C++ installation manual</a></li>
- <li><a href="https://gudhi.inria.fr/doc/latest/">C++ documentation</a></li>
- <li><a href="https://gudhi.inria.fr/python/latest/installation.html">Python installation manual</a></li>
- <li><a href="https://gudhi.inria.fr/python/latest/">Python documentation</a></li>
- <li><a href="/utils/">Utilities</a></li>
- <li><a href="/tutorials/">Tutorials</a></li>
+ <li><a href="/introduction/">Introduction</a></li>
+ <li><a href="/doc/latest/installation.html">C++ installation manual</a></li>
+ <li><a href="/doc/latest/">C++ documentation</a></li>
+ <li><a href="/python/latest/installation.html">Python installation manual</a></li>
+ <li><a href="/python/latest/">Python documentation</a></li>
+ <li><a href="/utils/">Utilities</a></li>
+ <li><a href="/tutorials/">Tutorials</a></li>
</ul>
</li>
<li class="divider"></li>
- <li><a href="/interfaces/">Interfaces</a></li>
+ <li><a href="/interfaces/">Interfaces</a></li>
<li class="divider"></li>
</ul>
</section>
</nav>
- </div><!-- /#navigation -->
- <!-- GUDHI website header BEGIN -->
+ </div><!-- /#navigation -->
+ <!-- GUDHI website header END -->
{%- block header %}{% endblock %}
diff --git a/src/python/doc/alpha_complex_sum.inc b/src/python/doc/alpha_complex_sum.inc
index 3aba0d71..aeab493f 100644
--- a/src/python/doc/alpha_complex_sum.inc
+++ b/src/python/doc/alpha_complex_sum.inc
@@ -3,15 +3,13 @@
+----------------------------------------------------------------+-------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+
| .. figure:: | Alpha complex is a simplicial complex constructed from the finite | :Author: Vincent Rouvreau |
- | ../../doc/Alpha_complex/alpha_complex_representation.png | cells of a Delaunay Triangulation. | |
- | :alt: Alpha complex representation | | :Since: GUDHI 2.0.0 |
- | :figclass: align-center | The filtration value of each simplex is computed as the **square** of | |
- | | the circumradius of the simplex if the circumsphere is empty (the | :License: MIT (`GPL v3 </licensing/>`_) |
- | | simplex is then said to be Gabriel), and as the minimum of the | |
- | | filtration values of the codimension 1 cofaces that make it not | :Requires: `Eigen <installation.html#eigen>`_ :math:`\geq` 3.1.0 and `CGAL <installation.html#cgal>`_ :math:`\geq` 4.11.0 |
- | | Gabriel otherwise. | |
+ | ../../doc/Alpha_complex/alpha_complex_representation.png | cells of a Delaunay Triangulation. It has the same persistent homology | |
+ | :alt: Alpha complex representation | as the Čech complex and is significantly smaller. | :Since: GUDHI 2.0.0 |
+ | :figclass: align-center | | |
+ | | | :License: MIT (`GPL v3 </licensing/>`_) |
+ | | | |
+ | | | :Requires: `Eigen <installation.html#eigen>`_ :math:`\geq` 3.1.0 and `CGAL <installation.html#cgal>`_ :math:`\geq` 4.11.0 |
| | | |
- | | For performances reasons, it is advised to use CGAL :math:`\geq` 5.0.0. | |
+----------------------------------------------------------------+-------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+
| * :doc:`alpha_complex_user` | * :doc:`alpha_complex_ref` |
+----------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
diff --git a/src/python/doc/alpha_complex_user.rst b/src/python/doc/alpha_complex_user.rst
index d49f45b4..fffcb3db 100644
--- a/src/python/doc/alpha_complex_user.rst
+++ b/src/python/doc/alpha_complex_user.rst
@@ -9,15 +9,33 @@ Definition
.. include:: alpha_complex_sum.inc
-`AlphaComplex` is constructing a :doc:`SimplexTree <simplex_tree_ref>` using
+:doc:`AlphaComplex <alpha_complex_ref>` constructs a :doc:`SimplexTree <simplex_tree_ref>` using
`Delaunay Triangulation <http://doc.cgal.org/latest/Triangulation/index.html#Chapter_Triangulations>`_
:cite:`cgal:hdj-t-19b` from the `Computational Geometry Algorithms Library <http://www.cgal.org/>`_
:cite:`cgal:eb-19b`.
Remarks
^^^^^^^
-When an :math:`\alpha`-complex is constructed with an infinite value of :math:`\alpha^2`,
-the complex is a Delaunay complex (with special filtration values).
+* When an :math:`\alpha`-complex is constructed with an infinite value of :math:`\alpha^2`, the complex is a Delaunay
+ complex (with special filtration values). The Delaunay complex without filtration values is also available by
+ passing :code:`default_filtration_value = True` to :func:`~gudhi.AlphaComplex.create_simplex_tree`.
+* For people only interested in the topology of the Alpha complex (for instance persistence), the Alpha complex is
+  equivalent to the `Čech complex <https://gudhi.inria.fr/doc/latest/group__cech__complex.html>`_ and much smaller if
+  you do not bound the radii. The `Čech complex <https://gudhi.inria.fr/doc/latest/group__cech__complex.html>`_ can
+  still make sense in higher dimensions precisely because you can bound the radii.
+* If you pass :code:`precision = 'exact'` to :func:`~gudhi.AlphaComplex.__init__`, the filtration values are the exact
+ ones converted to float. This can be very slow.
+ If you pass :code:`precision = 'safe'` (the default), the filtration values are only
+ guaranteed to have a small multiplicative error compared to the exact value.
+ A drawback, when computing persistence, is that an empty exact interval [10^12,10^12] may become a
+ non-empty approximate interval [10^12,10^12+10^6].
+ Using :code:`precision = 'fast'` makes the computations slightly faster, and the combinatorics are still exact, but
+ the computation of filtration values can exceptionally be arbitrarily bad. In all cases, we still guarantee that the
+ output is a valid filtration (faces have a filtration value no larger than their cofaces).
+* For performance reasons, it is advised to use Alpha_complex with `CGAL <installation.html#cgal>`_ :math:`\geq` 5.0.0.
+* The vertices in the output simplex tree are not guaranteed to match the order of the input points. One can use
+ :func:`~gudhi.AlphaComplex.get_point` to get the initial point back.
Example from points
-------------------
@@ -160,49 +178,22 @@ In the following example, a threshold of :math:`\alpha^2 = 32.0` is used.
Example from OFF file
^^^^^^^^^^^^^^^^^^^^^
-This example builds the Delaunay triangulation from the points given by an OFF file, and initializes the alpha complex
-with it.
+This example builds the alpha complex from 300 random points on a 2-torus.
+Then, it computes the persistence diagram and displays it:
-Then, it is asked to display information about the alpha complex:
-
-.. testcode::
+.. plot::
+ :include-source:
+ import matplotlib.pyplot as plt
import gudhi
alpha_complex = gudhi.AlphaComplex(off_file=gudhi.__root_source_dir__ + \
- '/data/points/alphacomplexdoc.off')
- simplex_tree = alpha_complex.create_simplex_tree(max_alpha_square=32.0)
+ '/data/points/tore3D_300.off')
+ simplex_tree = alpha_complex.create_simplex_tree()
result_str = 'Alpha complex is of dimension ' + repr(simplex_tree.dimension()) + ' - ' + \
repr(simplex_tree.num_simplices()) + ' simplices - ' + \
repr(simplex_tree.num_vertices()) + ' vertices.'
print(result_str)
- fmt = '%s -> %.2f'
- for filtered_value in simplex_tree.get_filtration():
- print(fmt % tuple(filtered_value))
-
-the program output is:
-
-.. testoutput::
-
- Alpha complex is of dimension 2 - 20 simplices - 7 vertices.
- [0] -> 0.00
- [1] -> 0.00
- [2] -> 0.00
- [3] -> 0.00
- [4] -> 0.00
- [5] -> 0.00
- [6] -> 0.00
- [2, 3] -> 6.25
- [4, 5] -> 7.25
- [0, 2] -> 8.50
- [0, 1] -> 9.25
- [1, 3] -> 10.00
- [1, 2] -> 11.25
- [1, 2, 3] -> 12.50
- [0, 1, 2] -> 13.00
- [5, 6] -> 13.25
- [2, 4] -> 20.00
- [4, 6] -> 22.74
- [4, 5, 6] -> 22.74
- [3, 6] -> 30.25
-
+ diag = simplex_tree.persistence()
+ gudhi.plot_persistence_diagram(diag)
+ plt.show()
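A minimal sketch combining the `precision` and `default_filtration_value` options introduced in this change (the point set is illustrative):

.. code-block:: python

    import gudhi

    points = [[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]

    # 'fast' trades exactness of the filtration values for speed; 'safe' is the default.
    alpha = gudhi.AlphaComplex(points=points, precision='fast')

    # Skip the filtration values entirely to obtain the plain Delaunay complex.
    stree = alpha.create_simplex_tree(default_filtration_value=True)
    print(stree.num_simplices())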
diff --git a/src/python/doc/bottleneck_distance_user.rst b/src/python/doc/bottleneck_distance_user.rst
index 89da89d3..6c6e08d9 100644
--- a/src/python/doc/bottleneck_distance_user.rst
+++ b/src/python/doc/bottleneck_distance_user.rst
@@ -9,14 +9,23 @@ Definition
.. include:: bottleneck_distance_sum.inc
-This implementation is based on ideas from "Geometry Helps in Bottleneck Matching and Related Problems"
-:cite:`DBLP:journals/algorithmica/EfratIK01`. Another relevant publication, although it was not used is
-"Geometry Helps to Compare Persistence Diagrams" :cite:`Kerber:2017:GHC:3047249.3064175`.
+This implementation by François Godi is based on ideas from "Geometry Helps in Bottleneck Matching and Related Problems"
+:cite:`DBLP:journals/algorithmica/EfratIK01` and requires `CGAL <installation.html#cgal>`_ (`GPL v3 </licensing/>`_).
-Function
---------
.. autofunction:: gudhi.bottleneck_distance
+This other implementation comes from `Hera
+<https://bitbucket.org/grey_narn/hera/src/master/>`_ (BSD-3-Clause) which is
+based on "Geometry Helps to Compare Persistence Diagrams"
+:cite:`Kerber:2017:GHC:3047249.3064175` by Michael Kerber, Dmitriy
+Morozov, and Arnur Nigmetov.
+
+.. warning::
+ Beware that its approximation allows for a multiplicative error, while the function above uses an additive error.
+
+.. autofunction:: gudhi.hera.bottleneck_distance
+
+
Distance computation
--------------------
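A minimal sketch calling both implementations described above on the same pair of diagrams (the diagrams are illustrative):

.. code-block:: python

    import gudhi
    import gudhi.hera

    diag1 = [[2.7, 3.7], [9.6, 14.0], [34.2, 34.974]]
    diag2 = [[2.8, 4.45], [9.5, 14.1]]

    # CGAL-based implementation: additive approximation error by default.
    print(gudhi.bottleneck_distance(diag1, diag2))

    # Hera-based implementation: multiplicative approximation error.
    print(gudhi.hera.bottleneck_distance(diag1, diag2))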
diff --git a/src/python/doc/clustering.inc b/src/python/doc/clustering.inc
new file mode 100644
index 00000000..2d07ae88
--- /dev/null
+++ b/src/python/doc/clustering.inc
@@ -0,0 +1,12 @@
+.. table::
+ :widths: 30 40 30
+
+ +--------------------------+-------------------------------------------------------+---------------------------------+
+ | .. figure:: | Clustering tools. | :Author: Marc Glisse |
+ | img/spiral-color.png | | |
+ | | | :Since: GUDHI 3.3.0 |
+ | | | |
+ | | | :License: MIT |
+ +--------------------------+-------------------------------------------------------+---------------------------------+
+ | * :doc:`clustering` |
+ +--------------------------+-----------------------------------------------------------------------------------------+
diff --git a/src/python/doc/clustering.rst b/src/python/doc/clustering.rst
new file mode 100644
index 00000000..c5a57d3c
--- /dev/null
+++ b/src/python/doc/clustering.rst
@@ -0,0 +1,73 @@
+:orphan:
+
+.. To get rid of WARNING: document isn't included in any toctree
+
+=================
+Clustering manual
+=================
+
+We provide an implementation of ToMATo :cite:`tomato`, a persistence-based clustering algorithm. In short, this algorithm uses a density estimator and a neighborhood graph, starts with a mode-seeking phase (naive hill-climbing) to build initial clusters, and finishes by merging clusters based on their prominence.
+
+The merging phase depends on a parameter, which is the minimum prominence a cluster needs to avoid getting merged into another, bigger cluster. This parameter determines the number of clusters, and for convenience we allow you to choose the number of clusters instead. Decreasing the prominence threshold defines a hierarchy of clusters: if 2 points are in separate clusters when we have k clusters, they are still in separate clusters for k+1 clusters.
+
+As a by-product, we produce the persistence diagram of the merge tree of the initial clusters. This is a convenient graphical tool to help decide how many clusters we want.
+
+.. plot::
+ :context:
+ :include-source:
+
+ import gudhi
+ f = open(gudhi.__root_source_dir__ + '/data/points/spiral_2d.csv', 'r')
+ import numpy as np
+ data = np.loadtxt(f)
+ import matplotlib.pyplot as plt
+ plt.scatter(data[:,0],data[:,1],marker='.',s=1)
+ plt.show()
+
+.. plot::
+ :context: close-figs
+ :include-source:
+
+ from gudhi.clustering.tomato import Tomato
+ t = Tomato()
+ t.fit(data)
+ t.plot_diagram()
+
+As one can see in `t.n_clusters_`, the algorithm found 6316 initial clusters. The diagram shows their prominence as their distance to the diagonal. There is always one point infinitely far from the diagonal: there is at least one cluster. Among the finite points, one seems significantly farther from the diagonal than the rest, which indicates that splitting the points into 2 clusters may be a sensible idea.
+
+.. plot::
+ :context: close-figs
+ :include-source:
+
+ t.n_clusters_=2
+ plt.scatter(data[:,0],data[:,1],marker='.',s=1,c=t.labels_)
+ plt.show()
+
+Of course this is just the result for one set of parameters. We can ask for a different density estimator and a different neighborhood graph computed with different parameters.
+
+.. plot::
+ :context: close-figs
+ :include-source:
+
+ t = Tomato(density_type='DTM', k=100)
+ t.fit(data)
+ t.plot_diagram()
+
+This makes the number of clusters clearer, and changes the shape of the clusters a bit.
+
+A quick look at the corresponding density estimate:
+
+.. plot::
+ :context: close-figs
+ :include-source:
+
+ plt.scatter(data[:,0],data[:,1],marker='.',s=1,c=t.weights_)
+ plt.show()
+
+The code provides a few density estimators and graph constructions for convenience when first experimenting, but it is actually expected that advanced users provide their own graph and density estimates instead of point coordinates.
+
+Since the algorithm essentially computes basins of attraction, you are also encouraged to use it on functions that do not represent densities at all.
+
+.. autoclass:: gudhi.clustering.tomato.Tomato
+ :members:
+ :special-members: __init__
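A short sketch of selecting clusters by prominence rather than by count; the `merge_threshold` constructor parameter is an assumption based on the description above, so treat its exact name as hypothetical:

.. code-block:: python

    import numpy as np
    from gudhi.clustering.tomato import Tomato

    data = np.random.default_rng(0).normal(size=(500, 2))

    # Assumed parameter: merge any cluster whose prominence is below 0.1.
    t = Tomato(density_type='DTM', k=20, merge_threshold=0.1)
    t.fit(data)
    print(t.n_clusters_, t.labels_[:10])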
diff --git a/src/python/doc/cubical_complex_sum.inc b/src/python/doc/cubical_complex_sum.inc
index 28bf8e94..87db184d 100644
--- a/src/python/doc/cubical_complex_sum.inc
+++ b/src/python/doc/cubical_complex_sum.inc
@@ -2,9 +2,9 @@
:widths: 30 40 30
+--------------------------------------------------------------------------+----------------------------------------------------------------------+-----------------------------+
- | .. figure:: | The cubical complex is an example of a structured complex useful in | :Author: Pawel Dlotko |
- | ../../doc/Bitmap_cubical_complex/Cubical_complex_representation.png | computational mathematics (specially rigorous numerics) and image | |
- | :alt: Cubical complex representation | analysis. | :Since: GUDHI 2.0.0 |
+ | .. figure:: | The cubical complex represents a grid as a cell complex with | :Author: Pawel Dlotko |
+ | ../../doc/Bitmap_cubical_complex/Cubical_complex_representation.png | cells of all dimensions. | |
+ | :alt: Cubical complex representation | | :Since: GUDHI 2.0.0 |
| :figclass: align-center | | |
| | | :License: MIT |
| | | |
diff --git a/src/python/doc/examples.rst b/src/python/doc/examples.rst
index a42227e3..76e5d4c7 100644
--- a/src/python/doc/examples.rst
+++ b/src/python/doc/examples.rst
@@ -7,27 +7,30 @@ Examples
.. only:: builder_html
- * :download:`rips_complex_from_points_example.py <../example/rips_complex_from_points_example.py>`
+ * :download:`alpha_complex_diagram_persistence_from_off_file_example.py <../example/alpha_complex_diagram_persistence_from_off_file_example.py>`
* :download:`alpha_complex_from_points_example.py <../example/alpha_complex_from_points_example.py>`
- * :download:`simplex_tree_example.py <../example/simplex_tree_example.py>`
* :download:`alpha_rips_persistence_bottleneck_distance.py <../example/alpha_rips_persistence_bottleneck_distance.py>`
- * :download:`tangential_complex_plain_homology_from_off_file_example.py <../example/tangential_complex_plain_homology_from_off_file_example.py>`
- * :download:`alpha_complex_diagram_persistence_from_off_file_example.py <../example/alpha_complex_diagram_persistence_from_off_file_example.py>`
- * :download:`periodic_cubical_complex_barcode_persistence_from_perseus_file_example.py <../example/periodic_cubical_complex_barcode_persistence_from_perseus_file_example.py>`
* :download:`bottleneck_basic_example.py <../example/bottleneck_basic_example.py>`
- * :download:`gudhi_graphical_tools_example.py <../example/gudhi_graphical_tools_example.py>`
- * :download:`plot_simplex_tree_dim012.py <../example/plot_simplex_tree_dim012.py>`
- * :download:`plot_rips_complex.py <../example/plot_rips_complex.py>`
- * :download:`plot_alpha_complex.py <../example/plot_alpha_complex.py>`
- * :download:`witness_complex_from_nearest_landmark_table.py <../example/witness_complex_from_nearest_landmark_table.py>`
+ * :download:`coordinate_graph_induced_complex.py <../example/coordinate_graph_induced_complex.py>`
+ * :download:`diagram_vectorizations_distances_kernels.py <../example/diagram_vectorizations_distances_kernels.py>`
* :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>`
* :download:`euclidean_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py>`
- * :download:`rips_complex_diagram_persistence_from_off_file_example.py <../example/rips_complex_diagram_persistence_from_off_file_example.py>`
+ * :download:`functional_graph_induced_complex.py <../example/functional_graph_induced_complex.py>`
+ * :download:`gudhi_graphical_tools_example.py <../example/gudhi_graphical_tools_example.py>`
+ * :download:`nerve_of_a_covering.py <../example/nerve_of_a_covering.py>`
+ * :download:`periodic_cubical_complex_barcode_persistence_from_perseus_file_example.py <../example/periodic_cubical_complex_barcode_persistence_from_perseus_file_example.py>`
+ * :download:`plot_alpha_complex.py <../example/plot_alpha_complex.py>`
+ * :download:`plot_rips_complex.py <../example/plot_rips_complex.py>`
+ * :download:`plot_simplex_tree_dim012.py <../example/plot_simplex_tree_dim012.py>`
+ * :download:`random_cubical_complex_persistence_example.py <../example/random_cubical_complex_persistence_example.py>`
+ * :download:`rips_complex_diagram_persistence_from_correlation_matrix_file_example.py <../example/rips_complex_diagram_persistence_from_correlation_matrix_file_example.py>`
* :download:`rips_complex_diagram_persistence_from_distance_matrix_file_example.py <../example/rips_complex_diagram_persistence_from_distance_matrix_file_example.py>`
+ * :download:`rips_complex_diagram_persistence_from_off_file_example.py <../example/rips_complex_diagram_persistence_from_off_file_example.py>`
+ * :download:`rips_complex_edge_collapse_example.py <../example/rips_complex_edge_collapse_example.py>`
+ * :download:`rips_complex_from_points_example.py <../example/rips_complex_from_points_example.py>`
* :download:`rips_persistence_diagram.py <../example/rips_persistence_diagram.py>`
+ * :download:`simplex_tree_example.py <../example/simplex_tree_example.py>`
* :download:`sparse_rips_persistence_diagram.py <../example/sparse_rips_persistence_diagram.py>`
- * :download:`random_cubical_complex_persistence_example.py <../example/random_cubical_complex_persistence_example.py>`
- * :download:`coordinate_graph_induced_complex.py <../example/coordinate_graph_induced_complex.py>`
- * :download:`functional_graph_induced_complex.py <../example/functional_graph_induced_complex.py>`
+ * :download:`tangential_complex_plain_homology_from_off_file_example.py <../example/tangential_complex_plain_homology_from_off_file_example.py>`
* :download:`voronoi_graph_induced_complex.py <../example/voronoi_graph_induced_complex.py>`
- * :download:`nerve_of_a_covering.py <../example/nerve_of_a_covering.py>`
+ * :download:`witness_complex_from_nearest_landmark_table.py <../example/witness_complex_from_nearest_landmark_table.py>`
diff --git a/src/python/doc/img/spiral-color.png b/src/python/doc/img/spiral-color.png
new file mode 100644
index 00000000..21b62dfc
--- /dev/null
+++ b/src/python/doc/img/spiral-color.png
Binary files differ
diff --git a/src/python/doc/index.rst b/src/python/doc/index.rst
index 13e51047..040e57a4 100644
--- a/src/python/doc/index.rst
+++ b/src/python/doc/index.rst
@@ -53,8 +53,8 @@ Tangential complex
Topological descriptors computation
***********************************
-Persistence cohomology
-======================
+Persistent cohomology
+=====================
.. include:: persistent_cohomology_sum.inc
@@ -86,3 +86,8 @@ Point cloud utilities
*********************
.. include:: point_cloud_sum.inc
+
+Clustering
+**********
+
+.. include:: clustering.inc
diff --git a/src/python/doc/installation.rst b/src/python/doc/installation.rst
index de09c5b3..78e1af73 100644
--- a/src/python/doc/installation.rst
+++ b/src/python/doc/installation.rst
@@ -5,20 +5,47 @@
Installation
############
-Conda
-*****
-The easiest way to install the Python version of GUDHI is using
-`conda <https://gudhi.inria.fr/conda/>`_.
+Packages
+********
+The easiest way to install the Python version of GUDHI is using pre-built packages.
+We recommend `conda <https://gudhi.inria.fr/conda/>`_:
+
+.. code-block:: bash
+
+ conda install -c conda-forge gudhi
+
+Gudhi is also available on `PyPI <https://pypi.org/project/gudhi/>`_:
+
+.. code-block:: bash
+
+ pip install gudhi
+
+Third-party packages are also available, for instance on Debian or Ubuntu:
+
+.. code-block:: bash
+
+ apt install python3-gudhi
+
+In all cases, you may still want to install some of the optional `run time dependencies`_.
Compiling
*********
+These instructions are for people who want to compile GUDHI from source; they are
+unnecessary if you installed a binary package of GUDHI as above. They assume that
+you have downloaded a `release <https://github.com/GUDHI/gudhi-devel/releases>`_,
+with a name like `gudhi.3.2.0.tar.gz`, then run `tar xf gudhi.3.2.0.tar.gz`, which
+created a directory `gudhi.3.2.0`, hereinafter referred to as `/path-to-gudhi/`.
+If you are instead using a git checkout, beware that the paths are a bit
+different, and in particular the `python/` subdirectory is actually `src/python/`
+there.
+
The library uses c++14 and requires `Boost <https://www.boost.org/>`_ :math:`\geq` 1.56.0,
`CMake <https://www.cmake.org/>`_ :math:`\geq` 3.1 to generate makefiles,
`NumPy <http://numpy.org>`_, `Cython <https://www.cython.org/>`_ and
`pybind11 <https://github.com/pybind/pybind11>`_ to compile
the GUDHI Python module.
It is a multi-platform library and compiles on Linux, Mac OSX and Visual
-Studio 2017.
+Studio 2017 or later.
On `Windows <https://wiki.python.org/moin/WindowsCompilers>`_, only Python
:math:`\geq` 3.5 is supported because of the required Visual Studio version.
@@ -260,6 +287,13 @@ a flag `enable_autodiff=True` is used). In order to reduce code duplication, we
use `EagerPy <https://eagerpy.jonasrauber.de/>`_ which wraps arrays from
PyTorch, TensorFlow and JAX in a common interface.
+Joblib
+------
+
+`Joblib <https://joblib.readthedocs.io/>`_ is used both as a dependency of `Scikit-learn`_,
+and directly for parallelism in some modules (:class:`~gudhi.point_cloud.knn.KNearestNeighbors`,
+:func:`~gudhi.representations.metrics.pairwise_persistence_diagram_distances`).
+
Hnswlib
-------
@@ -289,6 +323,35 @@ The following examples require the `Matplotlib <http://matplotlib.org>`_:
* :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>`
* :download:`euclidean_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py>`
+LaTeX
+~~~~~
+
+If a sufficiently complete LaTeX toolchain is available (including dvipng and ghostscript), the LaTeX option of
+matplotlib is enabled for prettier captions (cf.
+`matplotlib text rendering with LaTeX <https://matplotlib.org/3.3.0/tutorials/text/usetex.html>`_).
+It also requires the `type1cm` LaTeX package (which is not detected by matplotlib).
+
+If you are facing issues with LaTeX rendering, like this one:
+
+.. code-block:: none
+
+ Traceback (most recent call last):
+ File "/usr/lib/python3/dist-packages/matplotlib/texmanager.py", line 302, in _run_checked_subprocess
+ report = subprocess.check_output(command,
+ ...
+ ! LaTeX Error: File `type1cm.sty' not found.
+ ...
+
+This is because the required LaTeX package is not installed on your system. On Ubuntu systems you can install texlive-full
+(for all LaTeX packages), or more specific packages like texlive-latex-extra and cm-super.
+
+You can still deactivate LaTeX rendering by saying:
+
+.. code-block:: python
+
+ import gudhi
+ gudhi.persistence_graphical_tools._gudhi_matplotlib_use_tex=False
+
PyKeOps
-------
diff --git a/src/python/doc/nerve_gic_complex_user.rst b/src/python/doc/nerve_gic_complex_user.rst
index 0e67fc78..0b820abf 100644
--- a/src/python/doc/nerve_gic_complex_user.rst
+++ b/src/python/doc/nerve_gic_complex_user.rst
@@ -50,7 +50,7 @@ The cover C comes from the preimages of intervals (10 intervals with gain 0.3)
covering the height function (coordinate 2),
which are then refined into their connected components using the triangulation of the .OFF file.
-.. testcode::
+.. code-block:: python
import gudhi
nerve_complex = gudhi.CoverComplex()
@@ -99,9 +99,6 @@ the program output is:
[-0.171433, 0.367393]
[-0.909111, 0.745853]
0 interval(s) in dimension 1:
-
-.. testoutput::
-
Nerve is of dimension 1 - 41 simplices - 21 vertices.
[0]
[1]
diff --git a/src/python/doc/persistence_graphical_tools_user.rst b/src/python/doc/persistence_graphical_tools_user.rst
index b5a38eb1..d95b9d2b 100644
--- a/src/python/doc/persistence_graphical_tools_user.rst
+++ b/src/python/doc/persistence_graphical_tools_user.rst
@@ -90,3 +90,14 @@ If you want more information on a specific dimension, for instance:
gudhi.plot_persistence_density(persistence=pers_diag,
dimension=1, legend=True, axes=axes[1])
plt.show()
+
+LaTeX support
+-------------
+
+If you are facing issues with `LaTeX <installation.html#latex>`_ rendering, you can still deactivate LaTeX rendering by
+saying:
+
+.. code-block:: python
+
+ import gudhi
+ gudhi.persistence_graphical_tools._gudhi_matplotlib_use_tex=False
diff --git a/src/python/doc/persistent_cohomology_sum.inc b/src/python/doc/persistent_cohomology_sum.inc
index a1ff2eee..58e44b8a 100644
--- a/src/python/doc/persistent_cohomology_sum.inc
+++ b/src/python/doc/persistent_cohomology_sum.inc
@@ -6,9 +6,7 @@
| ../../doc/Persistent_cohomology/3DTorus_poch.png | a sequence of (homology) groups, capturing global topological | |
| :figclass: align-center | features like connected components, holes, cavities, etc. Persistent | :Since: GUDHI 2.0.0 |
| | homology studies the evolution -- birth, life and death -- of these | |
- | Rips Persistent Cohomology on a 3D | features when the topological space is changing. Consequently, the | :License: MIT |
- | Torus | theory is essentially composed of three elements: topological spaces, | |
- | | their homology groups and an evolution scheme. | |
+ | Rips Persistent Cohomology on a 3D Torus | features when the topological space is changing. | :License: MIT |
| | | |
| | Computation of persistent cohomology using the algorithm of | |
| | :cite:`DBLP:journals/dcg/SilvaMV11` and | |
diff --git a/src/python/doc/point_cloud_sum.inc b/src/python/doc/point_cloud_sum.inc
index 4315cea6..f955c3ab 100644
--- a/src/python/doc/point_cloud_sum.inc
+++ b/src/python/doc/point_cloud_sum.inc
@@ -3,8 +3,8 @@
+-----------------------------------+---------------------------------------------------------------+-------------------------------------------------------------------+
| | :math:`(x_1, x_2, \ldots, x_d)` | Utilities to process point clouds: read from file, subsample, | :Authors: Vincent Rouvreau, Marc Glisse, Masatoshi Takenouchi |
- | | :math:`(y_1, y_2, \ldots, y_d)` | find neighbors, embed time series in higher dimension, etc. | |
- | | | :Since: GUDHI 2.0.0 |
+ | | :math:`(y_1, y_2, \ldots, y_d)` | find neighbors, embed time series in higher dimension, | |
+ | | estimate a density, etc. | :Since: GUDHI 2.0.0 |
| | | |
| | | :License: MIT (`GPL v3 </licensing/>`_, BSD-3-Clause, Apache-2.0) |
+-----------------------------------+---------------------------------------------------------------+-------------------------------------------------------------------+
diff --git a/src/python/doc/python3-sphinx-build.py b/src/python/doc/python3-sphinx-build.py
deleted file mode 100755
index 84d158cf..00000000
--- a/src/python/doc/python3-sphinx-build.py
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-Emulate sphinx-build for python3
-"""
-
-from sys import exit, argv
-from sphinx import main
-
-if __name__ == '__main__':
- exit(main(argv))
diff --git a/src/python/doc/representations_sum.inc b/src/python/doc/representations_sum.inc
index 323a0920..4298aea9 100644
--- a/src/python/doc/representations_sum.inc
+++ b/src/python/doc/representations_sum.inc
@@ -2,7 +2,7 @@
:widths: 30 40 30
+------------------------------------------------------------------+----------------------------------------------------------------+-------------------------------------------------------------+
- | .. figure:: | Vectorizations, distances and kernels that work on persistence | :Author: Mathieu Carrière |
+ | .. figure:: | Vectorizations, distances and kernels that work on persistence | :Author: Mathieu Carrière, Martin Royer |
| img/sklearn-tda.png | diagrams, compatible with scikit-learn. | |
| | | :Since: GUDHI 3.1.0 |
| | | |
diff --git a/src/python/doc/rips_complex_ref.rst b/src/python/doc/rips_complex_ref.rst
index 5f3e46c1..f0582d5c 100644
--- a/src/python/doc/rips_complex_ref.rst
+++ b/src/python/doc/rips_complex_ref.rst
@@ -13,8 +13,6 @@ Rips complex reference manual
.. automethod:: gudhi.RipsComplex.__init__
-.. _weighted-rips-complex-reference-manual:
-
======================================
Weighted Rips complex reference manual
======================================
@@ -25,3 +23,14 @@ Weighted Rips complex reference manual
:show-inheritance:
.. automethod:: gudhi.weighted_rips_complex.WeightedRipsComplex.__init__
+
+=================================
+DTM Rips complex reference manual
+=================================
+
+.. autoclass:: gudhi.dtm_rips_complex.DTMRipsComplex
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+ .. automethod:: gudhi.dtm_rips_complex.DTMRipsComplex.__init__ \ No newline at end of file
diff --git a/src/python/doc/rips_complex_sum.inc b/src/python/doc/rips_complex_sum.inc
index f7580714..c123ea2a 100644
--- a/src/python/doc/rips_complex_sum.inc
+++ b/src/python/doc/rips_complex_sum.inc
@@ -2,18 +2,13 @@
:widths: 30 40 30
+----------------------------------------------------------------+------------------------------------------------------------------------+----------------------------------------------------------------------+
- | .. figure:: | Rips complex is a simplicial complex constructed from a one skeleton | :Authors: Clément Maria, Pawel Dlotko, Vincent Rouvreau, Marc Glisse |
- | ../../doc/Rips_complex/rips_complex_representation.png | graph. | |
+ | .. figure:: | The Vietoris-Rips complex is a simplicial complex built as the | :Authors: Clément Maria, Pawel Dlotko, Vincent Rouvreau, Marc Glisse |
+ | ../../doc/Rips_complex/rips_complex_representation.png | clique-complex of a proximity graph. | |
| :figclass: align-center | | :Since: GUDHI 2.0.0 |
- | | The filtration value of each edge is computed from a user-given | |
- | | distance function and is inserted until a user-given threshold | :License: MIT |
- | | value. | |
+ | | We also provide sparse approximations, to speed-up the computation | |
+ | | of persistent homology, and weighted versions, which are more robust | :License: MIT |
+ | | to outliers. | |
| | | |
- | | This complex can be built from a point cloud and a distance function, | |
- | | or from a distance matrix. | |
- | | | |
- | | Weighted Rips complex constructs a simplicial complex from a distance | |
- | | matrix and weights on vertices. | |
+----------------------------------------------------------------+------------------------------------------------------------------------+----------------------------------------------------------------------+
| * :doc:`rips_complex_user` | * :doc:`rips_complex_ref` |
+----------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------+
diff --git a/src/python/doc/rips_complex_user.rst b/src/python/doc/rips_complex_user.rst
index 819568be..6048cc4e 100644
--- a/src/python/doc/rips_complex_user.rst
+++ b/src/python/doc/rips_complex_user.rst
@@ -378,6 +378,7 @@ Example from a point cloud combined with DistanceToMeasure
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Combining with DistanceToMeasure, one can compute the DTM-filtration of a point set, as in `this notebook <https://github.com/GUDHI/TDA-tutorial/blob/master/Tuto-GUDHI-DTM-filtrations.ipynb>`_.
+Note that the `DTMRipsComplex <rips_complex_user.html#dtm-rips-complex>`_ class provides exactly this functionality.
.. testcode::
@@ -398,3 +399,24 @@ The output is:
.. testoutput::
[(0, (3.1622776601683795, inf)), (0, (3.1622776601683795, 5.39834563766817)), (0, (3.1622776601683795, 5.39834563766817))]
+
+DTM Rips Complex
+----------------
+
+:class:`~gudhi.dtm_rips_complex.DTMRipsComplex` builds a simplicial complex from a point set or a full distance matrix (in the form of an ndarray), as described in the example above.
+This class constructs a weighted Rips complex giving larger weights to outliers, which reduces their impact on the persistence diagram. See `this notebook <https://github.com/GUDHI/TDA-tutorial/blob/master/Tuto-GUDHI-DTM-filtrations.ipynb>`_ for some experiments.
+
+.. testcode::
+
+ import numpy as np
+ from gudhi.dtm_rips_complex import DTMRipsComplex
+ pts = np.array([[2.0, 2.0], [0.0, 1.0], [3.0, 4.0]])
+ dtm_rips = DTMRipsComplex(points=pts, k=2)
+ st = dtm_rips.create_simplex_tree(max_dimension=2)
+ print(st.persistence())
+
+The output is:
+
+.. testoutput::
+
+ [(0, (3.1622776601683795, inf)), (0, (3.1622776601683795, 5.39834563766817)), (0, (3.1622776601683795, 5.39834563766817))]
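The class also accepts a full distance matrix, as stated above; a sketch follows, where the `distance_matrix` keyword is an assumption based on that description:

.. code-block:: python

    import numpy as np
    from scipy.spatial.distance import cdist
    from gudhi.dtm_rips_complex import DTMRipsComplex

    pts = np.array([[2.0, 2.0], [0.0, 1.0], [3.0, 4.0]])
    dists = cdist(pts, pts)  # full pairwise distance matrix

    # Assumed keyword: pass the matrix instead of the points.
    dtm_rips = DTMRipsComplex(distance_matrix=dists, k=2)
    st = dtm_rips.create_simplex_tree(max_dimension=2)
    print(st.persistence())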
diff --git a/src/python/doc/tangential_complex_sum.inc b/src/python/doc/tangential_complex_sum.inc
index 22314a2d..2f330a07 100644
--- a/src/python/doc/tangential_complex_sum.inc
+++ b/src/python/doc/tangential_complex_sum.inc
@@ -3,10 +3,10 @@
+----------------------------------------------------------------+------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+
| .. figure:: | A Tangential Delaunay complex is a simplicial complex designed to | :Author: Clément Jamin |
- | ../../doc/Tangential_complex/tc_examples.png | reconstruct a :math:`k`-dimensional manifold embedded in :math:`d`- | |
- | :figclass: align-center | dimensional Euclidean space. The input is a point sample coming from | :Since: GUDHI 2.0.0 |
- | | an unknown manifold. The running time depends only linearly on the | |
- | | extrinsic dimension :math:`d` and exponentially on the intrinsic | :License: MIT (`GPL v3 </licensing/>`_) |
+ | ../../doc/Tangential_complex/tc_examples.png | reconstruct a :math:`k`-dimensional manifold embedded in | |
+ | :figclass: align-center | :math:`d`-dimensional Euclidean space. The input is a point sample | :Since: GUDHI 2.0.0 |
+ | | coming from an unknown manifold. The running time depends only linearly| |
+ | | on the extrinsic dimension :math:`d` and exponentially on the intrinsic| :License: MIT (`GPL v3 </licensing/>`_) |
| | dimension :math:`k`. | |
| | | :Requires: `Eigen <installation.html#eigen>`_ :math:`\geq` 3.1.0 and `CGAL <installation.html#cgal>`_ :math:`\geq` 4.11.0 |
+----------------------------------------------------------------+------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+
diff --git a/src/python/example/rips_complex_edge_collapse_example.py b/src/python/example/rips_complex_edge_collapse_example.py
new file mode 100755
index 00000000..b26eb9fc
--- /dev/null
+++ b/src/python/example/rips_complex_edge_collapse_example.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+
+import gudhi
+import matplotlib.pyplot as plt
+import time
+
+""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ Author(s): Vincent Rouvreau
+
+ Copyright (C) 2016 Inria
+
+ Modification(s):
+ - YYYY/MM Author: Description of the modification
+"""
+
+__author__ = "Vincent Rouvreau"
+__copyright__ = "Copyright (C) 2020 Inria"
+__license__ = "MIT"
+
+
+print("#####################################################################")
+print("RipsComplex (only the one-skeleton) creation from tore3D_300.off file")
+
+off_file = gudhi.__root_source_dir__ + '/data/points/tore3D_300.off'
+point_cloud = gudhi.read_points_from_off_file(off_file = off_file)
+rips_complex = gudhi.RipsComplex(points=point_cloud, max_edge_length=12.0)
+simplex_tree = rips_complex.create_simplex_tree(max_dimension=1)
+print('1. Rips complex is of dimension ', simplex_tree.dimension(), ' - ',
+ simplex_tree.num_simplices(), ' simplices - ',
+ simplex_tree.num_vertices(), ' vertices.')
+
+# Expansion of this one-skeleton would require a lot of memory. Let's collapse it
+start = time.process_time()
+simplex_tree.collapse_edges()
+print('2. Rips complex is of dimension ', simplex_tree.dimension(), ' - ',
+ simplex_tree.num_simplices(), ' simplices - ',
+ simplex_tree.num_vertices(), ' vertices.')
+simplex_tree.expansion(3)
+diag = simplex_tree.persistence()
+print("Collapse, expansion and persistence computation took ", time.process_time() - start, " sec.")
+
+# Use subplots to display diagram and density side by side
+fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))
+gudhi.plot_persistence_diagram(diag, axes=axes[0])
+axes[0].set_title("Persistence after 1 collapse")
+
+# Collapse can be performed several times. Let's collapse it 3 times
+start = time.process_time()
+simplex_tree.collapse_edges(nb_iterations = 3)
+print('3. Rips complex is of dimension ', simplex_tree.dimension(), ' - ',
+ simplex_tree.num_simplices(), ' simplices - ',
+ simplex_tree.num_vertices(), ' vertices.')
+simplex_tree.expansion(3)
+diag = simplex_tree.persistence()
+print("Collapse, expansion and persistence computation took ", time.process_time() - start, " sec.")
+
+gudhi.plot_persistence_diagram(diag, axes=axes[1])
+axes[1].set_title("Persistence after 3 more collapses")
+
+# Plot the 2 persistence diagrams side by side to check the persistence is the same
+plt.show() \ No newline at end of file
diff --git a/src/python/gudhi/alpha_complex.pyx b/src/python/gudhi/alpha_complex.pyx
index d75e374a..ea128743 100644
--- a/src/python/gudhi/alpha_complex.pyx
+++ b/src/python/gudhi/alpha_complex.pyx
@@ -20,6 +20,7 @@ import os
from gudhi.simplex_tree cimport *
from gudhi.simplex_tree import SimplexTree
+from gudhi import read_points_from_off_file
__author__ = "Vincent Rouvreau"
__copyright__ = "Copyright (C) 2016 Inria"
@@ -27,11 +28,9 @@ __license__ = "GPL v3"
cdef extern from "Alpha_complex_interface.h" namespace "Gudhi":
cdef cppclass Alpha_complex_interface "Gudhi::alpha_complex::Alpha_complex_interface":
- Alpha_complex_interface(vector[vector[double]] points) nogil except +
- # bool from_file is a workaround for cython to find the correct signature
- Alpha_complex_interface(string off_file, bool from_file) nogil except +
+ Alpha_complex_interface(vector[vector[double]] points, bool fast_version, bool exact_version) nogil except +
vector[double] get_point(int vertex) nogil except +
- void create_simplex_tree(Simplex_tree_interface_full_featured* simplex_tree, double max_alpha_square) nogil except +
+ void create_simplex_tree(Simplex_tree_interface_full_featured* simplex_tree, double max_alpha_square, bool default_filtration_value) nogil except +
# AlphaComplex python interface
cdef class AlphaComplex:
@@ -53,10 +52,10 @@ cdef class AlphaComplex:
"""
- cdef Alpha_complex_interface * thisptr
+ cdef Alpha_complex_interface * this_ptr
# Fake constructor that does nothing but documenting the constructor
- def __init__(self, points=None, off_file=''):
+ def __init__(self, points=None, off_file='', precision='safe'):
"""AlphaComplex constructor.
:param points: A list of points in d-Dimension.
@@ -66,58 +65,65 @@ cdef class AlphaComplex:
:param off_file: An OFF file style name.
:type off_file: string
+
+ :param precision: Alpha complex precision can be 'fast', 'safe' or 'exact'. Default is 'safe'.
+ :type precision: string
"""
# The real cython constructor
- def __cinit__(self, points = None, off_file = ''):
+ def __cinit__(self, points = None, off_file = '', precision = 'safe'):
+ assert precision in ['fast', 'safe', 'exact'], "Alpha complex precision can only be 'fast', 'safe' or 'exact'"
+ cdef bool fast = precision == 'fast'
+ cdef bool exact = precision == 'exact'
+
cdef vector[vector[double]] pts
if off_file:
if os.path.isfile(off_file):
- self.thisptr = new Alpha_complex_interface(
- off_file.encode('utf-8'), True)
+ points = read_points_from_off_file(off_file = off_file)
else:
print("file " + off_file + " not found.")
- else:
- if points is None:
- # Empty Alpha construction
- points=[]
- pts = points
- with nogil:
- self.thisptr = new Alpha_complex_interface(pts)
-
+ if points is None:
+ # Empty Alpha construction
+ points=[]
+ pts = points
+ with nogil:
+ self.this_ptr = new Alpha_complex_interface(pts, fast, exact)
def __dealloc__(self):
- if self.thisptr != NULL:
- del self.thisptr
+ if self.this_ptr != NULL:
+ del self.this_ptr
def __is_defined(self):
"""Returns true if AlphaComplex pointer is not NULL.
"""
- return self.thisptr != NULL
+ return self.this_ptr != NULL
def get_point(self, vertex):
- """This function returns the point corresponding to a given vertex.
+ """This function returns the point corresponding to a given vertex from the :class:`~gudhi.SimplexTree`.
:param vertex: The vertex.
:type vertex: int
:rtype: list of float
:returns: the point.
"""
- return self.thisptr.get_point(vertex)
+ return self.this_ptr.get_point(vertex)
- def create_simplex_tree(self, max_alpha_square = float('inf')):
+ def create_simplex_tree(self, max_alpha_square = float('inf'), default_filtration_value = False):
"""
- :param max_alpha_square: The maximum alpha square threshold the
- simplices shall not exceed. Default is set to infinity, and
- there is very little point using anything else since it does
- not save time.
+ :param max_alpha_square: The maximum alpha square threshold the simplices shall not exceed. Default is set to
+ infinity, and there is very little point using anything else since it does not save time.
:type max_alpha_square: float
+ :param default_filtration_value: Set this value to `True` if the filtration values do not need to be computed
+ (they will be set to `NaN`). Default value is `False` (which means compute the filtration values).
+ :type default_filtration_value: bool
:returns: A simplex tree created from the Delaunay Triangulation.
:rtype: SimplexTree
"""
stree = SimplexTree()
cdef double mas = max_alpha_square
cdef intptr_t stree_int_ptr=stree.thisptr
+        cdef bool default_filtration = default_filtration_value == True
with nogil:
- self.thisptr.create_simplex_tree(<Simplex_tree_interface_full_featured*>stree_int_ptr, mas)
+ self.this_ptr.create_simplex_tree(<Simplex_tree_interface_full_featured*>stree_int_ptr,
+                mas, default_filtration)
return stree
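
As a quick illustration of the new options, a sketch outside the diff, assuming a gudhi build (with CGAL) that includes this patch:

    import gudhi

    points = [[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]
    # 'fast' favors speed, 'exact' favors certified filtration values; 'safe' is the default.
    alpha = gudhi.AlphaComplex(points=points, precision='fast')
    # Skip filtration values entirely (they are set to NaN) when only the
    # Delaunay triangulation is needed.
    stree = alpha.create_simplex_tree(default_filtration_value=True)
    print(stree.num_simplices())
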
diff --git a/src/python/gudhi/bottleneck.cc b/src/python/gudhi/bottleneck.cc
index 732cb9a8..8a3d669a 100644
--- a/src/python/gudhi/bottleneck.cc
+++ b/src/python/gudhi/bottleneck.cc
@@ -12,24 +12,29 @@
#include <pybind11_diagram_utils.h>
-double bottleneck(Dgm d1, Dgm d2, double epsilon)
+// For compatibility with older versions, we want to support e=None.
+// In C++17, the recommended way is std::optional<double>.
+double bottleneck(Dgm d1, Dgm d2, py::object epsilon)
{
+ double e = (std::numeric_limits<double>::min)();
+ if (!epsilon.is_none()) e = epsilon.cast<double>();
// I *think* the call to request() has to be before releasing the GIL.
auto diag1 = numpy_to_range_of_pairs(d1);
auto diag2 = numpy_to_range_of_pairs(d2);
py::gil_scoped_release release;
- return Gudhi::persistence_diagram::bottleneck_distance(diag1, diag2, epsilon);
+ return Gudhi::persistence_diagram::bottleneck_distance(diag1, diag2, e);
}
PYBIND11_MODULE(bottleneck, m) {
m.attr("__license__") = "GPL v3";
m.def("bottleneck_distance", &bottleneck,
py::arg("diagram_1"), py::arg("diagram_2"),
- py::arg("e") = (std::numeric_limits<double>::min)(),
+ py::arg("e") = py::none(),
R"pbdoc(
- This function returns the point corresponding to a given vertex.
+ Compute the Bottleneck distance between two diagrams.
+ Points at infinity and on the diagonal are supported.
:param diagram_1: The first diagram.
:type diagram_1: numpy array of shape (m,2)
@@ -42,7 +47,7 @@ PYBIND11_MODULE(bottleneck, m) {
bits of the mantissa may be wrong). This version of the algorithm takes
advantage of the limited precision of `double` and is usually a lot
faster to compute, whatever the value of `e`.
- Thus, by default, `e` is the smallest positive double.
+ Thus, by default (`e=None`), `e` is the smallest positive double.
:type e: float
:rtype: float
:returns: the bottleneck distance.
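
From Python, the new default is observable as follows (a minimal sketch with arbitrary diagrams):

    import numpy as np
    import gudhi

    diag1 = np.array([[0.0, 1.0], [0.5, 2.0]])
    diag2 = np.array([[0.0, 1.1]])
    # e=None (the new default) behaves like the smallest positive double.
    print(gudhi.bottleneck_distance(diag1, diag2))
    # An explicit additive error bound can still be passed.
    print(gudhi.bottleneck_distance(diag1, diag2, 0.01))
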
diff --git a/src/python/gudhi/clustering/__init__.py b/src/python/gudhi/clustering/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/python/gudhi/clustering/__init__.py
diff --git a/src/python/gudhi/clustering/_tomato.cc b/src/python/gudhi/clustering/_tomato.cc
new file mode 100644
index 00000000..a76a2c3a
--- /dev/null
+++ b/src/python/gudhi/clustering/_tomato.cc
@@ -0,0 +1,277 @@
+/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ * Author(s): Marc Glisse
+ *
+ * Copyright (C) 2020 Inria
+ *
+ * Modification(s):
+ * - YYYY/MM Author: Description of the modification
+ */
+
+#include <boost/container/flat_map.hpp>
+#include <boost/pending/disjoint_sets.hpp>
+#include <boost/property_map/property_map.hpp>
+#include <boost/property_map/transform_value_property_map.hpp>
+#include <boost/property_map/vector_property_map.hpp>
+#include <boost/property_map/function_property_map.hpp>
+#include <boost/iterator/counting_iterator.hpp>
+#include <boost/range/irange.hpp>
+#include <boost/range/adaptor/transformed.hpp>
+#include <vector>
+#include <unordered_map>
+#include <pybind11/pybind11.h>
+#include <pybind11/numpy.h>
+#include <iostream>
+
+namespace py = pybind11;
+
+template <class T, class = std::enable_if_t<std::is_integral<T>::value>>
+int getint(int i) {
+ return i;
+}
+// Gcc-8 has a bug that breaks this version, fixed in gcc-9
+// template<class T, class=decltype(std::declval<T>().template cast<int>())>
+// int getint(T i){return i.template cast<int>();}
+template <class T>
+auto getint(T i) -> decltype(i.template cast<int>()) {
+ return i.template cast<int>();
+}
+
+// Raw clusters are clusters obtained through single-linkage, no merging.
+
+typedef int Point_index;
+typedef int Cluster_index;
+struct Merge {
+ Cluster_index first, second;
+ double persist;
+};
+
+template <class Neighbors, class Density, class Order, class ROrder>
+auto tomato(Point_index num_points, Neighbors const& neighbors, Density const& density, Order const& order,
+ ROrder const& rorder) {
+ // point index --> index of raw cluster it belongs to
+ std::vector<Cluster_index> raw_cluster;
+ raw_cluster.reserve(num_points);
+ // cluster index --> index of top point in the cluster
+ Cluster_index n_raw_clusters = 0; // current number of raw clusters seen
+ //
+ std::vector<Merge> merges;
+ struct Data {
+ Cluster_index parent;
+ int rank;
+ Point_index max;
+ }; // information on a cluster
+ std::vector<Data> ds_base;
+ // boost::vector_property_map does resize(size+1) for every new element, don't use it
+ auto ds_data =
+ boost::make_function_property_map<std::size_t>([&ds_base](std::size_t n) -> Data& { return ds_base[n]; });
+ auto ds_parent =
+ boost::make_transform_value_property_map([](auto& p) -> Cluster_index& { return p.parent; }, ds_data);
+ auto ds_rank = boost::make_transform_value_property_map([](auto& p) -> int& { return p.rank; }, ds_data);
+ boost::disjoint_sets<decltype(ds_rank), decltype(ds_parent)> ds(
+ ds_rank, ds_parent); // on the clusters, not directly the points
+ std::vector<std::array<double, 2>> persistence; // diagram (finite points)
+ boost::container::flat_map<Cluster_index, Cluster_index>
+ adj_clusters; // first: the merged cluster, second: the raw cluster
+ // we only care about the raw cluster, we could use a vector to store the second, store first into a set, and only
+ // insert in the vector if merged is absent from the set
+
+ for (Point_index i = 0; i < num_points; ++i) {
+ // auto&& ngb = neighbors[order[i]];
+ // TODO: have a specialization where we don't need python types and py::cast
+ // TODO: move py::cast and getint into Neighbors
+ py::object ngb = neighbors[py::cast(order[i])]; // auto&& also seems to work
+ adj_clusters.clear();
+ Point_index j = i; // highest neighbor
+ // for(Point_index k : ngb)
+ for (auto k_py : ngb) {
+ Point_index k = rorder[getint(k_py)];
+ if (k >= i || k < 0) // ???
+ continue;
+ if (k < j) j = k;
+ Cluster_index rk = raw_cluster[k];
+ adj_clusters.emplace(ds.find_set(rk), rk);
+ // does not insert if ck=ds.find_set(rk) already seen
+ // which rk we keep from those with the same ck is arbitrary
+ }
+ assert((Point_index)raw_cluster.size() == i);
+ if (i == j) { // local maximum -> new cluster
+ Cluster_index c = n_raw_clusters++;
+ ds_base.emplace_back(); // could be integrated in ds_data, but then we would check the size for every access
+ ds.make_set(c);
+ ds_base[c].max = i; // max
+ raw_cluster.push_back(c);
+ continue;
+ } else { // add i to the cluster of j
+ assert(j < i);
+ raw_cluster.push_back(raw_cluster[j]);
+ // FIXME: we are adding point i to the raw cluster of j, but that one might not be in adj_clusters, so we may
+ // merge clusters A and B through a point of C. It is strange, but I don't know if it can really cause problems.
+ // We could just not set j at all and use arbitrarily the first element of adj_clusters.
+ }
+ // possibly merge clusters
+ // we could sort, in case there are several merges, but that doesn't seem so useful
+ Cluster_index rj = raw_cluster[j];
+ Cluster_index cj = ds.find_set(rj);
+ Cluster_index orig_cj = cj;
+ for (auto ckk : adj_clusters) {
+ Cluster_index rk = ckk.second;
+ Cluster_index ck = ckk.first;
+ if (ck == orig_cj) continue;
+ assert(ck == ds.find_set(rk));
+ Point_index j = ds_base[cj].max;
+ Point_index k = ds_base[ck].max;
+ Point_index young = std::max(j, k);
+ Point_index old = std::min(j, k);
+ auto d_young = density[order[young]];
+ auto d_i = density[order[i]];
+ assert(d_young >= d_i);
+      // Always merge (the non-hierarchical algorithm would only conditionally merge here)
+ persistence.push_back({d_young, d_i});
+ assert(ds.find_set(rj) != ds.find_set(rk));
+ ds.link(cj, ck);
+ cj = ds.find_set(cj);
+ ds_base[cj].max = old; // just one parent, no need for find_set
+ // record the raw clusters, we don't know what will have already been merged.
+ merges.push_back({rj, rk, d_young - d_i});
+ }
+ }
+ {
+ boost::counting_iterator<int> b(0), e(ds_base.size());
+ ds.compress_sets(b, e);
+ // Now we stop using find_sets and look at the parent directly
+ // rank is reused to rename clusters contiguously 0, 1, etc
+ }
+ // Maximum for each connected component
+ std::vector<double> max_cc;
+ for (Cluster_index i = 0; i < n_raw_clusters; ++i) {
+ if (ds_base[i].parent == i) max_cc.push_back(density[order[ds_base[i].max]]);
+ }
+ assert((Cluster_index)(merges.size() + max_cc.size()) == n_raw_clusters);
+
+ // TODO: create a "noise" cluster, merging all those not prominent enough?
+
+ // Replay the merges, in increasing order of prominence, to build the hierarchy
+ std::sort(merges.begin(), merges.end(), [](Merge const& a, Merge const& b) { return a.persist < b.persist; });
+ std::vector<std::array<Cluster_index, 2>> children;
+ children.reserve(merges.size());
+ {
+ struct Dat {
+ Cluster_index parent;
+ int rank;
+ Cluster_index name;
+ };
+ std::vector<Dat> ds_bas(2 * n_raw_clusters - 1);
+ Cluster_index i;
+ auto ds_dat =
+ boost::make_function_property_map<std::size_t>([&ds_bas](std::size_t n) -> Dat& { return ds_bas[n]; });
+ auto ds_par = boost::make_transform_value_property_map([](auto& p) -> Cluster_index& { return p.parent; }, ds_dat);
+ auto ds_ran = boost::make_transform_value_property_map([](auto& p) -> int& { return p.rank; }, ds_dat);
+ boost::disjoint_sets<decltype(ds_ran), decltype(ds_par)> ds(ds_ran, ds_par);
+ for (i = 0; i < n_raw_clusters; ++i) {
+ ds.make_set(i);
+ ds_bas[i].name = i;
+ }
+ for (Merge const& m : merges) {
+ Cluster_index j = ds.find_set(m.first);
+ Cluster_index k = ds.find_set(m.second);
+ assert(j != k);
+ children.push_back({ds_bas[j].name, ds_bas[k].name});
+ ds.make_set(i);
+ ds.link(i, j);
+ ds.link(i, k);
+ ds_bas[ds.find_set(i)].name = i;
+ ++i;
+ }
+ }
+
+ std::vector<Cluster_index> raw_cluster_ordered(num_points);
+ for (int i = 0; i < num_points; ++i) raw_cluster_ordered[i] = raw_cluster[rorder[i]];
+ // return raw_cluster, children, persistence
+ // TODO avoid copies: https://github.com/pybind/pybind11/issues/1042
+ return py::make_tuple(py::array(raw_cluster_ordered.size(), raw_cluster_ordered.data()),
+ py::array(children.size(), children.data()), py::array(persistence.size(), persistence.data()),
+ py::array(max_cc.size(), max_cc.data()));
+}
+
+auto merge(py::array_t<Cluster_index, py::array::c_style> children, Cluster_index n_leaves, Cluster_index n_final) {
+ if (n_final > n_leaves) {
+ std::cerr << "The number of clusters required " << n_final << " is larger than the number of mini-clusters " << n_leaves << '\n';
+ n_final = n_leaves; // or return something special and let Tomato use leaf_labels_?
+ }
+ py::buffer_info cbuf = children.request();
+ if ((cbuf.ndim != 2 || cbuf.shape[1] != 2) && (cbuf.ndim != 1 || cbuf.shape[0] != 0))
+ throw std::runtime_error("internal error: children have to be (n,2) or empty");
+ const int n_merges = cbuf.shape[0];
+ Cluster_index* d = (Cluster_index*)cbuf.ptr;
+ if (n_merges + n_final < n_leaves) {
+ std::cerr << "The number of clusters required " << n_final << " is smaller than the number of connected components " << n_leaves - n_merges << '\n';
+ n_final = n_leaves - n_merges;
+ }
+ struct Dat {
+ Cluster_index parent;
+ int rank;
+ int name;
+ };
+ std::vector<Dat> ds_bas(2 * n_leaves - 1);
+ auto ds_dat = boost::make_function_property_map<std::size_t>([&ds_bas](std::size_t n) -> Dat& { return ds_bas[n]; });
+ auto ds_par = boost::make_transform_value_property_map([](auto& p) -> Cluster_index& { return p.parent; }, ds_dat);
+ auto ds_ran = boost::make_transform_value_property_map([](auto& p) -> int& { return p.rank; }, ds_dat);
+ boost::disjoint_sets<decltype(ds_ran), decltype(ds_par)> ds(ds_ran, ds_par);
+ Cluster_index i;
+ for (i = 0; i < n_leaves; ++i) {
+ ds.make_set(i);
+ ds_bas[i].name = -1;
+ }
+ for (Cluster_index m = 0; m < n_leaves - n_final; ++m) {
+ Cluster_index j = ds.find_set(d[2 * m]);
+ Cluster_index k = ds.find_set(d[2 * m + 1]);
+ assert(j != k);
+ ds.make_set(i);
+ ds.link(i, j);
+ ds.link(i, k);
+ ++i;
+ }
+ Cluster_index next_cluster_name = 0;
+ std::vector<Cluster_index> ret;
+ ret.reserve(n_leaves);
+ for (Cluster_index j = 0; j < n_leaves; ++j) {
+ Cluster_index k = ds.find_set(j);
+ if (ds_bas[k].name == -1) ds_bas[k].name = next_cluster_name++;
+ ret.push_back(ds_bas[k].name);
+ }
+ return py::array(ret.size(), ret.data());
+}
+
+// TODO: Do a special version when ngb is a numpy array, where we can cast to int[k][n] ?
+// py::isinstance<py::array_t<std::int32_t>> (or py::isinstance<py::array> and test the dtype) and flags&c_style
+// or an overload (dropping forcecast?)
+// also handle the case where the input is not a single array, but each list of neighbors is an array?
+auto hierarchy(py::handle ngb, py::array_t<double, py::array::c_style | py::array::forcecast> density) {
+ // used to be py::iterable ngb, but that's inconvenient if it doesn't come pre-sorted
+ // use py::handle and check if [] (aka __getitem__) works? But then we need to build an object to pass it to []
+ // (I _think_ handle is ok and we don't need object here)
+ py::buffer_info wbuf = density.request();
+ if (wbuf.ndim != 1) throw std::runtime_error("density must be 1D");
+ const int n = wbuf.shape[0];
+ double* d = (double*)wbuf.ptr;
+ // Vector { 0, 1, ..., n-1 }
+ std::vector<Point_index> order(boost::counting_iterator<Point_index>(0), boost::counting_iterator<Point_index>(n));
+ // Permutation of the indices to get points in decreasing order of density
+ std::sort(std::begin(order), std::end(order), [=](Point_index i, Point_index j) { return d[i] > d[j]; });
+ // Inverse permutation
+ std::vector<Point_index> rorder(n);
+ for (Point_index i : boost::irange(0, n)) rorder[order[i]] = i;
+ // Used as:
+ // order[i] is the index of the point with i-th largest density
+ // rorder[i] is the rank of the i-th point in order of decreasing density
+ // TODO: put a wrapper on ngb and d so we don't need to pass (r)order (there is still the issue of reordering the
+ // output)
+ return tomato(n, ngb, d, order, rorder);
+}
+
+PYBIND11_MODULE(_tomato, m) {
+ m.doc() = "Internals of tomato clustering";
+ m.def("hierarchy", &hierarchy, "does the clustering");
+ m.def("merge", &merge, "merge clusters");
+}
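
To make the contract of these internals concrete, here is how hierarchy() and merge() compose, mirroring their use in tomato.py below. The toy graph and densities are invented for illustration, and _tomato is an internal module whose interface may change:

    import numpy as np
    from gudhi.clustering._tomato import hierarchy, merge

    # Symmetric neighborhood graph on 4 points, plus a density estimate per point.
    neighbors = [[0, 1], [0, 1, 2], [1, 2, 3], [2, 3]]
    density = np.array([1.0, 0.8, 0.2, 0.9])
    leaf_labels, children, diagram, max_cc = hierarchy(neighbors, density)
    n_leaves = len(max_cc) + len(children)
    # Replay the recorded merges until a single cluster remains.
    labels = merge(children, n_leaves, 1)[leaf_labels]
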
diff --git a/src/python/gudhi/clustering/tomato.py b/src/python/gudhi/clustering/tomato.py
new file mode 100644
index 00000000..fbba3cc8
--- /dev/null
+++ b/src/python/gudhi/clustering/tomato.py
@@ -0,0 +1,321 @@
+# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+# Author(s): Marc Glisse
+#
+# Copyright (C) 2020 Inria
+#
+# Modification(s):
+# - YYYY/MM Author: Description of the modification
+
+import numpy
+from ..point_cloud.knn import KNearestNeighbors
+from ..point_cloud.dtm import DTMDensity
+from ._tomato import *
+
+# The fit/predict interface is not so well suited...
+
+
+class Tomato:
+ """
+ This clustering algorithm needs a neighborhood graph on the points, and an estimation of the density at each point.
+ A few possible graph constructions and density estimators are provided for convenience, but it is perfectly natural
+ to provide your own.
+
+    :Requires: `SciPy <installation.html#scipy>`_, `Scikit-learn <installation.html#scikit-learn>`_ or others
+        (see :class:`~gudhi.point_cloud.knn.KNearestNeighbors`) depending on the options.
+
+ Attributes
+ ----------
+ n_clusters_: int
+ The number of clusters. Writing to it automatically adjusts `labels_`.
+ merge_threshold_: float
+ minimum prominence of a cluster so it doesn't get merged. Writing to it automatically adjusts `labels_`.
+ n_leaves_: int
+ number of leaves (unstable clusters) in the hierarchical tree
+ leaf_labels_: ndarray of shape (n_samples,)
+ cluster labels for each point, at the very bottom of the hierarchy
+ labels_: ndarray of shape (n_samples,)
+ cluster labels for each point, after merging
+ diagram_: ndarray of shape (`n_leaves_`, 2)
+ persistence diagram (only the finite points)
+ max_weight_per_cc_: ndarray of shape (n_connected_components,)
+ maximum of the density function on each connected component. This corresponds to the abscissa of infinite
+ points in the diagram
+ children_: ndarray of shape (`n_leaves_`-n_connected_components, 2)
+ The children of each non-leaf node. Values less than `n_leaves_` correspond to leaves of the tree.
+ A node i greater than or equal to `n_leaves_` is a non-leaf node and has children children_[i - `n_leaves_`].
+        Alternatively, at the i-th iteration, children[i][0] and children[i][1] are merged to form node `n_leaves_` + i
+ weights_: ndarray of shape (n_samples,)
+ weights of the points, as computed by the density estimator or provided by the user
+ params_: dict
+ Parameters like metric, etc
+ """
+
+ def __init__(
+ self,
+ graph_type="knn",
+ density_type="logDTM",
+ n_clusters=None,
+ merge_threshold=None,
+ # eliminate_threshold=None,
+ # eliminate_threshold (float): minimum max weight of a cluster so it doesn't get eliminated
+ **params
+ ):
+ """
+ Args:
+ graph_type (str): 'manual', 'knn' or 'radius'. Default is 'knn'.
+ density_type (str): 'manual', 'DTM', 'logDTM', 'KDE' or 'logKDE'. When you have many points,
+ 'KDE' and 'logKDE' tend to be slower. Default is 'logDTM'.
+ metric (str|Callable): metric used when calculating the distance between instances in a feature array.
+ Defaults to Minkowski of parameter p.
+ kde_params (dict): if density_type is 'KDE' or 'logKDE', additional parameters passed directly to
+ sklearn.neighbors.KernelDensity.
+ k (int): number of neighbors for a knn graph (including the vertex itself). Defaults to 10.
+ k_DTM (int): number of neighbors for the DTM density estimation (including the vertex itself).
+ Defaults to k.
+ r (float): size of a neighborhood if graph_type is 'radius'. Also used as default bandwidth in kde_params.
+ eps (float): (1+eps) approximation factor when computing distances (ignored in many cases).
+ n_clusters (int): number of clusters requested. Defaults to None, i.e. no merging occurs and we get
+ the maximal number of clusters.
+ merge_threshold (float): minimum prominence of a cluster so it doesn't get merged.
+ symmetrize_graph (bool): whether we should add edges to make the neighborhood graph symmetric.
+ This can be useful with k-NN for small k. Defaults to false.
+ p (float): norm L^p on input points. Defaults to 2.
+ q (float): order used to compute the distance to measure. Defaults to dim.
+ Beware that when the dimension is large, this can easily cause overflows.
+ dim (float): final exponent in DTM density estimation, representing the dimension. Defaults to the
+ dimension, or 2 when the dimension cannot be read from the input (metric is "precomputed").
+ n_jobs (int): Number of jobs to schedule for parallel processing on the CPU.
+ If -1 is given all processors are used. Default: 1.
+ params: extra parameters are passed to :class:`~gudhi.point_cloud.knn.KNearestNeighbors` and
+ :class:`~gudhi.point_cloud.dtm.DTMDensity`.
+ """
+ # Should metric='precomputed' mean input_type='distance_matrix'?
+ # Should we be able to pass metric='minkowski' (what None does currently)?
+ self.graph_type_ = graph_type
+ self.density_type_ = density_type
+ self.params_ = params
+ self.__n_clusters = n_clusters
+ self.__merge_threshold = merge_threshold
+ # self.eliminate_threshold_ = eliminate_threshold
+ if n_clusters and merge_threshold:
+ raise ValueError("Cannot specify both a merge threshold and a number of clusters")
+
+ def fit(self, X, y=None, weights=None):
+ """
+ Args:
+ X ((n,d)-array of float|(n,n)-array of float|Sequence[Iterable[int]]): coordinates of the points,
+ or distance matrix (full, not just a triangle) if metric is "precomputed", or list of neighbors
+ for each point (points are represented by their index, starting from 0) if graph_type is "manual".
+ The number of points is currently limited to about 2 billion.
+ weights (ndarray of shape (n_samples)): if density_type is 'manual', a density estimate at each point
+ y: Not used, present here for API consistency with scikit-learn by convention.
+ """
+ # TODO: First detect if this is a new call with the same data (only threshold changed?)
+ # TODO: less code duplication (subroutines?), less spaghetti, but don't compute neighbors twice if not needed. Clear error message for missing or contradictory parameters.
+ if weights is not None:
+ density_type = "manual"
+ else:
+ density_type = self.density_type_
+ if density_type == "manual":
+ raise ValueError("If density_type is 'manual', you must provide weights to fit()")
+
+ if self.graph_type_ == "manual":
+ self.neighbors_ = X
+ # FIXME: uniformize "message 'option'" vs 'message "option"'
+ assert density_type == "manual", 'If graph_type is "manual", density_type must be as well'
+ else:
+ metric = self.params_.get("metric", "minkowski")
+ if metric != "precomputed":
+ self.points_ = X
+
+ # Slight complication to avoid computing knn twice.
+ need_knn = 0
+ need_knn_ngb = False
+ need_knn_dist = False
+ if self.graph_type_ == "knn":
+ k_graph = self.params_.get("k", 10)
+ # If X has fewer than k points...
+ if k_graph > len(X):
+ k_graph = len(X)
+ need_knn = k_graph
+ need_knn_ngb = True
+ if self.density_type_ in ["DTM", "logDTM"]:
+ k = self.params_.get("k", 10)
+ k_DTM = self.params_.get("k_DTM", k)
+ # If X has fewer than k points...
+ if k_DTM > len(X):
+ k_DTM = len(X)
+ need_knn = max(need_knn, k_DTM)
+ need_knn_dist = True
+ # if we ask for more neighbors for the graph than the DTM, getting the distances is a slight waste,
+ # but it looks negligible
+ if need_knn > 0:
+ knn_args = dict(self.params_)
+ knn_args["k"] = need_knn
+ knn = KNearestNeighbors(return_index=need_knn_ngb, return_distance=need_knn_dist, **knn_args).fit_transform(
+ X
+ )
+ if need_knn_ngb:
+ if need_knn_dist:
+ self.neighbors_ = knn[0][:, 0:k_graph]
+ knn_dist = knn[1]
+ else:
+ self.neighbors_ = knn
+ elif need_knn_dist:
+ knn_dist = knn
+ if self.density_type_ in ["DTM", "logDTM"]:
+ dim = self.params_.get("dim")
+ if dim is None:
+ dim = len(X[0]) if metric != "precomputed" else 2
+ q = self.params_.get("q", dim)
+ weights = DTMDensity(k=k_DTM, metric="neighbors", dim=dim, q=q).fit_transform(knn_dist)
+ if self.density_type_ == "logDTM":
+ weights = numpy.log(weights)
+
+ if self.graph_type_ == "radius":
+ if metric in ["minkowski", "euclidean", "manhattan", "chebyshev"]:
+ from scipy.spatial import cKDTree
+
+ tree = cKDTree(X)
+ # TODO: handle "l1" and "l2" aliases?
+ p = self.params_.get("p")
+ if metric == "euclidean":
+ assert p is None or p == 2, "p=" + str(p) + " is not consistent with metric='euclidean'"
+ p = 2
+ elif metric == "manhattan":
+ assert p is None or p == 1, "p=" + str(p) + " is not consistent with metric='manhattan'"
+ p = 1
+ elif metric == "chebyshev":
+ assert p is None or p == numpy.inf, "p=" + str(p) + " is not consistent with metric='chebyshev'"
+ p = numpy.inf
+ elif p is None:
+ p = 2 # the default
+ eps = self.params_.get("eps", 0)
+ self.neighbors_ = tree.query_ball_tree(tree, r=self.params_["r"], p=p, eps=eps)
+
+ # TODO: sklearn's NearestNeighbors.radius_neighbors can handle more metrics efficiently via its BallTree
+ # (don't bother with the _graph variant, it just calls radius_neighbors).
+ elif metric != "precomputed":
+ from sklearn.metrics import pairwise_distances
+
+ X = pairwise_distances(X, metric=metric, n_jobs=self.params_.get("n_jobs"))
+ metric = "precomputed"
+
+ if metric == "precomputed":
+ # TODO: parallelize? May not be worth it.
+ X = numpy.asarray(X)
+ r = self.params_["r"]
+ self.neighbors_ = [numpy.flatnonzero(l <= r) for l in X]
+
+ if self.density_type_ in {"KDE", "logKDE"}:
+ # Slow...
+ assert (
+ self.graph_type_ != "manual" and metric != "precomputed"
+ ), "Scikit-learn's KernelDensity requires point coordinates"
+ kde_params = dict(self.params_.get("kde_params", dict()))
+ kde_params.setdefault("metric", metric)
+ r = self.params_.get("r")
+ if r is not None:
+ kde_params.setdefault("bandwidth", r)
+ # Should we default rtol to eps?
+ from sklearn.neighbors import KernelDensity
+
+ weights = KernelDensity(**kde_params).fit(self.points_).score_samples(self.points_)
+ if self.density_type_ == "KDE":
+ weights = numpy.exp(weights)
+
+ # TODO: do it at the C++ level and/or in parallel if this is too slow?
+ if self.params_.get("symmetrize_graph"):
+ self.neighbors_ = [set(line) for line in self.neighbors_]
+ for i, line in enumerate(self.neighbors_):
+ line.discard(i)
+ for j in line:
+ self.neighbors_[j].add(i)
+
+ self.weights_ = weights
+ # This is where the main computation happens
+ self.leaf_labels_, self.children_, self.diagram_, self.max_weight_per_cc_ = hierarchy(self.neighbors_, weights)
+ self.n_leaves_ = len(self.max_weight_per_cc_) + len(self.children_)
+ assert self.leaf_labels_.max() + 1 == len(self.max_weight_per_cc_) + len(self.children_)
+ # TODO: deduplicate this code with the setters below
+ if self.__merge_threshold:
+ assert not self.__n_clusters
+ self.__n_clusters = numpy.count_nonzero(
+ self.diagram_[:, 0] - self.diagram_[:, 1] > self.__merge_threshold
+ ) + len(self.max_weight_per_cc_)
+ if self.__n_clusters:
+ # TODO: set corresponding merge_threshold?
+ renaming = merge(self.children_, self.n_leaves_, self.__n_clusters)
+ self.labels_ = renaming[self.leaf_labels_]
+ # In case the user asked for something impossible.
+ # TODO: check for impossible situations before calling merge.
+ self.__n_clusters = self.labels_.max() + 1
+ else:
+ self.labels_ = self.leaf_labels_
+ self.__n_clusters = self.n_leaves_
+ return self
+
+ def fit_predict(self, X, y=None, weights=None):
+ """
+ Equivalent to fit(), and returns the `labels_`.
+ """
+ return self.fit(X, y, weights).labels_
+
+ # TODO: add argument k or threshold? Have a version where you can click and it shows the line and the corresponding k?
+ def plot_diagram(self):
+ """
+ """
+ import matplotlib.pyplot as plt
+
+ l = self.max_weight_per_cc_.min()
+ r = self.max_weight_per_cc_.max()
+ if self.diagram_.size > 0:
+ plt.plot(self.diagram_[:, 0], self.diagram_[:, 1], "ro")
+ l = min(l, self.diagram_[:, 1].min())
+ r = max(r, self.diagram_[:, 0].max())
+ if l == r:
+ if l > 0:
+ l, r = 0.9 * l, 1.1 * r
+ elif l < 0:
+ l, r = 1.1 * l, 0.9 * r
+ else:
+ l, r = -1.0, 1.0
+ plt.plot([l, r], [l, r])
+ plt.plot(
+ self.max_weight_per_cc_, numpy.full(self.max_weight_per_cc_.shape, 1.1 * l - 0.1 * r), "ro", color="green"
+ )
+ plt.show()
+
+ # Use set_params instead?
+ @property
+ def n_clusters_(self):
+ return self.__n_clusters
+
+ @n_clusters_.setter
+ def n_clusters_(self, n_clusters):
+ if n_clusters == self.__n_clusters:
+ return
+ self.__n_clusters = n_clusters
+ self.__merge_threshold = None
+ if hasattr(self, "leaf_labels_"):
+ renaming = merge(self.children_, self.n_leaves_, self.__n_clusters)
+ self.labels_ = renaming[self.leaf_labels_]
+ # In case the user asked for something impossible
+ self.__n_clusters = self.labels_.max() + 1
+
+ @property
+ def merge_threshold_(self):
+ return self.__merge_threshold
+
+ @merge_threshold_.setter
+ def merge_threshold_(self, merge_threshold):
+ if merge_threshold == self.__merge_threshold:
+ return
+ if hasattr(self, "leaf_labels_"):
+ self.n_clusters_ = numpy.count_nonzero(self.diagram_[:, 0] - self.diagram_[:, 1] > merge_threshold) + len(
+ self.max_weight_per_cc_
+ )
+ else:
+ self.__n_clusters = None
+ self.__merge_threshold = merge_threshold
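
A short usage sketch of the public class (random data; plot_diagram() needs matplotlib):

    import numpy as np
    from gudhi.clustering.tomato import Tomato

    X = np.random.default_rng(0).normal(size=(100, 2))
    t = Tomato(density_type="logDTM", k=8)
    t.fit(X)
    t.plot_diagram()   # inspect prominences before choosing a number of clusters
    t.n_clusters_ = 2  # re-merging is cheap and does not refit
    print(t.labels_)
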
diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx
index ca979eda..28fbe3af 100644
--- a/src/python/gudhi/cubical_complex.pyx
+++ b/src/python/gudhi/cubical_complex.pyx
@@ -27,20 +27,20 @@ __license__ = "MIT"
cdef extern from "Cubical_complex_interface.h" namespace "Gudhi":
cdef cppclass Bitmap_cubical_complex_base_interface "Gudhi::Cubical_complex::Cubical_complex_interface<>":
- Bitmap_cubical_complex_base_interface(vector[unsigned] dimensions, vector[double] top_dimensional_cells)
- Bitmap_cubical_complex_base_interface(string perseus_file)
- int num_simplices()
- int dimension()
+ Bitmap_cubical_complex_base_interface(vector[unsigned] dimensions, vector[double] top_dimensional_cells) nogil
+ Bitmap_cubical_complex_base_interface(string perseus_file) nogil
+ int num_simplices() nogil
+ int dimension() nogil
cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi":
cdef cppclass Cubical_complex_persistence_interface "Gudhi::Persistent_cohomology_interface<Gudhi::Cubical_complex::Cubical_complex_interface<>>":
- Cubical_complex_persistence_interface(Bitmap_cubical_complex_base_interface * st, bool persistence_dim_max)
- void compute_persistence(int homology_coeff_field, double min_persistence)
- vector[pair[int, pair[double, double]]] get_persistence()
- vector[vector[int]] cofaces_of_cubical_persistence_pairs()
- vector[int] betti_numbers()
- vector[int] persistent_betti_numbers(double from_value, double to_value)
- vector[pair[double,double]] intervals_in_dimension(int dimension)
+ Cubical_complex_persistence_interface(Bitmap_cubical_complex_base_interface * st, bool persistence_dim_max) nogil
+ void compute_persistence(int homology_coeff_field, double min_persistence) nogil
+ vector[pair[int, pair[double, double]]] get_persistence() nogil
+ vector[vector[int]] cofaces_of_cubical_persistence_pairs() nogil
+ vector[int] betti_numbers() nogil
+ vector[int] persistent_betti_numbers(double from_value, double to_value) nogil
+ vector[pair[double,double]] intervals_in_dimension(int dimension) nogil
# CubicalComplex python interface
cdef class CubicalComplex:
@@ -80,7 +80,7 @@ cdef class CubicalComplex:
perseus_file=''):
if ((dimensions is not None) and (top_dimensional_cells is not None)
and (perseus_file == '')):
- self.thisptr = new Bitmap_cubical_complex_base_interface(dimensions, top_dimensional_cells)
+ self._construct_from_cells(dimensions, top_dimensional_cells)
elif ((dimensions is None) and (top_dimensional_cells is not None)
and (perseus_file == '')):
top_dimensional_cells = np.array(top_dimensional_cells,
@@ -88,11 +88,11 @@ cdef class CubicalComplex:
order = 'F')
dimensions = top_dimensional_cells.shape
top_dimensional_cells = top_dimensional_cells.ravel(order='F')
- self.thisptr = new Bitmap_cubical_complex_base_interface(dimensions, top_dimensional_cells)
+ self._construct_from_cells(dimensions, top_dimensional_cells)
elif ((dimensions is None) and (top_dimensional_cells is None)
and (perseus_file != '')):
if os.path.isfile(perseus_file):
- self.thisptr = new Bitmap_cubical_complex_base_interface(perseus_file.encode('utf-8'))
+ self._construct_from_file(perseus_file.encode('utf-8'))
else:
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
perseus_file)
@@ -101,6 +101,14 @@ cdef class CubicalComplex:
"top_dimensional_cells or from a Perseus-style file name.",
file=sys.stderr)
+ def _construct_from_cells(self, vector[unsigned] dimensions, vector[double] top_dimensional_cells):
+ with nogil:
+ self.thisptr = new Bitmap_cubical_complex_base_interface(dimensions, top_dimensional_cells)
+
+ def _construct_from_file(self, string filename):
+ with nogil:
+ self.thisptr = new Bitmap_cubical_complex_base_interface(filename)
+
def __dealloc__(self):
if self.thisptr != NULL:
del self.thisptr
@@ -151,8 +159,11 @@ cdef class CubicalComplex:
if self.pcohptr != NULL:
del self.pcohptr
assert self.__is_defined()
- self.pcohptr = new Cubical_complex_persistence_interface(self.thisptr, True)
- self.pcohptr.compute_persistence(homology_coeff_field, min_persistence)
+ cdef int field = homology_coeff_field
+ cdef double minp = min_persistence
+ with nogil:
+ self.pcohptr = new Cubical_complex_persistence_interface(self.thisptr, 1)
+ self.pcohptr.compute_persistence(field, minp)
def persistence(self, homology_coeff_field=11, min_persistence=0):
"""This function computes and returns the persistence of the complex.
@@ -204,19 +215,20 @@ cdef class CubicalComplex:
cdef vector[vector[int]] persistence_result
output = [[],[]]
- persistence_result = self.pcohptr.cofaces_of_cubical_persistence_pairs()
+ with nogil:
+ persistence_result = self.pcohptr.cofaces_of_cubical_persistence_pairs()
pr = np.array(persistence_result)
ess_ind = np.argwhere(pr[:,2] == -1)[:,0]
ess = pr[ess_ind]
- max_h = max(ess[:,0])+1
+ max_h = max(ess[:,0])+1 if len(ess) > 0 else 0
for h in range(max_h):
hidxs = np.argwhere(ess[:,0] == h)[:,0]
output[1].append(ess[hidxs][:,1])
reg_ind = np.setdiff1d(np.array(range(len(pr))), ess_ind)
reg = pr[reg_ind]
- max_h = max(reg[:,0])+1
+ max_h = max(reg[:,0])+1 if len(reg) > 0 else 0
for h in range(max_h):
hidxs = np.argwhere(reg[:,0] == h)[:,0]
output[0].append(reg[hidxs][:,1:])
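
With this change, a sketch like the following works even when some homology dimension has no persistence pair at all:

    import numpy as np
    import gudhi

    cells = np.array([[0.0, 1.0], [2.0, 3.0]])
    cc = gudhi.CubicalComplex(top_dimensional_cells=cells)
    cc.persistence()
    # One list of arrays per homology dimension, split into regular and essential pairs.
    regular, essential = cc.cofaces_of_persistence_pairs()
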
diff --git a/src/python/gudhi/dtm_rips_complex.py b/src/python/gudhi/dtm_rips_complex.py
new file mode 100644
index 00000000..63c9b138
--- /dev/null
+++ b/src/python/gudhi/dtm_rips_complex.py
@@ -0,0 +1,51 @@
+# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+# Author(s): Yuichi Ike, Raphaël Tinarrage
+#
+# Copyright (C) 2020 Inria, Copyright (C) 2020 Fujitsu Laboratories Ltd.
+#
+# Modification(s):
+# - YYYY/MM Author: Description of the modification
+
+
+from gudhi.weighted_rips_complex import WeightedRipsComplex
+from gudhi.point_cloud.dtm import DistanceToMeasure
+from scipy.spatial.distance import cdist
+
+class DTMRipsComplex(WeightedRipsComplex):
+ """
+ Class to generate a DTM Rips complex from a distance matrix or a point set,
+ in the way described in :cite:`dtmfiltrations`.
+    Note that all filtration values are doubled compared to the definition in the paper,
+    for consistency with RipsComplex.
+ :Requires: `SciPy <installation.html#scipy>`_
+ """
+ def __init__(self,
+ points=None,
+ distance_matrix=None,
+ k=1,
+ q=2,
+ max_filtration=float('inf')):
+ """
+ Args:
+ points (numpy.ndarray): array of points.
+ distance_matrix (numpy.ndarray): full distance matrix.
+ k (int): number of neighbors for the computation of DTM. Defaults to 1, which is equivalent to the usual Rips complex.
+ q (float): order used to compute the distance to measure. Defaults to 2.
+ max_filtration (float): specifies the maximal filtration value to be considered.
+ """
+ if distance_matrix is None:
+ if points is None:
+ # Empty Rips construction
+ points=[]
+ distance_matrix = cdist(points,points)
+ self.distance_matrix = distance_matrix
+
+ # TODO: address the error when k is too large
+ if k <= 1:
+ self.weights = [0] * len(distance_matrix)
+ else:
+ dtm = DistanceToMeasure(k, q=q, metric="precomputed")
+ self.weights = dtm.fit_transform(distance_matrix)
+ self.max_filtration = max_filtration
+
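
An illustrative sketch of the new class (random points; create_simplex_tree() is inherited from WeightedRipsComplex):

    import numpy as np
    from gudhi.dtm_rips_complex import DTMRipsComplex

    pts = np.random.default_rng(0).uniform(size=(50, 2))
    # k=1 gives back the usual Rips complex; larger k damps the influence of outliers.
    dtm_rips = DTMRipsComplex(points=pts, k=4)
    st = dtm_rips.create_simplex_tree(max_dimension=2)
    diag = st.persistence()
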
diff --git a/src/python/gudhi/hera/__init__.py b/src/python/gudhi/hera/__init__.py
new file mode 100644
index 00000000..f70b92b9
--- /dev/null
+++ b/src/python/gudhi/hera/__init__.py
@@ -0,0 +1,7 @@
+from .wasserstein import wasserstein_distance
+from .bottleneck import bottleneck_distance
+
+
+__author__ = "Marc Glisse"
+__copyright__ = "Copyright (C) 2020 Inria"
+__license__ = "MIT"
diff --git a/src/python/gudhi/hera/bottleneck.cc b/src/python/gudhi/hera/bottleneck.cc
new file mode 100644
index 00000000..0cb562ce
--- /dev/null
+++ b/src/python/gudhi/hera/bottleneck.cc
@@ -0,0 +1,54 @@
+/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ * Author(s): Marc Glisse
+ *
+ * Copyright (C) 2020 Inria
+ *
+ * Modification(s):
+ * - YYYY/MM Author: Description of the modification
+ */
+
+#include <pybind11_diagram_utils.h>
+
+#ifdef _MSC_VER
+// https://github.com/grey-narn/hera/issues/3
+// ssize_t is a non-standard type (well, posix)
+using py::ssize_t;
+#endif
+
+#include <bottleneck.h> // Hera
+
+double bottleneck_distance(Dgm d1, Dgm d2, double delta)
+{
+ // I *think* the call to request() has to be before releasing the GIL.
+ auto diag1 = numpy_to_range_of_pairs(d1);
+ auto diag2 = numpy_to_range_of_pairs(d2);
+
+ py::gil_scoped_release release;
+
+ if (delta == 0)
+ return hera::bottleneckDistExact(diag1, diag2);
+ else
+ return hera::bottleneckDistApprox(diag1, diag2, delta);
+}
+
+PYBIND11_MODULE(bottleneck, m) {
+ m.def("bottleneck_distance", &bottleneck_distance,
+ py::arg("X"), py::arg("Y"),
+ py::arg("delta") = .01,
+ R"pbdoc(
+ Compute the Bottleneck distance between two diagrams.
+ Points at infinity are supported.
+
+ .. note::
+ Points on the diagonal are not supported and must be filtered out before calling this function.
+
+ Parameters:
+ X (n x 2 numpy array): First diagram
+ Y (n x 2 numpy array): Second diagram
+ delta (float): Relative error 1+delta
+
+ Returns:
+ float: (approximate) bottleneck distance d_B(X,Y)
+ )pbdoc");
+}
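
From Python this binding is used as follows (a minimal sketch; remember to strip diagonal points first):

    import numpy as np
    from gudhi.hera import bottleneck_distance

    X = np.array([[0.0, 1.0], [2.0, 5.0]])
    Y = np.array([[0.0, 1.2], [2.0, 4.5]])
    print(bottleneck_distance(X, Y))     # relative error at most delta=0.01
    print(bottleneck_distance(X, Y, 0))  # delta=0 requests the exact distance
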
diff --git a/src/python/gudhi/hera.cc b/src/python/gudhi/hera/wasserstein.cc
index ea80a9a8..1a21f02f 100644
--- a/src/python/gudhi/hera.cc
+++ b/src/python/gudhi/hera/wasserstein.cc
@@ -33,7 +33,7 @@ double wasserstein_distance(
return hera::wasserstein_dist(diag1, diag2, params);
}
-PYBIND11_MODULE(hera, m) {
+PYBIND11_MODULE(wasserstein, m) {
m.def("wasserstein_distance", &wasserstein_distance,
py::arg("X"), py::arg("Y"),
py::arg("order") = 1,
diff --git a/src/python/gudhi/periodic_cubical_complex.pyx b/src/python/gudhi/periodic_cubical_complex.pyx
index 06309772..d353d2af 100644
--- a/src/python/gudhi/periodic_cubical_complex.pyx
+++ b/src/python/gudhi/periodic_cubical_complex.pyx
@@ -24,20 +24,20 @@ __license__ = "MIT"
cdef extern from "Cubical_complex_interface.h" namespace "Gudhi":
cdef cppclass Periodic_cubical_complex_base_interface "Gudhi::Cubical_complex::Cubical_complex_interface<Gudhi::cubical_complex::Bitmap_cubical_complex_periodic_boundary_conditions_base<double>>":
- Periodic_cubical_complex_base_interface(vector[unsigned] dimensions, vector[double] top_dimensional_cells, vector[bool] periodic_dimensions)
- Periodic_cubical_complex_base_interface(string perseus_file)
- int num_simplices()
- int dimension()
+ Periodic_cubical_complex_base_interface(vector[unsigned] dimensions, vector[double] top_dimensional_cells, vector[bool] periodic_dimensions) nogil
+ Periodic_cubical_complex_base_interface(string perseus_file) nogil
+ int num_simplices() nogil
+ int dimension() nogil
cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi":
cdef cppclass Periodic_cubical_complex_persistence_interface "Gudhi::Persistent_cohomology_interface<Gudhi::Cubical_complex::Cubical_complex_interface<Gudhi::cubical_complex::Bitmap_cubical_complex_periodic_boundary_conditions_base<double>>>":
- Periodic_cubical_complex_persistence_interface(Periodic_cubical_complex_base_interface * st, bool persistence_dim_max)
- void compute_persistence(int homology_coeff_field, double min_persistence)
- vector[pair[int, pair[double, double]]] get_persistence()
- vector[vector[int]] cofaces_of_cubical_persistence_pairs()
- vector[int] betti_numbers()
- vector[int] persistent_betti_numbers(double from_value, double to_value)
- vector[pair[double,double]] intervals_in_dimension(int dimension)
+ Periodic_cubical_complex_persistence_interface(Periodic_cubical_complex_base_interface * st, bool persistence_dim_max) nogil
+ void compute_persistence(int homology_coeff_field, double min_persistence) nogil
+ vector[pair[int, pair[double, double]]] get_persistence() nogil
+ vector[vector[int]] cofaces_of_cubical_persistence_pairs() nogil
+ vector[int] betti_numbers() nogil
+ vector[int] persistent_betti_numbers(double from_value, double to_value) nogil
+ vector[pair[double,double]] intervals_in_dimension(int dimension) nogil
# PeriodicCubicalComplex python interface
cdef class PeriodicCubicalComplex:
@@ -81,9 +81,7 @@ cdef class PeriodicCubicalComplex:
periodic_dimensions=None, perseus_file=''):
if ((dimensions is not None) and (top_dimensional_cells is not None)
and (periodic_dimensions is not None) and (perseus_file == '')):
- self.thisptr = new Periodic_cubical_complex_base_interface(dimensions,
- top_dimensional_cells,
- periodic_dimensions)
+ self._construct_from_cells(dimensions, top_dimensional_cells, periodic_dimensions)
elif ((dimensions is None) and (top_dimensional_cells is not None)
and (periodic_dimensions is not None) and (perseus_file == '')):
top_dimensional_cells = np.array(top_dimensional_cells,
@@ -91,13 +89,11 @@ cdef class PeriodicCubicalComplex:
order = 'F')
dimensions = top_dimensional_cells.shape
top_dimensional_cells = top_dimensional_cells.ravel(order='F')
- self.thisptr = new Periodic_cubical_complex_base_interface(dimensions,
- top_dimensional_cells,
- periodic_dimensions)
+ self._construct_from_cells(dimensions, top_dimensional_cells, periodic_dimensions)
elif ((dimensions is None) and (top_dimensional_cells is None)
and (periodic_dimensions is None) and (perseus_file != '')):
if os.path.isfile(perseus_file):
- self.thisptr = new Periodic_cubical_complex_base_interface(perseus_file.encode('utf-8'))
+ self._construct_from_file(perseus_file.encode('utf-8'))
else:
print("file " + perseus_file + " not found.", file=sys.stderr)
else:
@@ -106,6 +102,14 @@ cdef class PeriodicCubicalComplex:
"top_dimensional_cells and periodic_dimensions or from "
"a Perseus-style file name.", file=sys.stderr)
+ def _construct_from_cells(self, vector[unsigned] dimensions, vector[double] top_dimensional_cells, vector[bool] periodic_dimensions):
+ with nogil:
+ self.thisptr = new Periodic_cubical_complex_base_interface(dimensions, top_dimensional_cells, periodic_dimensions)
+
+ def _construct_from_file(self, string filename):
+ with nogil:
+ self.thisptr = new Periodic_cubical_complex_base_interface(filename)
+
def __dealloc__(self):
if self.thisptr != NULL:
del self.thisptr
@@ -156,8 +160,11 @@ cdef class PeriodicCubicalComplex:
if self.pcohptr != NULL:
del self.pcohptr
assert self.__is_defined()
- self.pcohptr = new Periodic_cubical_complex_persistence_interface(self.thisptr, True)
- self.pcohptr.compute_persistence(homology_coeff_field, min_persistence)
+ cdef int field = homology_coeff_field
+ cdef double minp = min_persistence
+ with nogil:
+ self.pcohptr = new Periodic_cubical_complex_persistence_interface(self.thisptr, 1)
+ self.pcohptr.compute_persistence(field, minp)
def persistence(self, homology_coeff_field=11, min_persistence=0):
"""This function computes and returns the persistence of the complex.
@@ -204,28 +211,27 @@ cdef class PeriodicCubicalComplex:
The indices of the arrays in the list correspond to the homological dimensions, and the
integers of each row in each array correspond to: (index of positive top-dimensional cell).
"""
+ assert self.pcohptr != NULL, "compute_persistence() must be called before cofaces_of_persistence_pairs()"
cdef vector[vector[int]] persistence_result
- if self.pcohptr != NULL:
- output = [[],[]]
+
+ output = [[],[]]
+ with nogil:
persistence_result = self.pcohptr.cofaces_of_cubical_persistence_pairs()
- pr = np.array(persistence_result)
-
- ess_ind = np.argwhere(pr[:,2] == -1)[:,0]
- ess = pr[ess_ind]
- max_h = max(ess[:,0])+1
- for h in range(max_h):
- hidxs = np.argwhere(ess[:,0] == h)[:,0]
- output[1].append(ess[hidxs][:,1])
-
- reg_ind = np.setdiff1d(np.array(range(len(pr))), ess_ind)
- reg = pr[reg_ind]
- max_h = max(reg[:,0])+1
- for h in range(max_h):
- hidxs = np.argwhere(reg[:,0] == h)[:,0]
- output[0].append(reg[hidxs][:,1:])
- else:
- print("cofaces_of_persistence_pairs function requires persistence function"
- " to be launched first.")
+ pr = np.array(persistence_result)
+
+ ess_ind = np.argwhere(pr[:,2] == -1)[:,0]
+ ess = pr[ess_ind]
+ max_h = max(ess[:,0])+1 if len(ess) > 0 else 0
+ for h in range(max_h):
+ hidxs = np.argwhere(ess[:,0] == h)[:,0]
+ output[1].append(ess[hidxs][:,1])
+
+ reg_ind = np.setdiff1d(np.array(range(len(pr))), ess_ind)
+ reg = pr[reg_ind]
+ max_h = max(reg[:,0])+1 if len(reg) > 0 else 0
+ for h in range(max_h):
+ hidxs = np.argwhere(reg[:,0] == h)[:,0]
+ output[0].append(reg[hidxs][:,1:])
return output
def betti_numbers(self):
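
With the new assertion, the intended calling sequence reads as follows (sketched on a small periodic bitmap):

    import numpy as np
    import gudhi

    cells = np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]])
    pcc = gudhi.PeriodicCubicalComplex(top_dimensional_cells=cells,
                                       periodic_dimensions=[True, True])
    pcc.persistence()  # must be called before asking for cofaces
    regular, essential = pcc.cofaces_of_persistence_pairs()
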
diff --git a/src/python/gudhi/persistence_graphical_tools.py b/src/python/gudhi/persistence_graphical_tools.py
index 6a74a6ca..848dc03e 100644
--- a/src/python/gudhi/persistence_graphical_tools.py
+++ b/src/python/gudhi/persistence_graphical_tools.py
@@ -11,6 +11,7 @@
from os import path
from math import isfinite
import numpy as np
+from functools import lru_cache
from gudhi.reader_utils import read_persistence_intervals_in_dimension
from gudhi.reader_utils import read_persistence_intervals_grouped_by_dimension
@@ -19,6 +20,7 @@ __author__ = "Vincent Rouvreau, Bertrand Michel, Theo Lacombe"
__copyright__ = "Copyright (C) 2016 Inria"
__license__ = "MIT"
+_gudhi_matplotlib_use_tex = True
def __min_birth_max_death(persistence, band=0.0):
"""This function returns (min_birth, max_death) from the persistence.
@@ -56,6 +58,17 @@ def _array_handler(a):
else:
return a
+@lru_cache(maxsize=1)
+def _matplotlib_can_use_tex():
+ """This function returns True if matplotlib can deal with LaTeX, False otherwise.
+ The returned value is cached.
+ """
+ try:
+ from matplotlib import checkdep_usetex
+ return checkdep_usetex(True)
+ except ImportError:
+ print("This function is not available, you may be missing matplotlib.")
+
def plot_persistence_barcode(
persistence=[],
@@ -106,8 +119,12 @@ def plot_persistence_barcode(
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib import rc
- plt.rc('text', usetex=True)
- plt.rc('font', family='serif')
+ if _gudhi_matplotlib_use_tex and _matplotlib_can_use_tex():
+ plt.rc('text', usetex=True)
+ plt.rc('font', family='serif')
+ else:
+ plt.rc('text', usetex=False)
+ plt.rc('font', family='DejaVu Sans')
if persistence_file != "":
if path.isfile(persistence_file):
@@ -251,8 +268,12 @@ def plot_persistence_diagram(
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib import rc
- plt.rc('text', usetex=True)
- plt.rc('font', family='serif')
+ if _gudhi_matplotlib_use_tex and _matplotlib_can_use_tex():
+ plt.rc('text', usetex=True)
+ plt.rc('font', family='serif')
+ else:
+ plt.rc('text', usetex=False)
+ plt.rc('font', family='DejaVu Sans')
if persistence_file != "":
if path.isfile(persistence_file):
@@ -423,8 +444,12 @@ def plot_persistence_density(
import matplotlib.patches as mpatches
from scipy.stats import kde
from matplotlib import rc
- plt.rc('text', usetex=True)
- plt.rc('font', family='serif')
+ if _gudhi_matplotlib_use_tex and _matplotlib_can_use_tex():
+ plt.rc('text', usetex=True)
+ plt.rc('font', family='serif')
+ else:
+ plt.rc('text', usetex=False)
+ plt.rc('font', family='DejaVu Sans')
if persistence_file != "":
if dimension is None:
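
The module-level flag can be toggled to force the non-TeX code path even when LaTeX is installed; note that the flag is private and may change:

    import gudhi
    import gudhi.persistence_graphical_tools as pgt

    pgt._gudhi_matplotlib_use_tex = False  # fall back to DejaVu Sans labels
    diag = [(0, (0.0, 1.0)), (1, (0.5, 2.0))]
    gudhi.plot_persistence_diagram(diag)
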
diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py
index 13e16d24..55ac58e6 100644
--- a/src/python/gudhi/point_cloud/dtm.py
+++ b/src/python/gudhi/point_cloud/dtm.py
@@ -8,6 +8,7 @@
# - YYYY/MM Author: Description of the modification
from .knn import KNearestNeighbors
+import numpy as np
__author__ = "Marc Glisse"
__copyright__ = "Copyright (C) 2020 Inria"
@@ -68,3 +69,111 @@ class DistanceToMeasure:
# We compute too many powers, 1/p in knn then q in dtm, 1/q in dtm then q or some log in the caller.
# Add option to skip the final root?
return dtm
+
+
+class DTMDensity:
+ """
+ Density estimator based on the distance to the empirical measure defined by a point set, as defined
+ in :cite:`dtmdensity`. Note that this implementation only renormalizes when asked, and the renormalization
+ only works for a Euclidean metric, so in other cases the total measure may not be 1.
+
+ .. note:: When the dimension is high, using it as an exponent can quickly lead to under- or overflows.
+ We recommend using a small fixed value instead in those cases, even if it won't have the same nice
+ theoretical properties as the dimension.
+ """
+
+ def __init__(self, k=None, weights=None, q=None, dim=None, normalize=False, n_samples=None, **kwargs):
+ """
+ Args:
+ k (int): number of neighbors (possibly including the point itself). Optional if it can be guessed
+ from weights or metric="neighbors".
+ weights (numpy.array): weights of each of the k neighbors, optional. They are supposed to sum to 1.
+ q (float): order used to compute the distance to measure. Defaults to dim.
+ dim (float): final exponent representing the dimension. Defaults to the dimension, and must be specified
+ when the dimension cannot be read from the input (metric is "neighbors" or "precomputed").
+ normalize (bool): normalize the density so it corresponds to a probability measure on ℝᵈ.
+ Only available for the Euclidean metric, defaults to False.
+ n_samples (int): number of sample points used for fitting. Only needed if `normalize` is True and
+ metric is "neighbors".
+ kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNearestNeighbors`, except that
+ metric="neighbors" means that :func:`transform` expects an array with the distances to
+ the k nearest neighbors.
+ """
+ if weights is None:
+ self.k = k
+ if k is None:
+ assert kwargs.get("metric") == "neighbors", 'Must specify k or weights, unless metric is "neighbors"'
+ self.weights = None
+ else:
+ self.weights = np.full(k, 1.0 / k)
+ else:
+ self.weights = weights
+ self.k = len(weights)
+ assert k is None or k == self.k, "k differs from the length of weights"
+ self.q = q
+ self.dim = dim
+ self.params = kwargs
+ self.normalize = normalize
+ self.n_samples = n_samples
+
+ def fit_transform(self, X, y=None):
+ return self.fit(X).transform(X)
+
+ def fit(self, X, y=None):
+ """
+ Args:
+ X (numpy.array): coordinates for mass points.
+ """
+ if self.params.setdefault("metric", "euclidean") != "neighbors":
+ self.knn = KNearestNeighbors(
+ self.k, return_index=False, return_distance=True, sort_results=False, **self.params
+ )
+ self.knn.fit(X)
+ if self.params["metric"] != "precomputed":
+ self.n_samples = len(X)
+ return self
+
+ def transform(self, X):
+ """
+ Args:
+ X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed",
+ or distances to the k nearest neighbors if metric is "neighbors" (if the array has more
+ than k columns, the remaining ones are ignored).
+ """
+ q = self.q
+ dim = self.dim
+ if dim is None:
+ assert self.params["metric"] not in {
+ "neighbors",
+ "precomputed",
+ }, "dim not specified and cannot guess the dimension"
+ dim = len(X[0])
+ if q is None:
+ q = dim
+ k = self.k
+ weights = self.weights
+ if self.params["metric"] == "neighbors":
+ distances = np.asarray(X)
+ if weights is None:
+ k = distances.shape[1]
+ weights = np.full(k, 1.0 / k)
+ else:
+ distances = distances[:, :k]
+ else:
+ distances = self.knn.transform(X)
+ distances = distances ** q
+ dtm = (distances * weights).sum(-1)
+ if self.normalize:
+ dtm /= (np.arange(1, k + 1) ** (q / dim) * weights).sum()
+ density = dtm ** (-dim / q)
+ if self.normalize:
+ import math
+
+ if self.params["metric"] == "precomputed":
+ self.n_samples = len(X[0])
+ # Volume of d-ball
+ Vd = math.pi ** (dim / 2) / math.gamma(dim / 2 + 1)
+ density /= self.n_samples * Vd
+ return density
+ # We compute too many powers, 1/p in knn then q in dtm, d/q in dtm then whatever in the caller.
+ # Add option to skip the final root?
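
A sketch of the two input modes (random points; dim must be given explicitly when metric is "neighbors"):

    import numpy as np
    from gudhi.point_cloud.dtm import DTMDensity
    from gudhi.point_cloud.knn import KNearestNeighbors

    pts = np.random.default_rng(0).normal(size=(200, 3))
    # Directly from coordinates: dim is read from the input.
    dens = DTMDensity(k=10).fit_transform(pts)
    # From precomputed k-NN distances, via metric="neighbors".
    dists = KNearestNeighbors(k=10, return_index=False, return_distance=True).fit_transform(pts)
    dens2 = DTMDensity(dim=3, metric="neighbors").fit_transform(dists)
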
diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py
index 86008bc3..994be3b6 100644
--- a/src/python/gudhi/point_cloud/knn.py
+++ b/src/python/gudhi/point_cloud/knn.py
@@ -46,7 +46,7 @@ class KNearestNeighbors:
sort_results (bool): if True, then distances and indices of each point are
sorted on return, so that the first column contains the closest points.
Otherwise, neighbors are returned in an arbitrary order. Defaults to True.
- enable_autodiff (bool): if the input is a torch.tensor, jax.numpy.ndarray or tensorflow.Tensor, this
+ enable_autodiff (bool): if the input is a torch.tensor or tensorflow.Tensor, this
instructs the function to compute distances in a way that works with automatic differentiation.
This is experimental, not supported for all metrics, and requires the package EagerPy.
Defaults to False.
@@ -306,6 +306,10 @@ class KNearestNeighbors:
if self.params["implementation"] == "ckdtree":
qargs = {key: val for key, val in self.params.items() if key in {"p", "eps", "n_jobs"}}
distances, neighbors = self.kdtree.query(X, k=self.k, **qargs)
+ if k == 1:
+ # SciPy decided to squeeze the last dimension for k=1
+ distances = distances[:, None]
+ neighbors = neighbors[:, None]
if self.return_index:
if self.return_distance:
return neighbors, distances
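
The effect of the fix, sketched on a toy input:

    import numpy as np
    from gudhi.point_cloud.knn import KNearestNeighbors

    pts = np.array([[0.0], [1.0], [3.0]])
    knn = KNearestNeighbors(k=1, return_distance=True, implementation="ckdtree")
    neighbors, distances = knn.fit_transform(pts)
    # Both arrays keep shape (n, k) even for k=1.
    assert neighbors.shape == distances.shape == (3, 1)
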
diff --git a/src/python/gudhi/representations/kernel_methods.py b/src/python/gudhi/representations/kernel_methods.py
index 596f4f07..23fd23c7 100644
--- a/src/python/gudhi/representations/kernel_methods.py
+++ b/src/python/gudhi/representations/kernel_methods.py
@@ -10,7 +10,7 @@
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import pairwise_distances, pairwise_kernels
-from .metrics import SlicedWassersteinDistance, PersistenceFisherDistance, _sklearn_wrapper, pairwise_persistence_diagram_distances, _sliced_wasserstein_distance, _persistence_fisher_distance
+from .metrics import SlicedWassersteinDistance, PersistenceFisherDistance, _sklearn_wrapper, _pairwise, pairwise_persistence_diagram_distances, _sliced_wasserstein_distance, _persistence_fisher_distance
from .preprocessing import Padding
#############################################
@@ -60,7 +60,7 @@ def _persistence_scale_space_kernel(D1, D2, kernel_approx=None, bandwidth=1.):
weight_pss = lambda x: 1 if x[1] >= x[0] else -1
return 0.5 * _persistence_weighted_gaussian_kernel(DD1, DD2, weight=weight_pss, kernel_approx=kernel_approx, bandwidth=bandwidth)
-def pairwise_persistence_diagram_kernels(X, Y=None, kernel="sliced_wasserstein", **kwargs):
+def pairwise_persistence_diagram_kernels(X, Y=None, kernel="sliced_wasserstein", n_jobs=None, **kwargs):
"""
This function computes the kernel matrix between two lists of persistence diagrams given as numpy arrays of shape (nx2).
@@ -68,38 +68,41 @@ def pairwise_persistence_diagram_kernels(X, Y=None, kernel="sliced_wasserstein",
X (list of n numpy arrays of shape (numx2)): first list of persistence diagrams.
Y (list of m numpy arrays of shape (numx2)): second list of persistence diagrams (optional). If None, pairwise kernel values are computed from the first list only.
kernel: kernel to use. It can be either a string ("sliced_wasserstein", "persistence_scale_space", "persistence_weighted_gaussian", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. If it is a function, make sure that it is symmetric.
+ n_jobs (int): number of jobs to use for the computation. This uses joblib.Parallel(prefer="threads"), so kernels that do not release the GIL may not scale unless run inside a `joblib.parallel_backend <https://joblib.readthedocs.io/en/latest/parallel.html#joblib.parallel_backend>`_ block.
**kwargs: optional keyword parameters. Any further parameters are passed directly to the kernel function. See the docs of the various kernel classes in this module.
Returns:
numpy array of shape (nxm): kernel matrix.
"""
XX = np.reshape(np.arange(len(X)), [-1,1])
- YY = None if Y is None else np.reshape(np.arange(len(Y)), [-1,1])
+ YY = None if Y is None or Y is X else np.reshape(np.arange(len(Y)), [-1,1])
if kernel == "sliced_wasserstein":
- return np.exp(-pairwise_persistence_diagram_distances(X, Y, metric="sliced_wasserstein", num_directions=kwargs["num_directions"]) / kwargs["bandwidth"])
+ return np.exp(-pairwise_persistence_diagram_distances(X, Y, metric="sliced_wasserstein", num_directions=kwargs["num_directions"], n_jobs=n_jobs) / kwargs["bandwidth"])
elif kernel == "persistence_fisher":
- return np.exp(-pairwise_persistence_diagram_distances(X, Y, metric="persistence_fisher", kernel_approx=kwargs["kernel_approx"], bandwidth=kwargs["bandwidth"]) / kwargs["bandwidth_fisher"])
+ return np.exp(-pairwise_persistence_diagram_distances(X, Y, metric="persistence_fisher", kernel_approx=kwargs["kernel_approx"], bandwidth=kwargs["bandwidth"], n_jobs=n_jobs) / kwargs["bandwidth_fisher"])
elif kernel == "persistence_scale_space":
- return pairwise_kernels(XX, YY, metric=_sklearn_wrapper(_persistence_scale_space_kernel, X, Y, **kwargs))
+ return _pairwise(pairwise_kernels, False, XX, YY, metric=_sklearn_wrapper(_persistence_scale_space_kernel, X, Y, **kwargs), n_jobs=n_jobs)
elif kernel == "persistence_weighted_gaussian":
- return pairwise_kernels(XX, YY, metric=_sklearn_wrapper(_persistence_weighted_gaussian_kernel, X, Y, **kwargs))
+ return _pairwise(pairwise_kernels, False, XX, YY, metric=_sklearn_wrapper(_persistence_weighted_gaussian_kernel, X, Y, **kwargs), n_jobs=n_jobs)
else:
- return pairwise_kernels(XX, YY, metric=_sklearn_wrapper(metric, **kwargs))
+ return _pairwise(pairwise_kernels, False, XX, YY, metric=_sklearn_wrapper(kernel, X, Y, **kwargs), n_jobs=n_jobs)
class SlicedWassersteinKernel(BaseEstimator, TransformerMixin):
"""
This is a class for computing the sliced Wasserstein kernel matrix from a list of persistence diagrams. The sliced Wasserstein kernel is computed by exponentiating the corresponding sliced Wasserstein distance with a Gaussian kernel. See http://proceedings.mlr.press/v70/carriere17a.html for more details.
"""
- def __init__(self, num_directions=10, bandwidth=1.0):
+ def __init__(self, num_directions=10, bandwidth=1.0, n_jobs=None):
"""
Constructor for the SlicedWassersteinKernel class.
Parameters:
bandwidth (double): bandwidth of the Gaussian kernel applied to the sliced Wasserstein distance (default 1.).
num_directions (int): number of lines evenly sampled from [-pi/2,pi/2] in order to approximate and speed up the kernel computation (default 10).
+ n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_kernels` for details.
"""
self.bandwidth = bandwidth
self.num_directions = num_directions
+ self.n_jobs = n_jobs
def fit(self, X, y=None):
"""
@@ -122,7 +125,7 @@ class SlicedWassersteinKernel(BaseEstimator, TransformerMixin):
Returns:
numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise sliced Wasserstein kernel values.
"""
- return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="sliced_wasserstein", bandwidth=self.bandwidth, num_directions=self.num_directions)
+ return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="sliced_wasserstein", bandwidth=self.bandwidth, num_directions=self.num_directions, n_jobs=self.n_jobs)
def __call__(self, diag1, diag2):
"""
@@ -141,7 +144,7 @@ class PersistenceWeightedGaussianKernel(BaseEstimator, TransformerMixin):
"""
This is a class for computing the persistence weighted Gaussian kernel matrix from a list of persistence diagrams. The persistence weighted Gaussian kernel is computed by convolving the persistence diagram points with weighted Gaussian kernels. See http://proceedings.mlr.press/v48/kusano16.html for more details.
"""
- def __init__(self, bandwidth=1., weight=lambda x: 1, kernel_approx=None):
+ def __init__(self, bandwidth=1., weight=lambda x: 1, kernel_approx=None, n_jobs=None):
"""
Constructor for the PersistenceWeightedGaussianKernel class.
@@ -149,9 +152,11 @@ class PersistenceWeightedGaussianKernel(BaseEstimator, TransformerMixin):
bandwidth (double): bandwidth of the Gaussian kernel with which persistence diagrams will be convolved (default 1.)
weight (function): weight function for the persistence diagram points (default constant function, i.e. lambda x: 1). This function must be defined on 2D points, i.e. lists or numpy arrays of the form [p_x,p_y].
kernel_approx (class): kernel approximation class used to speed up computation (default None). Common kernel approximation classes can be found in the scikit-learn library (such as RBFSampler for instance).
+ n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_kernels` for details.
"""
self.bandwidth, self.weight = bandwidth, weight
self.kernel_approx = kernel_approx
+ self.n_jobs = n_jobs
def fit(self, X, y=None):
"""
@@ -174,7 +179,7 @@ class PersistenceWeightedGaussianKernel(BaseEstimator, TransformerMixin):
Returns:
numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise persistence weighted Gaussian kernel values.
"""
- return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="persistence_weighted_gaussian", bandwidth=self.bandwidth, weight=self.weight, kernel_approx=self.kernel_approx)
+ return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="persistence_weighted_gaussian", bandwidth=self.bandwidth, weight=self.weight, kernel_approx=self.kernel_approx, n_jobs=self.n_jobs)
def __call__(self, diag1, diag2):
"""
@@ -193,15 +198,17 @@ class PersistenceScaleSpaceKernel(BaseEstimator, TransformerMixin):
"""
This is a class for computing the persistence scale space kernel matrix from a list of persistence diagrams. The persistence scale space kernel is computed by adding the symmetric to the diagonal of each point in each persistence diagram, with negative weight, and then convolving the points with a Gaussian kernel. See https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Reininghaus_A_Stable_Multi-Scale_2015_CVPR_paper.pdf for more details.
"""
- def __init__(self, bandwidth=1., kernel_approx=None):
+ def __init__(self, bandwidth=1., kernel_approx=None, n_jobs=None):
"""
Constructor for the PersistenceScaleSpaceKernel class.
Parameters:
bandwidth (double): bandwidth of the Gaussian kernel with which persistence diagrams will be convolved (default 1.)
kernel_approx (class): kernel approximation class used to speed up computation (default None). Common kernel approximation classes can be found in the scikit-learn library (such as RBFSampler for instance).
+ n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_kernels` for details.
"""
self.bandwidth, self.kernel_approx = bandwidth, kernel_approx
+ self.n_jobs = n_jobs
def fit(self, X, y=None):
"""
@@ -224,7 +231,7 @@ class PersistenceScaleSpaceKernel(BaseEstimator, TransformerMixin):
Returns:
numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise persistence scale space kernel values.
"""
- return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="persistence_scale_space", bandwidth=self.bandwidth, kernel_approx=self.kernel_approx)
+ return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="persistence_scale_space", bandwidth=self.bandwidth, kernel_approx=self.kernel_approx, n_jobs=self.n_jobs)
def __call__(self, diag1, diag2):
"""
@@ -243,7 +250,7 @@ class PersistenceFisherKernel(BaseEstimator, TransformerMixin):
"""
This is a class for computing the persistence Fisher kernel matrix from a list of persistence diagrams. The persistence Fisher kernel is computed by exponentiating the corresponding persistence Fisher distance with a Gaussian kernel. See papers.nips.cc/paper/8205-persistence-fisher-kernel-a-riemannian-manifold-kernel-for-persistence-diagrams for more details.
"""
- def __init__(self, bandwidth_fisher=1., bandwidth=1., kernel_approx=None):
+ def __init__(self, bandwidth_fisher=1., bandwidth=1., kernel_approx=None, n_jobs=None):
"""
Constructor for the PersistenceFisherKernel class.
@@ -251,9 +258,11 @@ class PersistenceFisherKernel(BaseEstimator, TransformerMixin):
bandwidth (double): bandwidth of the Gaussian kernel applied to the persistence Fisher distance (default 1.).
bandwidth_fisher (double): bandwidth of the Gaussian kernel used to turn persistence diagrams into probability distributions by PersistenceFisherDistance class (default 1.).
kernel_approx (class): kernel approximation class used to speed up computation (default None). Common kernel approximation classes can be found in the scikit-learn library (such as RBFSampler for instance).
+ n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_kernels` for details.
"""
self.bandwidth = bandwidth
self.bandwidth_fisher, self.kernel_approx = bandwidth_fisher, kernel_approx
+ self.n_jobs = n_jobs
def fit(self, X, y=None):
"""
@@ -276,7 +285,7 @@ class PersistenceFisherKernel(BaseEstimator, TransformerMixin):
Returns:
numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise persistence Fisher kernel values.
"""
- return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="persistence_fisher", bandwidth=self.bandwidth, bandwidth_fisher=self.bandwidth_fisher, kernel_approx=self.kernel_approx)
+ return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="persistence_fisher", bandwidth=self.bandwidth, bandwidth_fisher=self.bandwidth_fisher, kernel_approx=self.kernel_approx, n_jobs=self.n_jobs)
def __call__(self, diag1, diag2):
"""
diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py
index 8a32f7e9..142ddef1 100644
--- a/src/python/gudhi/representations/metrics.py
+++ b/src/python/gudhi/representations/metrics.py
@@ -12,6 +12,7 @@ from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import pairwise_distances
from gudhi.hera import wasserstein_distance as hera_wasserstein_distance
from .preprocessing import Padding
+from joblib import Parallel, delayed
#############################################
# Metrics ###################################
@@ -116,6 +117,20 @@ def _persistence_fisher_distance(D1, D2, kernel_approx=None, bandwidth=1.):
vectorj = vectorj/vectorj_sum
return np.arccos( min(np.dot(np.sqrt(vectori), np.sqrt(vectorj)), 1.) )
+def _pairwise(fallback, skipdiag, X, Y, metric, n_jobs):
+ # Helper shared by kernels and distances: when Y is given, defer to the
+ # scikit-learn function `fallback`; when Y is None the matrix is symmetric,
+ # so only the upper triangle is evaluated (in parallel) and then mirrored.
+ if Y is not None:
+ return fallback(X, Y, metric=metric, n_jobs=n_jobs)
+ triu = np.triu_indices(len(X), k=skipdiag) # k=1 also skips the diagonal (distances)
+ tril = (triu[1], triu[0])
+ par = Parallel(n_jobs=n_jobs, prefer="threads")
+ d = par(delayed(metric)([triu[0][i]], [triu[1][i]]) for i in range(len(triu[0])))
+ m = np.empty((len(X), len(X)))
+ m[triu] = d
+ m[tril] = d
+ if skipdiag:
+ np.fill_diagonal(m, 0)
+ return m
+
def _sklearn_wrapper(metric, X, Y, **kwargs):
"""
This function is a wrapper for any metric between two persistence diagrams that takes two numpy arrays of shapes (nx2) and (mx2) as arguments.
@@ -134,7 +149,7 @@ PAIRWISE_DISTANCE_FUNCTIONS = {
"persistence_fisher": _persistence_fisher_distance,
}
-def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", **kwargs):
+def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", n_jobs=None, **kwargs):
"""
This function computes the distance matrix between two lists of persistence diagrams given as numpy arrays of shape (nx2).
@@ -142,48 +157,51 @@ def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", **kwa
X (list of n numpy arrays of shape (numx2)): first list of persistence diagrams.
Y (list of m numpy arrays of shape (numx2)): second list of persistence diagrams (optional). If None, pairwise distances are computed from the first list only.
metric: distance to use. It can be either a string ("sliced_wasserstein", "wasserstein", "hera_wasserstein" (Wasserstein distance computed with Hera---note that Hera is also used for the default option "wasserstein"), "pot_wasserstein" (Wasserstein distance computed with POT), "bottleneck", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. If it is a function, make sure that it is symmetric and that it outputs 0 if called on the same two arrays.
+ n_jobs (int): number of jobs to use for the computation. This uses joblib.Parallel(prefer="threads"), so metrics that do not release the GIL may not scale unless run inside a `joblib.parallel_backend <https://joblib.readthedocs.io/en/latest/parallel.html#joblib.parallel_backend>`_ block.
**kwargs: optional keyword parameters. Any further parameters are passed directly to the distance function. See the docs of the various distance classes in this module.
Returns:
numpy array of shape (nxm): distance matrix
"""
XX = np.reshape(np.arange(len(X)), [-1,1])
- YY = None if Y is None else np.reshape(np.arange(len(Y)), [-1,1])
+ YY = None if Y is None or Y is X else np.reshape(np.arange(len(Y)), [-1,1])
if metric == "bottleneck":
try:
from .. import bottleneck_distance
- return pairwise_distances(XX, YY, metric=_sklearn_wrapper(bottleneck_distance, X, Y, **kwargs))
+ return _pairwise(pairwise_distances, True, XX, YY, metric=_sklearn_wrapper(bottleneck_distance, X, Y, **kwargs), n_jobs=n_jobs)
except ImportError:
print("Gudhi built without CGAL")
raise
elif metric == "pot_wasserstein":
try:
from gudhi.wasserstein import wasserstein_distance as pot_wasserstein_distance
- return pairwise_distances(XX, YY, metric=_sklearn_wrapper(pot_wasserstein_distance, X, Y, **kwargs))
+ return _pairwise(pairwise_distances, True, XX, YY, metric=_sklearn_wrapper(pot_wasserstein_distance, X, Y, **kwargs), n_jobs=n_jobs)
except ImportError:
print("POT (Python Optimal Transport) is not installed. Please install POT or use metric='wasserstein' or metric='hera_wasserstein'")
raise
elif metric == "sliced_wasserstein":
Xproj = _compute_persistence_diagram_projections(X, **kwargs)
Yproj = None if Y is None else _compute_persistence_diagram_projections(Y, **kwargs)
- return pairwise_distances(XX, YY, metric=_sklearn_wrapper(_sliced_wasserstein_distance_on_projections, Xproj, Yproj))
+ return _pairwise(pairwise_distances, True, XX, YY, metric=_sklearn_wrapper(_sliced_wasserstein_distance_on_projections, Xproj, Yproj), n_jobs=n_jobs)
elif type(metric) == str:
- return pairwise_distances(XX, YY, metric=_sklearn_wrapper(PAIRWISE_DISTANCE_FUNCTIONS[metric], X, Y, **kwargs))
+ return _pairwise(pairwise_distances, True, XX, YY, metric=_sklearn_wrapper(PAIRWISE_DISTANCE_FUNCTIONS[metric], X, Y, **kwargs), n_jobs=n_jobs)
else:
- return pairwise_distances(XX, YY, metric=_sklearn_wrapper(metric, X, Y, **kwargs))
+ return _pairwise(pairwise_distances, True, XX, YY, metric=_sklearn_wrapper(metric, X, Y, **kwargs), n_jobs=n_jobs)
class SlicedWassersteinDistance(BaseEstimator, TransformerMixin):
"""
This is a class for computing the sliced Wasserstein distance matrix from a list of persistence diagrams. The Sliced Wasserstein distance is computed by projecting the persistence diagrams onto lines, comparing the projections with the 1-norm, and finally integrating over all possible lines. See http://proceedings.mlr.press/v70/carriere17a.html for more details.
"""
- def __init__(self, num_directions=10):
+ def __init__(self, num_directions=10, n_jobs=None):
"""
Constructor for the SlicedWassersteinDistance class.
Parameters:
num_directions (int): number of lines evenly sampled from [-pi/2,pi/2] in order to approximate and speed up the distance computation (default 10).
+ n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_distances` for details.
"""
self.num_directions = num_directions
+ self.n_jobs = n_jobs
def fit(self, X, y=None):
"""
@@ -206,7 +224,7 @@ class SlicedWassersteinDistance(BaseEstimator, TransformerMixin):
Returns:
numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise sliced Wasserstein distances.
"""
- return pairwise_persistence_diagram_distances(X, self.diagrams_, metric="sliced_wasserstein", num_directions=self.num_directions)
+ return pairwise_persistence_diagram_distances(X, self.diagrams_, metric="sliced_wasserstein", num_directions=self.num_directions, n_jobs=self.n_jobs)
def __call__(self, diag1, diag2):
"""
@@ -227,14 +245,16 @@ class BottleneckDistance(BaseEstimator, TransformerMixin):
:Requires: `CGAL <installation.html#cgal>`_ :math:`\geq` 4.11.0
"""
- def __init__(self, epsilon=None):
+ def __init__(self, epsilon=None, n_jobs=None):
"""
Constructor for the BottleneckDistance class.
Parameters:
epsilon (double): absolute (additive) error tolerated on the distance (default is the smallest positive float), see :func:`gudhi.bottleneck_distance`.
+ n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_distances` for details.
"""
self.epsilon = epsilon
+ self.n_jobs = n_jobs
def fit(self, X, y=None):
"""
@@ -257,7 +277,7 @@ class BottleneckDistance(BaseEstimator, TransformerMixin):
Returns:
numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise bottleneck distances.
"""
- Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric="bottleneck", e=self.epsilon)
+ Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric="bottleneck", e=self.epsilon, n_jobs=self.n_jobs)
return Xfit
def __call__(self, diag1, diag2):
@@ -282,15 +302,17 @@ class PersistenceFisherDistance(BaseEstimator, TransformerMixin):
"""
This is a class for computing the persistence Fisher distance matrix from a list of persistence diagrams. The persistence Fisher distance is obtained by computing the original Fisher distance between the probability distributions associated to the persistence diagrams given by convolving them with a Gaussian kernel. See http://papers.nips.cc/paper/8205-persistence-fisher-kernel-a-riemannian-manifold-kernel-for-persistence-diagrams for more details.
"""
- def __init__(self, bandwidth=1., kernel_approx=None):
+ def __init__(self, bandwidth=1., kernel_approx=None, n_jobs=None):
"""
Constructor for the PersistenceFisherDistance class.
Parameters:
bandwidth (double): bandwidth of the Gaussian kernel used to turn persistence diagrams into probability distributions (default 1.).
kernel_approx (class): kernel approximation class used to speed up computation (default None). Common kernel approximation classes can be found in the scikit-learn library (such as RBFSampler for instance).
+ n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_distances` for details.
"""
self.bandwidth, self.kernel_approx = bandwidth, kernel_approx
+ self.n_jobs = n_jobs
def fit(self, X, y=None):
"""
@@ -313,7 +335,7 @@ class PersistenceFisherDistance(BaseEstimator, TransformerMixin):
Returns:
numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise persistence Fisher distances.
"""
- return pairwise_persistence_diagram_distances(X, self.diagrams_, metric="persistence_fisher", bandwidth=self.bandwidth, kernel_approx=self.kernel_approx)
+ return pairwise_persistence_diagram_distances(X, self.diagrams_, metric="persistence_fisher", bandwidth=self.bandwidth, kernel_approx=self.kernel_approx, n_jobs=self.n_jobs)
def __call__(self, diag1, diag2):
"""
@@ -328,23 +350,32 @@ class PersistenceFisherDistance(BaseEstimator, TransformerMixin):
"""
return _persistence_fisher_distance(diag1, diag2, bandwidth=self.bandwidth, kernel_approx=self.kernel_approx)
+
class WassersteinDistance(BaseEstimator, TransformerMixin):
"""
This is a class for computing the Wasserstein distance matrix from a list of persistence diagrams.
"""
- def __init__(self, order=2, internal_p=2, mode="pot", delta=0.01):
+
+ def __init__(self, order=1, internal_p=np.inf, mode="hera", delta=0.01, n_jobs=None):
"""
Constructor for the WassersteinDistance class.
Parameters:
- order (int): exponent for Wasserstein, default value is 2., see :func:`gudhi.wasserstein.wasserstein_distance`.
- internal_p (int): ground metric on the (upper-half) plane (i.e. norm l_p in R^2), default value is 2 (euclidean norm), see :func:`gudhi.wasserstein.wasserstein_distance`.
- mode (str): method for computing Wasserstein distance. Either "pot" or "hera".
+ order (int): exponent for the Wasserstein distance, default value is 1, see :func:`gudhi.wasserstein.wasserstein_distance`.
+ internal_p (float): ground metric on the (upper-half) plane (i.e. norm l_p in R^2), default value is `np.inf`, see :func:`gudhi.wasserstein.wasserstein_distance`.
+ mode (str): method for computing the Wasserstein distance. Either "pot" or "hera" (default "hera").
delta (float): relative error 1+delta. Used only if mode == "hera".
+ n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_distances` for details.
"""
self.order, self.internal_p, self.mode = order, internal_p, mode
- self.metric = "pot_wasserstein" if mode == "pot" else "hera_wasserstein"
+ if mode == "pot":
+ self.metric = "pot_wasserstein"
+ elif mode == "hera":
+ self.metric = "hera_wasserstein"
+ else:
+ raise ValueError("Unknown mode. Current available values for mode are 'hera' and 'pot'")
self.delta = delta
+ self.n_jobs = n_jobs
def fit(self, X, y=None):
"""
@@ -368,9 +399,9 @@ class WassersteinDistance(BaseEstimator, TransformerMixin):
numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise Wasserstein distances.
"""
if self.metric == "hera_wasserstein":
- Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric=self.metric, order=self.order, internal_p=self.internal_p, delta=self.delta)
+ Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric=self.metric, order=self.order, internal_p=self.internal_p, delta=self.delta, n_jobs=self.n_jobs)
else:
- Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric=self.metric, order=self.order, internal_p=self.internal_p, matching=False)
+ Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric=self.metric, order=self.order, internal_p=self.internal_p, matching=False, n_jobs=self.n_jobs)
return Xfit
def __call__(self, diag1, diag2):
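A sketch of how `n_jobs` combines with a joblib backend, as hinted in the docstring above (toy diagrams; the backend choice is illustrative):

    import numpy as np
    from joblib import parallel_backend
    from gudhi.representations.metrics import pairwise_persistence_diagram_distances

    diags = [np.array([[0.0, 1.0]]), np.array([[0.0, 2.0]]), np.array([[1.0, 3.0]])]

    # Threads are enough for metrics that release the GIL.
    D = pairwise_persistence_diagram_distances(diags, metric="hera_wasserstein", n_jobs=2)

    # A process-based backend can help for pure-Python metrics that hold the GIL.
    with parallel_backend("loky", n_jobs=2):
        D = pairwise_persistence_diagram_distances(diags, metric="persistence_fisher", n_jobs=2)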
diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 46fee086..5ca127f6 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -1,16 +1,17 @@
# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
-# Author(s): Mathieu Carrière
+# Author(s): Mathieu Carrière, Martin Royer
#
-# Copyright (C) 2018-2019 Inria
+# Copyright (C) 2018-2020 Inria
#
# Modification(s):
-# - YYYY/MM Author: Description of the modification
+# - 2020/06 Martin: ATOL integration
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler
from sklearn.neighbors import DistanceMetric
+from sklearn.metrics import pairwise
from .preprocessing import DiagramScaler, BirthPersistenceTransform
@@ -574,3 +575,140 @@ class ComplexPolynomial(BaseEstimator, TransformerMixin):
numpy array with shape (**threshold**): output complex vector of coefficients.
"""
return self.fit_transform([diag])[0,:]
+
+def _lapl_contrast(measure, centers, inertias):
+ """Laplacian contrast function for vectorising `measure` in ATOL"""
+ return np.exp(-pairwise.pairwise_distances(measure, Y=centers) / inertias)
+
+def _gaus_contrast(measure, centers, inertias):
+ """Gaussian contrast function for vectorising `measure` in ATOL"""
+ return np.exp(-pairwise.pairwise_distances(measure, Y=centers, squared=True) / inertias**2)
+
+def _indicator_contrast(measure, centers, inertias):
+ """Indicator contrast function for vectorising `measure` in ATOL"""
+ robe_curve = np.clip(2 - pairwise.pairwise_distances(measure, Y=centers) / inertias, 0, 1)
+ return robe_curve
+
+def _cloud_weighting(measure):
+ """automatic uniform weighting with mass 1 for `measure` in ATOL"""
+ return np.ones(shape=measure.shape[0])
+
+def _iidproba_weighting(measure):
+ """automatic uniform weighting with mass 1/N for `measure` in ATOL"""
+ return np.ones(shape=measure.shape[0]) / measure.shape[0]
+
+class Atol(BaseEstimator, TransformerMixin):
+ """
+ This class vectorises measures (e.g. point clouds, persistence diagrams, etc) after a quantisation step.
+
+ ATOL paper: :cite:`royer2019atol`
+
+ Example
+ --------
+ >>> from sklearn.cluster import KMeans
+ >>> from gudhi.representations.vector_methods import Atol
+ >>> import numpy as np
+ >>> a = np.array([[1, 2, 4], [1, 4, 0], [1, 0, 4]])
+ >>> b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]])
+ >>> c = np.array([[3, 2, -1], [1, 2, -1]])
+ >>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006))
+ >>> atol_vectoriser.fit(X=[a, b, c]).centers
+ array([[ 2. , 0.66666667, 3.33333333],
+ [ 2.6 , 2.8 , -0.4 ]])
+ >>> atol_vectoriser(a)
+ array([1.18168665, 0.42375966])
+ >>> atol_vectoriser(c)
+ array([0.02062512, 1.25157463])
+ >>> atol_vectoriser.transform(X=[a, b, c])
+ array([[1.18168665, 0.42375966],
+ [0.29861028, 1.06330156],
+ [0.02062512, 1.25157463]])
+ """
+ def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"):
+ """
+ Constructor for the Atol measure vectorisation class.
+
+ Parameters:
+ quantiser (Object): Object with a `fit` method (sklearn API consistent) and `cluster_centers_` and `n_clusters`
+ attributes, e.g. sklearn.cluster.KMeans. It will be fitted when the Atol object function `fit` is called.
+ weighting_method (string): constant generic function for weighting the measure points,
+ chosen from {"cloud", "iidproba"}
+ (default: constant function, i.e. the measure is seen as a point cloud by default).
+ This will have no impact if weights are provided along with measures all the way: `fit` and `transform`.
+ contrast (string): constant function for evaluating the proximity of a measure with respect to centers,
+ chosen from {"gaussian", "laplacian", "indicator"}
+ (default: gaussian contrast function, see page 3 in the ATOL paper).
+ """
+ self.quantiser = quantiser
+ self.contrast = {
+ "gaussian": _gaus_contrast,
+ "laplacian": _lapl_contrast,
+ "indicator": _indicator_contrast,
+ }.get(contrast, _gaus_contrast)
+ self.weighting_method = {
+ "cloud" : _cloud_weighting,
+ "iidproba": _iidproba_weighting,
+ }.get(weighting_method, _cloud_weighting)
+
+ def fit(self, X, y=None, sample_weight=None):
+ """
+ Calibration step: fit centers to the sample measures and derive inertias between centers.
+
+ Parameters:
+ X (list N x d numpy arrays): input measures in R^d from which to learn center locations and inertias
+ (measures can have different N).
+ y: Ignored, present for API consistency by convention.
+ sample_weight (list of numpy arrays): weights for each measure point in X, optional.
+ If None, the object's weighting_method will be used.
+
+ Returns:
+ self
+ """
+ if not hasattr(self.quantiser, 'fit'):
+ raise TypeError("quantiser %s has no `fit` attribute." % (self.quantiser))
+ if sample_weight is None:
+ sample_weight = np.concatenate([self.weighting_method(measure) for measure in X])
+ else:
+ # One weight array per measure (see docstring): concatenate to match measures_concat.
+ sample_weight = np.concatenate(sample_weight)
+
+ measures_concat = np.concatenate(X)
+ self.quantiser.fit(X=measures_concat, sample_weight=sample_weight)
+ self.centers = self.quantiser.cluster_centers_
+ if self.quantiser.n_clusters == 1:
+ dist_centers = pairwise.pairwise_distances(measures_concat)
+ np.fill_diagonal(dist_centers, 0)
+ self.inertias = np.array([np.max(dist_centers)/2])
+ else:
+ dist_centers = pairwise.pairwise_distances(self.centers)
+ dist_centers[dist_centers == 0] = np.inf
+ self.inertias = np.min(dist_centers, axis=0)/2
+ return self
+
+ def __call__(self, measure, sample_weight=None):
+ """
+ Apply measure vectorisation on a single measure.
+
+ Parameters:
+ measure (n x d numpy array): input measure in R^d.
+ sample_weight (numpy array of length n): weights for each point of the measure, optional.
+ If None, the object's weighting_method will be used.
+
+ Returns:
+ numpy array in R^self.quantiser.n_clusters.
+ """
+ if sample_weight is None:
+ sample_weight = self.weighting_method(measure)
+ return np.sum(sample_weight * self.contrast(measure, self.centers, self.inertias.T).T, axis=1)
+
+ def transform(self, X, sample_weight=None):
+ """
+ Apply measure vectorisation on a list of measures.
+
+ Parameters:
+ X (list N x d numpy arrays): input measures in R^d to vectorise
+ (measures can have different N).
+ sample_weight (list of numpy arrays): weights for each measure point in X, optional.
+ If None, the object's weighting_method will be used.
+
+ Returns:
+ numpy array with shape (number of measures) x (self.quantiser.n_clusters).
+ """
+ if sample_weight is None:
+ sample_weight = [self.weighting_method(measure) for measure in X]
+ return np.stack([self(measure, sample_weight=weight) for measure, weight in zip(X, sample_weight)])
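A short sketch of weighted use of Atol, assuming one weight array per measure (when no weights are given, the chosen weighting_method applies):

    import numpy as np
    from sklearn.cluster import KMeans
    from gudhi.representations.vector_methods import Atol

    a = np.array([[1, 2, 4], [1, 4, 0], [1, 0, 4]])
    b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]])

    atol = Atol(quantiser=KMeans(n_clusters=2), weighting_method="iidproba")
    atol.fit([a, b])
    # transform takes one weight array per measure; here the points of b count double.
    vectors = atol.transform([a, b], sample_weight=[np.ones(3), 2 * np.ones(3)])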
diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd
index e748ac40..75e94e0b 100644
--- a/src/python/gudhi/simplex_tree.pxd
+++ b/src/python/gudhi/simplex_tree.pxd
@@ -57,6 +57,7 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi":
bool make_filtration_non_decreasing() nogil
void compute_extended_filtration() nogil
vector[vector[pair[int, pair[double, double]]]] compute_extended_persistence_subdiagrams(vector[pair[int, pair[double, double]]] dgm, double min_persistence) nogil
+ Simplex_tree_interface_full_featured* collapse_edges(int nb_collapse_iteration) nogil
# Iterators over Simplex tree
pair[vector[int], double] get_simplex_and_filtration(Simplex_tree_simplex_handle f_simplex) nogil
Simplex_tree_simplices_iterator get_simplices_iterator_begin() nogil
diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx
index 9cb24221..dfb1d985 100644
--- a/src/python/gudhi/simplex_tree.pyx
+++ b/src/python/gudhi/simplex_tree.pyx
@@ -69,7 +69,7 @@ cdef class SimplexTree:
this simplicial complex, or +infinity if it is not in the complex.
:param simplex: The N-simplex, represented by a list of vertices.
- :type simplex: list of int.
+ :type simplex: list of int
:returns: The simplicial complex filtration value.
:rtype: float
"""
@@ -80,7 +80,7 @@ cdef class SimplexTree:
given N-simplex.
:param simplex: The N-simplex, represented by a list of vertices.
- :type simplex: list of int.
+ :type simplex: list of int
:param filtration: The new filtration value.
:type filtration: float
@@ -153,7 +153,7 @@ cdef class SimplexTree:
"""This function sets the dimension of the simplicial complex.
:param dimension: The new dimension value.
- :type dimension: int.
+ :type dimension: int
.. note::
@@ -172,7 +172,7 @@ cdef class SimplexTree:
complex or not.
:param simplex: The N-simplex to find, represented by a list of vertices.
- :type simplex: list of int.
+ :type simplex: list of int
:returns: true if the simplex was found, false otherwise.
:rtype: bool
"""
@@ -186,9 +186,9 @@ cdef class SimplexTree:
:param simplex: The N-simplex to insert, represented by a list of
vertices.
- :type simplex: list of int.
+ :type simplex: list of int
:param filtration: The filtration value of the simplex.
- :type filtration: float.
+ :type filtration: float
:returns: true if the simplex was not yet in the complex, false
otherwise (whatever its original filtration value).
:rtype: bool
@@ -225,13 +225,12 @@ cdef class SimplexTree:
preincrement(it)
def get_skeleton(self, dimension):
- """This function returns the (simplices of the) skeleton of a maximum
- given dimension.
+ """This function returns a generator with the (simplices of the) skeleton of a maximum given dimension.
:param dimension: The skeleton dimension value.
- :type dimension: int.
+ :type dimension: int
:returns: The (simplices of the) skeleton of a maximum dimension.
- :rtype: list of tuples(simplex, filtration)
+ :rtype: generator of tuples(simplex, filtration)
"""
cdef Simplex_tree_skeleton_iterator it = self.get_ptr().get_skeleton_iterator_begin(dimension)
cdef Simplex_tree_skeleton_iterator end = self.get_ptr().get_skeleton_iterator_end(dimension)
@@ -244,7 +243,7 @@ cdef class SimplexTree:
"""This function returns the star of a given N-simplex.
:param simplex: The N-simplex, represented by a list of vertices.
- :type simplex: list of int.
+ :type simplex: list of int
:returns: The (simplices of the) star of a simplex.
:rtype: list of tuples(simplex, filtration)
"""
@@ -266,10 +265,10 @@ cdef class SimplexTree:
given codimension.
:param simplex: The N-simplex, represented by a list of vertices.
- :type simplex: list of int.
+ :type simplex: list of int
:param codimension: The codimension. If codimension = 0, all cofaces
are returned (equivalent of get_star function)
- :type codimension: int.
+ :type codimension: int
:returns: The (simplices of the) cofaces of a simplex
:rtype: list of tuples(simplex, filtration)
"""
@@ -291,7 +290,7 @@ cdef class SimplexTree:
complex.
:param simplex: The N-simplex, represented by a list of vertices.
- :type simplex: list of int.
+ :type simplex: list of int
.. note::
@@ -309,7 +308,7 @@ cdef class SimplexTree:
"""Prune above filtration value given as parameter.
:param filtration: Maximum threshold value.
- :type filtration: float.
+ :type filtration: float
:returns: The filtration modification information.
:rtype: bool
@@ -343,7 +342,7 @@ cdef class SimplexTree:
1 when calling the method.
:param max_dim: The maximal dimension.
- :type max_dim: int.
+ :type max_dim: int
"""
cdef int maxdim = max_dim
with nogil:
@@ -384,12 +383,12 @@ cdef class SimplexTree:
:param homology_coeff_field: The homology coefficient field. Must be a
prime number. Default value is 11.
- :type homology_coeff_field: int.
+ :type homology_coeff_field: int
:param min_persistence: The minimum persistence value (i.e., the absolute value of the difference between the persistence diagram point coordinates) to take into
account (strictly greater than min_persistence). Default value is
0.0.
Set min_persistence to -1.0 to see all values.
- :type min_persistence: float.
+ :type min_persistence: float
:returns: A list of four persistence diagrams in the format described in :func:`persistence`. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. See https://link.springer.com/article/10.1007/s10208-008-9027-z and/or section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes.
.. note::
@@ -416,12 +415,12 @@ cdef class SimplexTree:
:param homology_coeff_field: The homology coefficient field. Must be a
prime number. Default value is 11.
- :type homology_coeff_field: int.
+ :type homology_coeff_field: int
:param min_persistence: The minimum persistence value to take into
account (strictly greater than min_persistence). Default value is
0.0.
Set min_persistence to -1.0 to see all values.
- :type min_persistence: float.
+ :type min_persistence: float
:param persistence_dim_max: If true, the persistent homology for the
maximal dimension in the complex is computed. If false, it is
ignored. Default is false.
@@ -439,12 +438,12 @@ cdef class SimplexTree:
:param homology_coeff_field: The homology coefficient field. Must be a
prime number. Default value is 11.
- :type homology_coeff_field: int.
+ :type homology_coeff_field: int
:param min_persistence: The minimum persistence value to take into
account (strictly greater than min_persistence). Default value is
0.0.
Set min_persistence to -1.0 to see all values.
- :type min_persistence: float.
+ :type min_persistence: float
:param persistence_dim_max: If true, the persistent homology for the
maximal dimension in the complex is computed. If false, it is
ignored. Default is false.
@@ -479,10 +478,10 @@ cdef class SimplexTree:
:param from_value: The persistence birth limit to be added in the
numbers (persistent birth <= from_value).
- :type from_value: float.
+ :type from_value: float
:param to_value: The persistence death limit to be added in the
numbers (persistent death > to_value).
- :type to_value: float.
+ :type to_value: float
:returns: The persistent Betti numbers ([B0, B1, ..., Bn]).
:rtype: list of int
@@ -499,7 +498,7 @@ cdef class SimplexTree:
complex in a specific dimension.
:param dimension: The specific dimension.
- :type dimension: int.
+ :type dimension: int
:returns: The persistence intervals.
:rtype: numpy array of dimension 2
@@ -528,7 +527,7 @@ cdef class SimplexTree:
complex in a user given file name.
:param persistence_file: Name of the file.
- :type persistence_file: string.
+ :type persistence_file: string
:note: intervals_in_dim function requires
:func:`compute_persistence`
@@ -582,3 +581,23 @@ cdef class SimplexTree:
infinite0 = np_array(next(l))
infinites = [np_array(d).reshape(-1,2) for d in l]
return (normal0, normals, infinite0, infinites)
+
+ def collapse_edges(self, nb_iterations = 1):
+ """Assuming the simplex tree is a 1-skeleton graph, this method collapse edges (simplices of higher dimension
+ are ignored) and resets the simplex tree from the remaining edges.
+ A good candidate is to build a simplex tree on top of a :class:`~gudhi.RipsComplex` of dimension 1 before
+ collapsing edges
+ (cf. :download:`rips_complex_edge_collapse_example.py <../example/rips_complex_edge_collapse_example.py>`).
+ For implementation details, please refer to :cite:`edgecollapsesocg2020`.
+
+ :param nb_iterations: The number of edge collapse iterations to perform. Default is 1.
+ :type nb_iterations: int
+ """
+ # Backup old pointer
+ cdef Simplex_tree_interface_full_featured* ptr = self.get_ptr()
+ cdef int nb_iter = nb_iterations
+ with nogil:
+ # New pointer is a new collapsed simplex tree
+ self.thisptr = <intptr_t>(ptr.collapse_edges(nb_iter))
+ # Delete old pointer
+ del ptr
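The intended workflow around collapse_edges, following rips_complex_edge_collapse_example.py (a sketch; point cloud and parameters arbitrary):

    import numpy as np
    import gudhi as gd

    points = np.random.rand(50, 2)
    rips = gd.RipsComplex(points=points, max_edge_length=0.5)
    # Build only the 1-skeleton: collapse_edges ignores higher-dimensional simplices.
    stree = rips.create_simplex_tree(max_dimension=1)
    stree.collapse_edges()
    # Expand the collapsed graph before computing persistence above dimension 0.
    stree.expansion(2)
    diag = stree.persistence()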
diff --git a/src/python/gudhi/wasserstein/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py
index 89ecab1c..a9d1cdff 100644
--- a/src/python/gudhi/wasserstein/wasserstein.py
+++ b/src/python/gudhi/wasserstein/wasserstein.py
@@ -73,8 +73,8 @@ def _perstot_autodiff(X, order, internal_p):
def _perstot(X, order, internal_p, enable_autodiff):
'''
:param X: (n x 2) numpy.array (points of a given diagram).
- :param order: exponent for Wasserstein. Default value is 2.
- :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm).
+ :param order: exponent for Wasserstein.
+ :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2).
:param enable_autodiff: If X is torch.tensor, tensorflow.Tensor or jax.numpy.ndarray, make the computation
transparent to automatic differentiation.
:type enable_autodiff: bool
@@ -88,7 +88,7 @@ def _perstot(X, order, internal_p, enable_autodiff):
return np.linalg.norm(_dist_to_diag(X, internal_p), ord=order)
-def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2., enable_autodiff=False):
+def wasserstein_distance(X, Y, matching=False, order=1., internal_p=np.inf, enable_autodiff=False):
'''
:param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points
(i.e. with infinite coordinate).
@@ -96,10 +96,10 @@ def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2., enable_a
:param matching: if True, computes and returns the optimal matching between X and Y, encoded as
a (n x 2) np.array [...[i,j]...], meaning the i-th point in X is matched to
the j-th point in Y, with the convention (-1) represents the diagonal.
- :param order: exponent for Wasserstein; Default value is 2.
+ :param order: exponent for Wasserstein; Default value is 1.
:param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2);
- Default value is 2 (Euclidean norm).
- :param enable_autodiff: If X and Y are torch.tensor, tensorflow.Tensor or jax.numpy.ndarray, make the computation
+ Default value is `np.inf`.
+ :param enable_autodiff: If X and Y are torch.tensor or tensorflow.Tensor, make the computation
transparent to automatic differentiation. This requires the package EagerPy and is currently incompatible
with `matching=True`.
@@ -165,9 +165,9 @@ def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2., enable_a
# empty arrays are not handled properly by the helpers, so we avoid calling them
if len(pairs_X_Y):
dists.append((Y_orig[pairs_X_Y[:, 1]] - X_orig[pairs_X_Y[:, 0]]).norms.lp(internal_p, axis=-1).norms.lp(order))
- if len(pairs_X_diag):
+ if len(pairs_X_diag[0]):
dists.append(_perstot_autodiff(X_orig[pairs_X_diag], order, internal_p))
- if len(pairs_Y_diag):
+ if len(pairs_Y_diag[0]):
dists.append(_perstot_autodiff(Y_orig[pairs_Y_diag], order, internal_p))
dists = [dist.reshape(1) for dist in dists]
return ep.concatenate(dists).norms.lp(order).raw
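With the new defaults (order=1., internal_p=np.inf), matching the single points of the toy diagrams below costs max(|0 - 0|, |1.3 - 1.0|) = 0.3, which is cheaper than pushing both points to the diagonal (0.5 + 0.65):

    import numpy as np
    from gudhi.wasserstein import wasserstein_distance

    X = np.array([[0.0, 1.0]])
    Y = np.array([[0.0, 1.3]])
    print(wasserstein_distance(X, Y))                            # 0.3 with the new defaults
    print(wasserstein_distance(X, Y, order=2., internal_p=2.))   # the former default behaviour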
diff --git a/src/python/include/Alpha_complex_factory.h b/src/python/include/Alpha_complex_factory.h
new file mode 100644
index 00000000..d699ad9b
--- /dev/null
+++ b/src/python/include/Alpha_complex_factory.h
@@ -0,0 +1,139 @@
+/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ * Author(s): Vincent Rouvreau
+ *
+ * Copyright (C) 2020 Inria
+ *
+ * Modification(s):
+ * - YYYY/MM Author: Description of the modification
+ */
+
+#ifndef INCLUDE_ALPHA_COMPLEX_FACTORY_H_
+#define INCLUDE_ALPHA_COMPLEX_FACTORY_H_
+
+#include <gudhi/Simplex_tree.h>
+#include <gudhi/Alpha_complex.h>
+#include <gudhi/Alpha_complex_3d.h>
+#include <gudhi/Alpha_complex_options.h>
+#include <CGAL/Epeck_d.h>
+#include <CGAL/Epick_d.h>
+
+#include <boost/range/adaptor/transformed.hpp>
+
+#include "Simplex_tree_interface.h"
+
+#include <iostream>
+#include <vector>
+#include <string>
+#include <memory> // for std::unique_ptr
+
+namespace Gudhi {
+
+namespace alpha_complex {
+
+template <typename CgalPointType>
+std::vector<double> pt_cgal_to_cython(CgalPointType const& point) {
+ std::vector<double> vd;
+ vd.reserve(point.dimension());
+ for (auto coord = point.cartesian_begin(); coord != point.cartesian_end(); coord++)
+ vd.push_back(CGAL::to_double(*coord));
+ return vd;
+}
+
+template <typename CgalPointType>
+static CgalPointType pt_cython_to_cgal(std::vector<double> const& vec) {
+ return CgalPointType(vec.size(), vec.begin(), vec.end());
+}
+
+class Abstract_alpha_complex {
+ public:
+ virtual std::vector<double> get_point(int vh) = 0;
+ virtual bool create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square,
+ bool default_filtration_value) = 0;
+ // Virtual destructor: instances are deleted through a pointer to this abstract base class.
+ virtual ~Abstract_alpha_complex() = default;
+};
+
+class Exact_Alphacomplex_dD : public Abstract_alpha_complex {
+ private:
+ using Kernel = CGAL::Epeck_d<CGAL::Dynamic_dimension_tag>;
+ using Point = typename Kernel::Point_d;
+
+ public:
+ Exact_Alphacomplex_dD(const std::vector<std::vector<double>>& points, bool exact_version)
+ : exact_version_(exact_version),
+ alpha_complex_(boost::adaptors::transform(points, pt_cython_to_cgal<Point>)) {
+ }
+
+ virtual std::vector<double> get_point(int vh) override {
+ Point const& point = alpha_complex_.get_point(vh);
+ return pt_cgal_to_cython(point);
+ }
+
+ virtual bool create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square,
+ bool default_filtration_value) override {
+ return alpha_complex_.create_complex(*simplex_tree, max_alpha_square, exact_version_, default_filtration_value);
+ }
+
+ private:
+ bool exact_version_;
+ Alpha_complex<Kernel> alpha_complex_;
+};
+
+class Inexact_Alphacomplex_dD : public Abstract_alpha_complex {
+ private:
+ using Kernel = CGAL::Epick_d<CGAL::Dynamic_dimension_tag>;
+ using Point = typename Kernel::Point_d;
+
+ public:
+ Inexact_Alphacomplex_dD(const std::vector<std::vector<double>>& points, bool exact_version)
+ : exact_version_(exact_version),
+ alpha_complex_(boost::adaptors::transform(points, pt_cython_to_cgal<Point>)) {
+ }
+
+ virtual std::vector<double> get_point(int vh) override {
+ Point const& point = alpha_complex_.get_point(vh);
+ return pt_cgal_to_cython(point);
+ }
+ virtual bool create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square,
+ bool default_filtration_value) override {
+ return alpha_complex_.create_complex(*simplex_tree, max_alpha_square, exact_version_, default_filtration_value);
+ }
+
+ private:
+ bool exact_version_;
+ Alpha_complex<Kernel> alpha_complex_;
+};
+
+template <complexity Complexity>
+class Alphacomplex_3D : public Abstract_alpha_complex {
+ private:
+ using Point = typename Alpha_complex_3d<Complexity, false, false>::Bare_point_3;
+
+ static Point pt_cython_to_cgal_3(std::vector<double> const& vec) {
+ return Point(vec[0], vec[1], vec[2]);
+ }
+
+ public:
+ Alphacomplex_3D(const std::vector<std::vector<double>>& points)
+ : alpha_complex_(boost::adaptors::transform(points, pt_cython_to_cgal_3)) {
+ }
+
+ virtual std::vector<double> get_point(int vh) override {
+ Point const& point = alpha_complex_.get_point(vh);
+ return pt_cgal_to_cython(point);
+ }
+
+ virtual bool create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square,
+ bool default_filtration_value) override {
+ return alpha_complex_.create_complex(*simplex_tree, max_alpha_square);
+ }
+
+ private:
+ Alpha_complex_3d<Complexity, false, false> alpha_complex_;
+};
+
+
+} // namespace alpha_complex
+
+} // namespace Gudhi
+
+#endif // INCLUDE_ALPHA_COMPLEX_FACTORY_H_
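On the Python side, these factories are selected through the `precision` argument of AlphaComplex, as exercised in test_alpha_complex.py below ('fast' maps to the inexact Epick_d kernel, 'safe' and 'exact' to Epeck_d):

    import gudhi as gd

    points = [[1.0, 1.0], [7.0, 0.0], [4.0, 6.0], [9.0, 6.0]]
    for precision in ['fast', 'safe', 'exact']:
        alpha = gd.AlphaComplex(points=points, precision=precision)
        stree = alpha.create_simplex_tree()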
diff --git a/src/python/include/Alpha_complex_interface.h b/src/python/include/Alpha_complex_interface.h
index 40de88f3..23be194d 100644
--- a/src/python/include/Alpha_complex_interface.h
+++ b/src/python/include/Alpha_complex_interface.h
@@ -11,57 +11,66 @@
#ifndef INCLUDE_ALPHA_COMPLEX_INTERFACE_H_
#define INCLUDE_ALPHA_COMPLEX_INTERFACE_H_
-#include <gudhi/Simplex_tree.h>
-#include <gudhi/Alpha_complex.h>
-#include <CGAL/Epeck_d.h>
-#include <CGAL/Epick_d.h>
-
-#include <boost/range/adaptor/transformed.hpp>
+#include "Alpha_complex_factory.h"
+#include <gudhi/Alpha_complex_options.h>
#include "Simplex_tree_interface.h"
#include <iostream>
#include <vector>
#include <string>
+#include <memory> // for std::unique_ptr
namespace Gudhi {
namespace alpha_complex {
class Alpha_complex_interface {
- using Dynamic_kernel = CGAL::Epeck_d< CGAL::Dynamic_dimension_tag >;
- using Point_d = Dynamic_kernel::Point_d;
-
public:
- Alpha_complex_interface(const std::vector<std::vector<double>>& points) {
- auto mkpt = [](std::vector<double> const& vec){
- return Point_d(vec.size(), vec.begin(), vec.end());
- };
- alpha_complex_ = new Alpha_complex<Dynamic_kernel>(boost::adaptors::transform(points, mkpt));
- }
-
- Alpha_complex_interface(const std::string& off_file_name, bool from_file = true) {
- alpha_complex_ = new Alpha_complex<Dynamic_kernel>(off_file_name);
- }
-
- ~Alpha_complex_interface() {
- delete alpha_complex_;
+ Alpha_complex_interface(const std::vector<std::vector<double>>& points, bool fast_version, bool exact_version)
+ : points_(points),
+ fast_version_(fast_version),
+ exact_version_(exact_version) {
}
std::vector<double> get_point(int vh) {
- std::vector<double> vd;
- Point_d const& ph = alpha_complex_->get_point(vh);
- for (auto coord = ph.cartesian_begin(); coord != ph.cartesian_end(); coord++)
- vd.push_back(CGAL::to_double(*coord));
- return vd;
+ return alpha_ptr_->get_point(vh);
}
- void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square) {
- alpha_complex_->create_complex(*simplex_tree, max_alpha_square);
+ void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square,
+ bool default_filtration_value) {
+ if (points_.size() > 0) {
+ std::size_t dimension = points_[0].size();
+ if (dimension == 3 && !default_filtration_value) {
+ if (fast_version_)
+ alpha_ptr_ = std::make_unique<Alphacomplex_3D<Gudhi::alpha_complex::complexity::FAST>>(points_);
+ else if (exact_version_)
+ alpha_ptr_ = std::make_unique<Alphacomplex_3D<Gudhi::alpha_complex::complexity::EXACT>>(points_);
+ else
+ alpha_ptr_ = std::make_unique<Alphacomplex_3D<Gudhi::alpha_complex::complexity::SAFE>>(points_);
+ if (!alpha_ptr_->create_simplex_tree(simplex_tree, max_alpha_square, default_filtration_value)) {
+ // create_simplex_tree will fail if all points are on a plane - Retry with dD by setting dimension to 2
+ dimension--;
+ alpha_ptr_.reset();
+ }
+ }
+ // Not **else**: we must also handle the case where the 3d version failed
+ if (dimension != 3 || default_filtration_value) {
+ if (fast_version_) {
+ alpha_ptr_ = std::make_unique<Inexact_Alphacomplex_dD>(points_, exact_version_);
+ } else {
+ alpha_ptr_ = std::make_unique<Exact_Alphacomplex_dD>(points_, exact_version_);
+ }
+ alpha_ptr_->create_simplex_tree(simplex_tree, max_alpha_square, default_filtration_value);
+ }
+ }
}
private:
- Alpha_complex<Dynamic_kernel>* alpha_complex_;
+ std::unique_ptr<Abstract_alpha_complex> alpha_ptr_;
+ std::vector<std::vector<double>> points_;
+ bool fast_version_;
+ bool exact_version_;
};
} // namespace alpha_complex
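As the dispatch above shows, default_filtration_value=True bypasses both the 3d specialisation and the filtration computation, yielding the Delaunay triangulation with NaN filtration values (a sketch consistent with _delaunay_complex in the tests below):

    import math
    import gudhi as gd

    alpha = gd.AlphaComplex(points=[[0, 0], [1, 0], [0, 1], [1, 1]])
    stree = alpha.create_simplex_tree(default_filtration_value=True)
    for simplex, filtration in stree.get_filtration():
        assert math.isnan(filtration)  # no alpha values were computed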
diff --git a/src/python/include/Simplex_tree_interface.h b/src/python/include/Simplex_tree_interface.h
index 56d7c41d..e288a8cf 100644
--- a/src/python/include/Simplex_tree_interface.h
+++ b/src/python/include/Simplex_tree_interface.h
@@ -15,10 +15,13 @@
#include <gudhi/distance_functions.h>
#include <gudhi/Simplex_tree.h>
#include <gudhi/Points_off_io.h>
+#include <gudhi/Flag_complex_edge_collapser.h>
#include <iostream>
#include <vector>
#include <utility> // std::pair
+#include <tuple>
+#include <iterator> // for std::distance
namespace Gudhi {
@@ -157,6 +160,34 @@ class Simplex_tree_interface : public Simplex_tree<SimplexTreeOptions> {
return new_dgm;
}
+ Simplex_tree_interface* collapse_edges(int nb_collapse_iteration) {
+ using Filtered_edge = std::tuple<Vertex_handle, Vertex_handle, Filtration_value>;
+ std::vector<Filtered_edge> edges;
+ for (Simplex_handle sh : Base::skeleton_simplex_range(1)) {
+ if (Base::dimension(sh) == 1) {
+ typename Base::Simplex_vertex_range rg = Base::simplex_vertex_range(sh);
+ auto vit = rg.begin();
+ Vertex_handle v = *vit;
+ Vertex_handle w = *++vit;
+ edges.emplace_back(v, w, Base::filtration(sh));
+ }
+ }
+
+ for (int iteration = 0; iteration < nb_collapse_iteration; iteration++) {
+ edges = Gudhi::collapse::flag_complex_collapse_edges(edges);
+ }
+ Simplex_tree_interface* collapsed_stree_ptr = new Simplex_tree_interface();
+ // Copy the original 0-skeleton
+ for (Simplex_handle sh : Base::skeleton_simplex_range(0)) {
+ collapsed_stree_ptr->insert({*(Base::simplex_vertex_range(sh).begin())}, Base::filtration(sh));
+ }
+ // Insert remaining edges
+ for (auto remaining_edge : edges) {
+ collapsed_stree_ptr->insert({std::get<0>(remaining_edge), std::get<1>(remaining_edge)}, std::get<2>(remaining_edge));
+ }
+ return collapsed_stree_ptr;
+ }
+
// Iterator over the simplex tree
Complex_simplex_iterator get_simplices_iterator_begin() {
// this specific case works because the range is just a pair of iterators - won't work if range was a vector
diff --git a/src/python/include/pybind11_diagram_utils.h b/src/python/include/pybind11_diagram_utils.h
index d9627258..2d5194f4 100644
--- a/src/python/include/pybind11_diagram_utils.h
+++ b/src/python/include/pybind11_diagram_utils.h
@@ -18,8 +18,8 @@ namespace py = pybind11;
typedef py::array_t<double> Dgm;
// Get m[i,0] and m[i,1] as a pair
-static auto pairify(void* p, ssize_t h, ssize_t w) {
- return [=](ssize_t i){
+static auto pairify(void* p, py::ssize_t h, py::ssize_t w) {
+ return [=](py::ssize_t i){
char* birth = (char*)p + i * h;
char* death = birth + w;
return std::make_pair(*(double*)birth, *(double*)death);
@@ -32,8 +32,8 @@ inline auto numpy_to_range_of_pairs(py::array_t<double> dgm) {
if((buf.ndim!=2 || buf.shape[1]!=2) && (buf.ndim!=1 || buf.shape[0]!=0))
throw std::runtime_error("Diagram must be an array of size n x 2");
// In the case of shape (0), avoid reading non-existing strides[1] even if we won't use it.
- ssize_t stride1 = buf.ndim == 2 ? buf.strides[1] : 0;
- auto cnt = boost::counting_range<ssize_t>(0, buf.shape[0]);
+ py::ssize_t stride1 = buf.ndim == 2 ? buf.strides[1] : 0;
+ auto cnt = boost::counting_range<py::ssize_t>(0, buf.shape[0]);
return boost::adaptors::transform(cnt, pairify(buf.ptr, buf.strides[0], stride1));
// Be careful that the returned range cannot contain references to dead temporaries.
}
diff --git a/src/python/introduction.rst b/src/python/introduction.rst
new file mode 100644
index 00000000..11c06ac5
--- /dev/null
+++ b/src/python/introduction.rst
@@ -0,0 +1,24 @@
+The Gudhi library is an open source library for Computational Topology and
+Topological Data Analysis (TDA). It offers state-of-the-art algorithms
+to construct various types of simplicial complexes, data structures to
+represent them, and algorithms to compute geometric approximations of shapes
+and persistent homology.
+
+The GUDHI library offers the following interoperable modules:
+
+* Complexes:
+ * Cubical
+ * Simplicial: Rips, Witness, Alpha and Čech complexes
+ * Cover: Nerve and Graph induced complexes
+* Data structures and basic operations:
+ * Simplex tree, Skeleton blockers and Toplex map
+ * Construction, update, filtration and simplification
+* Topological descriptors computation
+* Manifold reconstruction
+* Topological descriptors tools:
+ * Bottleneck distance
+ * Statistical tools
+ * Persistence diagram and barcode
+
+For more information about Topological Data Analysis and its workflow, please
+refer to the `Wikipedia TDA dedicated page <https://en.wikipedia.org/wiki/Topological_data_analysis>`_.
diff --git a/src/python/setup.py.in b/src/python/setup.py.in
index b9f4e3f0..98d058fc 100644
--- a/src/python/setup.py.in
+++ b/src/python/setup.py.in
@@ -48,10 +48,12 @@ ext_modules = cythonize(ext_modules)
for module in pybind11_modules:
my_include_dirs = include_dirs + [pybind11.get_include(False), pybind11.get_include(True)]
- if module == 'hera':
+ if module == 'hera/wasserstein':
my_include_dirs = ['@HERA_WASSERSTEIN_INCLUDE_DIR@'] + my_include_dirs
+ elif module == 'hera/bottleneck':
+ my_include_dirs = ['@HERA_BOTTLENECK_INCLUDE_DIR@'] + my_include_dirs
ext_modules.append(Extension(
- 'gudhi.' + module,
+ 'gudhi.' + module.replace('/', '.'),
sources = [source_dir + module + '.cc'],
language = 'c++',
include_dirs = my_include_dirs,
@@ -62,14 +64,29 @@ for module in pybind11_modules:
runtime_library_dirs=runtime_library_dirs,
))
+# read the contents of introduction.rst
+with open("introduction.rst", "r") as fh:
+ long_description = fh.read()
+
setup(
name = 'gudhi',
packages=find_packages(), # find_namespace_packages(include=["gudhi*"])
author='GUDHI Editorial Board',
author_email='gudhi-contact@lists.gforge.inria.fr',
version='@GUDHI_VERSION@',
- url='http://gudhi.gforge.inria.fr/',
+ url='https://gudhi.inria.fr/',
+ project_urls={
+ 'Bug Tracker': 'https://github.com/GUDHI/gudhi-devel/issues',
+ 'Documentation': 'https://gudhi.inria.fr/python/latest/',
+ 'Source Code': 'https://github.com/GUDHI/gudhi-devel',
+ 'License': 'https://gudhi.inria.fr/licensing/'
+ },
+ description='The Gudhi library is an open source library for ' \
+ 'Computational Topology and Topological Data Analysis (TDA).',
+ long_description_content_type='text/x-rst',
+ long_description=long_description,
ext_modules = ext_modules,
- install_requires = ['cython','numpy >= 1.9',],
- setup_requires = ['numpy >= 1.9','pybind11',],
+ install_requires = ['numpy >= 1.9',],
+ setup_requires = ['cython','numpy >= 1.9','pybind11',],
+ package_data={"": ["*.dll"], },
)
diff --git a/src/python/test/test_alpha_complex.py b/src/python/test/test_alpha_complex.py
index 77121302..814f8289 100755
--- a/src/python/test/test_alpha_complex.py
+++ b/src/python/test/test_alpha_complex.py
@@ -8,7 +8,7 @@
- YYYY/MM Author: Description of the modification
"""
-from gudhi import AlphaComplex, SimplexTree
+import gudhi as gd
import math
import numpy as np
import pytest
@@ -24,14 +24,17 @@ __copyright__ = "Copyright (C) 2016 Inria"
__license__ = "MIT"
-def test_empty_alpha():
- alpha_complex = AlphaComplex(points=[[0, 0]])
+def _empty_alpha(precision):
+ alpha_complex = gd.AlphaComplex(points=[[0, 0]], precision = precision)
assert alpha_complex.__is_defined() == True
+def test_empty_alpha():
+ for precision in ['fast', 'safe', 'exact']:
+ _empty_alpha(precision)
-def test_infinite_alpha():
+def _infinite_alpha(precision):
point_list = [[0, 0], [1, 0], [0, 1], [1, 1]]
- alpha_complex = AlphaComplex(points=point_list)
+ alpha_complex = gd.AlphaComplex(points=point_list, precision = precision)
assert alpha_complex.__is_defined() == True
simplex_tree = alpha_complex.create_simplex_tree()
@@ -79,10 +82,13 @@ def test_infinite_alpha():
else:
assert False
+def test_infinite_alpha():
+ for precision in ['fast', 'safe', 'exact']:
+ _infinite_alpha(precision)
-def test_filtered_alpha():
+def _filtered_alpha(precision):
point_list = [[0, 0], [1, 0], [0, 1], [1, 1]]
- filtered_alpha = AlphaComplex(points=point_list)
+ filtered_alpha = gd.AlphaComplex(points=point_list, precision = precision)
simplex_tree = filtered_alpha.create_simplex_tree(max_alpha_square=0.25)
@@ -119,7 +125,11 @@ def test_filtered_alpha():
assert simplex_tree.get_star([0]) == [([0], 0.0), ([0, 1], 0.25), ([0, 2], 0.25)]
assert simplex_tree.get_cofaces([0], 1) == [([0, 1], 0.25), ([0, 2], 0.25)]
-def test_safe_alpha_persistence_comparison():
+def test_filtered_alpha():
+ for precision in ['fast', 'safe', 'exact']:
+ _filtered_alpha(precision)
+
+def _safe_alpha_persistence_comparison(precision):
#generate periodic signal
time = np.arange(0, 10, 1)
signal = [math.sin(x) for x in time]
@@ -131,10 +141,10 @@ def test_safe_alpha_persistence_comparison():
embedding2 = [[signal[i], delayed[i]] for i in range(len(time))]
#build alpha complex and simplex tree
- alpha_complex1 = AlphaComplex(points=embedding1)
+ alpha_complex1 = gd.AlphaComplex(points=embedding1, precision = precision)
simplex_tree1 = alpha_complex1.create_simplex_tree()
- alpha_complex2 = AlphaComplex(points=embedding2)
+ alpha_complex2 = gd.AlphaComplex(points=embedding2, precision = precision)
simplex_tree2 = alpha_complex2.create_simplex_tree()
diag1 = simplex_tree1.persistence()
@@ -143,3 +153,106 @@ def test_safe_alpha_persistence_comparison():
for (first_p, second_p) in zip_longest(diag1, diag2):
assert first_p[0] == pytest.approx(second_p[0])
assert first_p[1] == pytest.approx(second_p[1])
+
+
+def test_safe_alpha_persistence_comparison():
+    # The 'fast' precision is skipped: its approximate filtration values can make this comparison fail
+ _safe_alpha_persistence_comparison('safe')
+ _safe_alpha_persistence_comparison('exact')
+
+def _delaunay_complex(precision):
+ point_list = [[0, 0], [1, 0], [0, 1], [1, 1]]
+ filtered_alpha = gd.AlphaComplex(points=point_list, precision = precision)
+
+ simplex_tree = filtered_alpha.create_simplex_tree(default_filtration_value = True)
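+    # With default_filtration_value=True only the Delaunay complex is built;
+    # filtration values are left unassigned (NaN), as checked below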
+
+ assert simplex_tree.num_simplices() == 11
+ assert simplex_tree.num_vertices() == 4
+
+ assert point_list[0] == filtered_alpha.get_point(0)
+ assert point_list[1] == filtered_alpha.get_point(1)
+ assert point_list[2] == filtered_alpha.get_point(2)
+ assert point_list[3] == filtered_alpha.get_point(3)
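+    # Out-of-range indices must raise IndexError rather than return a point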
+    with pytest.raises(IndexError):
+        filtered_alpha.get_point(4)
+    with pytest.raises(IndexError):
+        filtered_alpha.get_point(125)
+
+ for filtered_value in simplex_tree.get_filtration():
+ assert math.isnan(filtered_value[1])
+ for filtered_value in simplex_tree.get_star([0]):
+ assert math.isnan(filtered_value[1])
+ for filtered_value in simplex_tree.get_cofaces([0], 1):
+ assert math.isnan(filtered_value[1])
+
+def test_delaunay_complex():
+ for precision in ['fast', 'safe', 'exact']:
+ _delaunay_complex(precision)
+
+def _3d_points_on_a_plane(precision, default_filtration_value):
+ alpha = gd.AlphaComplex(off_file='alphacomplexdoc.off', precision = precision)
+
+ simplex_tree = alpha.create_simplex_tree(default_filtration_value = default_filtration_value)
+ assert simplex_tree.dimension() == 2
+ assert simplex_tree.num_vertices() == 7
+ assert simplex_tree.num_simplices() == 25
+
+def test_3d_points_on_a_plane():
+    with open("alphacomplexdoc.off", "w") as off_file:
+        off_file.write("OFF \n"
+                       "7 0 0 \n"
+                       "1.0 1.0 0.0\n"
+                       "7.0 0.0 0.0\n"
+                       "4.0 6.0 0.0\n"
+                       "9.0 6.0 0.0\n"
+                       "0.0 14.0 0.0\n"
+                       "2.0 19.0 0.0\n"
+                       "9.0 17.0 0.0\n")
+
+ for default_filtration_value in [True, False]:
+ for precision in ['fast', 'safe', 'exact']:
+ _3d_points_on_a_plane(precision, default_filtration_value)
+
+def _3d_tetrahedrons(precision):
+ points = 10*np.random.rand(10, 3)
+ alpha = gd.AlphaComplex(points=points, precision = precision)
+ st_alpha = alpha.create_simplex_tree(default_filtration_value = False)
+ # New AlphaComplex for get_point to work
+ delaunay = gd.AlphaComplex(points=points, precision = precision)
+ st_delaunay = delaunay.create_simplex_tree(default_filtration_value = True)
+
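+    # Gather every tetrahedron of each complex, sorting its vertices by x-coordinate
+    # so the two lists can be compared regardless of enumeration order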
+ delaunay_tetra = []
+ for sk in st_delaunay.get_skeleton(4):
+ if len(sk[0]) == 4:
+ tetra = [delaunay.get_point(sk[0][0]),
+ delaunay.get_point(sk[0][1]),
+ delaunay.get_point(sk[0][2]),
+ delaunay.get_point(sk[0][3]) ]
+ delaunay_tetra.append(sorted(tetra, key=lambda tup: tup[0]))
+
+ alpha_tetra = []
+ for sk in st_alpha.get_skeleton(4):
+ if len(sk[0]) == 4:
+ tetra = [alpha.get_point(sk[0][0]),
+ alpha.get_point(sk[0][1]),
+ alpha.get_point(sk[0][2]),
+ alpha.get_point(sk[0][3]) ]
+ alpha_tetra.append(sorted(tetra, key=lambda tup: tup[0]))
+
+    # The two complexes must contain exactly the same tetrahedra
+ assert len(alpha_tetra) == len(delaunay_tetra)
+ for tetra_from_del in delaunay_tetra:
+ assert tetra_from_del in alpha_tetra
+
+def test_3d_tetrahedrons():
+ for precision in ['fast', 'safe', 'exact']:
+ _3d_tetrahedrons(precision)
diff --git a/src/python/test/test_bottleneck_distance.py b/src/python/test/test_bottleneck_distance.py
index 70b2abad..6915bea8 100755
--- a/src/python/test/test_bottleneck_distance.py
+++ b/src/python/test/test_bottleneck_distance.py
@@ -9,6 +9,8 @@
"""
import gudhi
+import gudhi.hera
+import pytest
__author__ = "Vincent Rouvreau"
__copyright__ = "Copyright (C) 2016 Inria"
@@ -19,5 +21,7 @@ def test_basic_bottleneck():
diag1 = [[2.7, 3.7], [9.6, 14.0], [34.2, 34.974], [3.0, float("Inf")]]
diag2 = [[2.8, 4.45], [9.5, 14.1], [3.2, float("Inf")]]
- assert gudhi.bottleneck_distance(diag1, diag2, 0.1) == 0.8081763781405569
assert gudhi.bottleneck_distance(diag1, diag2) == 0.75
+ assert gudhi.bottleneck_distance(diag1, diag2, 0.1) == pytest.approx(0.75, abs=0.1)
+ assert gudhi.hera.bottleneck_distance(diag1, diag2, 0) == 0.75
+ assert gudhi.hera.bottleneck_distance(diag1, diag2, 0.1) == pytest.approx(0.75, rel=0.1)
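
Note on the two backends exercised here: gudhi.bottleneck_distance interprets its optional third argument as an additive error bound, while the new gudhi.hera.bottleneck_distance takes a relative one, with 0 requesting the exact distance. A minimal sketch, with diagram values borrowed from the test:

import gudhi
import gudhi.hera

diag1 = [[2.7, 3.7], [9.6, 14.0], [34.2, 34.974], [3.0, float("inf")]]
diag2 = [[2.8, 4.45], [9.5, 14.1], [3.2, float("inf")]]

print(gudhi.bottleneck_distance(diag1, diag2))          # exact: 0.75
print(gudhi.hera.bottleneck_distance(diag1, diag2, 0))  # exact: 0.75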
diff --git a/src/python/test/test_cubical_complex.py b/src/python/test/test_cubical_complex.py
index 5c59db8f..d0e4e9e8 100755
--- a/src/python/test/test_cubical_complex.py
+++ b/src/python/test/test_cubical_complex.py
@@ -157,3 +157,20 @@ def test_cubical_generators():
assert np.array_equal(g[0][0], np.empty(shape=[0,2]))
assert np.array_equal(g[0][1], np.array([[7, 4]]))
assert np.array_equal(g[1][0], np.array([8]))
+
+def test_cubical_cofaces_of_persistence_pairs_when_pd_has_no_paired_birth_and_death():
+ cubCpx = CubicalComplex(dimensions=[1,2], top_dimensional_cells=[0.0, 1.0])
+ Diag = cubCpx.persistence(homology_coeff_field=2, min_persistence=0)
+ pairs = cubCpx.cofaces_of_persistence_pairs()
+ assert pairs[0] == []
+ assert np.array_equal(pairs[1][0], np.array([0]))
+
+def test_periodic_cofaces_of_persistence_pairs_when_pd_has_no_paired_birth_and_death():
+ perCubCpx = PeriodicCubicalComplex(dimensions=[1,2], top_dimensional_cells=[0.0, 1.0],
+ periodic_dimensions=[True, True])
+ Diag = perCubCpx.persistence(homology_coeff_field=2, min_persistence=0)
+ pairs = perCubCpx.cofaces_of_persistence_pairs()
+ assert pairs[0] == []
+ assert np.array_equal(pairs[1][0], np.array([0]))
+ assert np.array_equal(pairs[1][1], np.array([0, 1]))
+ assert np.array_equal(pairs[1][2], np.array([1]))
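
Both new tests cover the corner case where no homology class ever dies: the first component returned by cofaces_of_persistence_pairs() is then empty, and the second lists, per dimension, the top-dimensional cells where the essential classes are born. For contrast, a hedged sketch of a filtration that also produces a finite pair, assuming the same 3.3.0 API:

import numpy as np
from gudhi import CubicalComplex

# 3x3 grid: a ring of cells at value 1 around a center at 9 creates a
# 1-cycle that is born at 1 and filled in at 9 (a finite pair).
cc = CubicalComplex(dimensions=[3, 3],
                    top_dimensional_cells=[1, 1, 1, 1, 9, 1, 1, 1, 1])
cc.persistence(homology_coeff_field=2, min_persistence=0)
regular, essential = cc.cofaces_of_persistence_pairs()
# regular:   per dimension, arrays of (birth_cell, death_cell) indices
# essential: per dimension, arrays of birth_cell indices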
diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py
index bff4c267..0a52279e 100755
--- a/src/python/test/test_dtm.py
+++ b/src/python/test/test_dtm.py
@@ -8,10 +8,11 @@
- YYYY/MM Author: Description of the modification
"""
-from gudhi.point_cloud.dtm import DistanceToMeasure
+from gudhi.point_cloud.dtm import DistanceToMeasure, DTMDensity
import numpy
import pytest
import torch
+import math
def test_dtm_compare_euclidean():
@@ -66,3 +67,23 @@ def test_dtm_precomputed():
dtm = DistanceToMeasure(2, q=2, metric="neighbors")
r = dtm.fit_transform(dist)
assert r == pytest.approx([2.0, 0.707, 3.5355], rel=0.01)
+
+
+def test_density_normalized():
+ sample = numpy.random.normal(0, 1, (1000000, 2))
+ queries = numpy.array([[0.0, 0.0], [-0.5, 0.7], [0.4, 1.7]])
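+    # Closed-form density of the standard bivariate normal, evaluated at the queries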
+ expected = numpy.exp(-(queries ** 2).sum(-1) / 2) / (2 * math.pi)
+ estimated = DTMDensity(k=150, normalize=True).fit(sample).transform(queries)
+ assert estimated == pytest.approx(expected, rel=0.4)
+
+
+def test_density():
+ distances = [[0, 1, 10], [2, 0, 30], [1, 3, 5]]
+ density = DTMDensity(k=2, metric="neighbors", dim=1).fit_transform(distances)
+ expected = numpy.array([2.0, 1.0, 0.5])
+ assert density == pytest.approx(expected)
+ distances = [[0, 1], [2, 0], [1, 3]]
+ density = DTMDensity(metric="neighbors", dim=1).fit_transform(distances)
+ assert density == pytest.approx(expected)
+ density = DTMDensity(weights=[0.5, 0.5], metric="neighbors", dim=1).fit_transform(distances)
+ assert density == pytest.approx(expected)
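
With dim=1 and k=2, the unnormalized estimate computed above reduces to k divided by the sum of the k nearest distances, so the expected array can be checked by hand. A back-of-the-envelope verification, not the library's actual code path:

import numpy as np

distances = np.array([[0, 1, 10], [2, 0, 30], [1, 3, 5]], dtype=float)
k, dim = 2, 1
nearest = np.sort(distances, axis=1)[:, :k]
print(k / (nearest ** dim).sum(axis=1))  # [2.  1.  0.5]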
diff --git a/src/python/test/test_dtm_rips_complex.py b/src/python/test/test_dtm_rips_complex.py
new file mode 100644
index 00000000..e1c0ee44
--- /dev/null
+++ b/src/python/test/test_dtm_rips_complex.py
@@ -0,0 +1,32 @@
+""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ Author(s): Yuichi Ike
+
+    Copyright (C) 2020 Inria, Copyright (C) 2020 Fujitsu Laboratories Ltd.
+
+ Modification(s):
+ - YYYY/MM Author: Description of the modification
+"""
+
+from gudhi.dtm_rips_complex import DTMRipsComplex
+from gudhi import RipsComplex
+import numpy as np
+from math import sqrt
+import pytest
+
+def test_dtm_rips_complex():
+ pts = np.array([[2.0, 2.0], [0.0, 1.0], [3.0, 4.0]])
+ dtm_rips = DTMRipsComplex(points=pts, k=2)
+ st = dtm_rips.create_simplex_tree(max_dimension=2)
+ st.persistence()
+ persistence_intervals0 = st.persistence_intervals_in_dimension(0)
+    expected = np.array([[3.16227766, 5.39834564],
+                         [3.16227766, 5.39834564],
+                         [3.16227766, float("inf")]])
+    assert persistence_intervals0 == pytest.approx(expected)
+
+def test_compatibility_with_rips():
+ distance_matrix = np.array([[0, 1, 1, sqrt(2)], [1, 0, sqrt(2), 1], [1, sqrt(2), 0, 1], [sqrt(2), 1, 1, 0]])
+ dtm_rips = DTMRipsComplex(distance_matrix=distance_matrix, max_filtration=42)
+ st = dtm_rips.create_simplex_tree(max_dimension=1)
+ rips_complex = RipsComplex(distance_matrix=distance_matrix, max_edge_length=42)
+ st_from_rips = rips_complex.create_simplex_tree(max_dimension=1)
+ assert list(st.get_filtration()) == list(st_from_rips.get_filtration())
+
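DTMRipsComplex filters by distance-to-measure instead of raw distance; as test_compatibility_with_rips verifies, leaving k at its default reproduces the plain Rips filtration. A minimal sketch using only the constructor and method exercised above:

import numpy as np
from gudhi.dtm_rips_complex import DTMRipsComplex

pts = np.random.rand(20, 2)
st = DTMRipsComplex(points=pts, k=3).create_simplex_tree(max_dimension=2)
print(st.persistence())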
diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py
index dba7f952..e5c211a0 100755
--- a/src/python/test/test_representations.py
+++ b/src/python/test/test_representations.py
@@ -1,12 +1,60 @@
import os
import sys
import matplotlib.pyplot as plt
+import numpy as np
+import pytest
+
+from sklearn.cluster import KMeans
+
def test_representations_examples():
# Disable graphics for testing purposes
- plt.show = lambda:None
+ plt.show = lambda: None
here = os.path.dirname(os.path.realpath(__file__))
sys.path.append(here + "/../example")
import diagram_vectorizations_distances_kernels
return None
+
+
+from gudhi.representations.vector_methods import Atol
+from gudhi.representations.metrics import *
+from gudhi.representations.kernel_methods import *
+
+
+def _n_diags(n):
+    diags = []
+    for _ in range(n):
+        a = np.random.rand(50, 2)
+        a[:, 1] += a[:, 0]  # ensure death >= birth (points above the diagonal)
+        diags.append(a)
+    return diags
+
+
+def test_multiple():
+ l1 = _n_diags(9)
+ l2 = _n_diags(11)
+ l1b = l1.copy()
+ d1 = pairwise_persistence_diagram_distances(l1, e=0.00001, n_jobs=4)
+ d2 = BottleneckDistance(epsilon=0.00001).fit_transform(l1)
+ d3 = pairwise_persistence_diagram_distances(l1, l1b, e=0.00001, n_jobs=4)
+ assert d1 == pytest.approx(d2)
+ assert d3 == pytest.approx(d2, abs=1e-5) # Because of 0 entries (on the diagonal)
+ d1 = pairwise_persistence_diagram_distances(l1, l2, metric="wasserstein", order=2, internal_p=2)
+ d2 = WassersteinDistance(order=2, internal_p=2, n_jobs=4).fit(l2).transform(l1)
+ print(d1.shape, d2.shape)
+ assert d1 == pytest.approx(d2, rel=.02)
+
+
+def test_dummy_atol():
+ a = np.array([[1, 2, 4], [1, 4, 0], [1, 0, 4]])
+ b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]])
+ c = np.array([[3, 2, -1], [1, 2, -1]])
+
+ for weighting_method in ["cloud", "iidproba"]:
+ for contrast in ["gaussian", "laplacian", "indicator"]:
+ atol_vectoriser = Atol(quantiser=KMeans(n_clusters=1, random_state=202006), weighting_method=weighting_method, contrast=contrast)
+ atol_vectoriser.fit([a, b, c])
+ atol_vectoriser(a)
+ atol_vectoriser.transform(X=[a, b, c])
+
diff --git a/src/python/test/test_simplex_tree.py b/src/python/test/test_simplex_tree.py
index 2137d822..83be0602 100755
--- a/src/python/test/test_simplex_tree.py
+++ b/src/python/test/test_simplex_tree.py
@@ -340,3 +340,21 @@ def test_simplices_iterator():
assert st.find(simplex[0]) == True
print("filtration is: ", simplex[1])
assert st.filtration(simplex[0]) == simplex[1]
+
+def test_collapse_edges():
+ st = SimplexTree()
+
+ assert st.insert([0, 1], filtration=1.0) == True
+ assert st.insert([1, 2], filtration=1.0) == True
+ assert st.insert([2, 3], filtration=1.0) == True
+ assert st.insert([0, 3], filtration=1.0) == True
+ assert st.insert([0, 2], filtration=2.0) == True
+ assert st.insert([1, 3], filtration=2.0) == True
+
+ assert st.num_simplices() == 10
+
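+    # collapse_edges removes dominated edges while preserving persistent homology;
+    # here the later edge [1, 3] is dominated and disappears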
+ st.collapse_edges()
+ assert st.num_simplices() == 9
+ assert st.find([1, 3]) == False
+ for simplex in st.get_skeleton(0):
+ assert simplex[1] == 1.
diff --git a/src/python/test/test_tomato.py b/src/python/test/test_tomato.py
new file mode 100755
index 00000000..ecab03c4
--- /dev/null
+++ b/src/python/test/test_tomato.py
@@ -0,0 +1,65 @@
+""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ Author(s): Marc Glisse
+
+ Copyright (C) 2020 Inria
+
+ Modification(s):
+ - YYYY/MM Author: Description of the modification
+"""
+
+from gudhi.clustering.tomato import Tomato
+import numpy as np
+import pytest
+import matplotlib.pyplot as plt
+
+# Disable graphics for testing purposes
+plt.show = lambda: None
+
+
+def test_tomato_1():
+ a = [(1, 2), (1.1, 1.9), (0.9, 1.8), (10, 0), (10.1, 0.05), (10.2, -0.1), (5.4, 0)]
+ t = Tomato(metric="euclidean", n_clusters=2, k=4, n_jobs=-1, eps=0.05)
+ assert np.array_equal(t.fit_predict(a), [1, 1, 1, 0, 0, 0, 0]) # or with swapped 0 and 1
+ assert np.array_equal(t.children_, [[0, 1]])
+
+ t = Tomato(density_type="KDE", r=1, k=4)
+ t.fit(a)
+ assert np.array_equal(t.leaf_labels_, [1, 1, 1, 0, 0, 0, 0]) # or with swapped 0 and 1
+ assert t.n_clusters_ == 2
+ t.merge_threshold_ = 10
+ assert t.n_clusters_ == 1
+ assert (t.labels_ == 0).all()
+
+ t = Tomato(graph_type="radius", r=0.1, metric="cosine", k=3)
+ assert np.array_equal(t.fit_predict(a), [1, 1, 1, 0, 0, 0, 0]) # or with swapped 0 and 1
+
+ t = Tomato(metric="euclidean", graph_type="radius", r=4.7, k=4)
+ t.fit(a)
+ assert t.max_weight_per_cc_.size == 2
+ assert np.array_equal(t.neighbors_, [[0, 1, 2], [0, 1, 2], [0, 1, 2], [3, 4, 5, 6], [3, 4, 5], [3, 4, 5], [3, 6]])
+ t.plot_diagram()
+
+ t = Tomato(graph_type="radius", r=4.7, k=4, symmetrize_graph=True)
+ t.fit(a)
+ assert t.max_weight_per_cc_.size == 2
+ assert [set(i) for i in t.neighbors_] == [{1, 2}, {0, 2}, {0, 1}, {4, 5, 6}, {3, 5}, {3, 4}, {3}]
+
+ t = Tomato(n_clusters=2, k=4, symmetrize_graph=True)
+ t.fit(a)
+ assert [set(i) for i in t.neighbors_] == [
+ {1, 2, 6},
+ {0, 2, 6},
+ {0, 1, 6},
+ {4, 5, 6},
+ {3, 5, 6},
+ {3, 4, 6},
+ {0, 1, 2, 3, 4, 5},
+ ]
+ t.plot_diagram()
+
+ t = Tomato(k=6, metric="manhattan")
+ t.fit(a)
+ assert t.diagram_.size == 0
+ assert t.max_weight_per_cc_.size == 1
+ t.plot_diagram()
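
Tomato follows scikit-learn conventions: fit_predict returns cluster labels, and merge_threshold_ can be changed after fitting to re-cut the merge tree without recomputing it. A toy sketch restricted to the calls and attributes the test relies on, with defaults assumed elsewhere:

from gudhi.clustering.tomato import Tomato

pts = [(1, 2), (1.1, 1.9), (0.9, 1.8), (10, 0), (10.1, 0.05), (10.2, -0.1), (5.4, 0)]
t = Tomato(n_clusters=2, k=4)
print(t.fit_predict(pts))  # two clusters; which one is labeled 0 is unspecified
t.merge_threshold_ = 10    # merge every cluster whose prominence is below 10
print(t.n_clusters_)       # typically 1 on this toy data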
diff --git a/src/python/test/test_weighted_rips.py b/src/python/test/test_weighted_rips_complex.py
index 7ef48333..7ef48333 100644
--- a/src/python/test/test_weighted_rips.py
+++ b/src/python/test/test_weighted_rips_complex.py