-rw-r--r--  .github/for_maintainers/new_gudhi_version_creation.md |   5
-rw-r--r--  .github/for_maintainers/next_release_template.md      |  10
-rw-r--r--  .github/next_release.md                               |  27
-rw-r--r--  CMakeGUDHIVersion.txt                                 |   2
-rw-r--r--  src/python/CMakeLists.txt                             |   9
-rw-r--r--  src/python/doc/installation.rst                       |   6
-rw-r--r--  src/python/doc/representations.rst                    | 119
-rw-r--r--  src/python/doc/representations_sum.inc                |  24
-rw-r--r--  src/python/doc/rips_complex_user.rst                  | 127
-rw-r--r--  src/python/gudhi/bottleneck.cc                        |  18
-rw-r--r--  src/python/gudhi/hera/bottleneck.cc                   |  11
-rw-r--r--  src/python/gudhi/hera/wasserstein.cc                  | 129
-rw-r--r--  src/python/gudhi/representations/vector_methods.py    | 136
-rw-r--r--  src/python/gudhi/tensorflow/perslay.py                | 284
-rw-r--r--  src/python/include/pybind11_diagram_utils.h           |  25
-rw-r--r--  src/python/test/test_perslay.py                       | 147
-rwxr-xr-x  src/python/test/test_representations.py              |  22
-rwxr-xr-x  src/python/test/test_wasserstein_distance.py         |  28
18 files changed, 886 insertions, 243 deletions
diff --git a/.github/for_maintainers/new_gudhi_version_creation.md b/.github/for_maintainers/new_gudhi_version_creation.md
index 19ef168e..de8d0aa5 100644
--- a/.github/for_maintainers/new_gudhi_version_creation.md
+++ b/.github/for_maintainers/new_gudhi_version_creation.md
@@ -18,11 +18,10 @@ Check in the modifications, build and test the version:
```bash
git submodule update --init
rm -rf build; mkdir build; cd build
-cmake -DCMAKE_BUILD_TYPE=Release -DCGAL_DIR=/your/path/to/CGAL -DWITH_GUDHI_EXAMPLE=ON -DWITH_GUDHI_BENCHMARK=ON -DUSER_VERSION_DIR=gudhi.@GUDHI_VERSION@ -DPython_ADDITIONAL_VERSIONS=3 ..
+cmake -DCMAKE_BUILD_TYPE=Release -DCGAL_DIR=/your/path/to/CGAL -DWITH_GUDHI_REMOTE_TEST=ON -DWITH_GUDHI_EXAMPLE=ON -DWITH_GUDHI_BENCHMARK=ON -DUSER_VERSION_DIR=gudhi.@GUDHI_VERSION@ -DPython_ADDITIONAL_VERSIONS=3 ..
make user_version
date +"%d-%m-%Y-%T" > gudhi.@GUDHI_VERSION@/timestamp.txt
tar -czvf gudhi.@GUDHI_VERSION@.tar.gz gudhi.@GUDHI_VERSION@
-md5sum gudhi.@GUDHI_VERSION@.tar.gz > md5sum.txt
sha256sum gudhi.@GUDHI_VERSION@.tar.gz > sha256sum.txt
sha512sum gudhi.@GUDHI_VERSION@.tar.gz > sha512sum.txt
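+# Optionally verify the archive later against the generated checksum files:
+sha256sum -c sha256sum.txt
+sha512sum -c sha512sum.txt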
@@ -87,7 +86,7 @@ Copy gudhi WebDAV python/@GUDHI_VERSION@ as python/latest (no symbolic link with
* Name the tag: tags/gudhi-release-@GUDHI_VERSION@
* Name the release GUDHI @GUDHI_VERSION@ release
* Write the release note
-* Drag'n drop *gudhi.@GUDHI_VERSION@.tar.gz*, *md5sum.txt*, *sha256sum.txt*, *sha512sum.txt* files
+* Drag'n drop *gudhi.@GUDHI_VERSION@.tar.gz*, *sha256sum.txt*, *sha512sum.txt* files
* Tick the *This is a pre-release* checkbox if this is a release candidate (untick it if this is an official version)
* Click the *Publish the release* button
diff --git a/.github/for_maintainers/next_release_template.md b/.github/for_maintainers/next_release_template.md
index a2805a55..c57ae4eb 100644
--- a/.github/for_maintainers/next_release_template.md
+++ b/.github/for_maintainers/next_release_template.md
@@ -1,16 +1,16 @@
We are pleased to announce the release 3.X.X of the GUDHI library.
-As a major new feature, the GUDHI library now offers ...
+As a major new feature, the GUDHI library now offers **...**
We are now using GitHub to develop the GUDHI library; do not hesitate to [fork the GUDHI project on GitHub](https://github.com/GUDHI/gudhi-devel). From a user point of view, we recommend downloading the GUDHI user version (gudhi.3.X.X.tar.gz).
Below is a list of changes made since GUDHI 3.X-1.X-1:
- [Module](link)
- - ...
+ - **...**
- [Module](link)
- - ...
+ - **...**
- Miscellaneous
- The [list of bugs that were solved since GUDHI-3.X-1.X-1](https://github.com/GUDHI/gudhi-devel/issues?q=label%3A3.1.1+is%3Aclosed) is available on GitHub.
@@ -26,3 +26,7 @@ Feel free to [contact us](https://gudhi.inria.fr/contact/) in case you have any
For further information about downloading and installing the library ([C++](https://gudhi.inria.fr/doc/latest/installation.html) or [Python](https://gudhi.inria.fr/python/latest/installation.html)), please visit the [GUDHI web site](https://gudhi.inria.fr/).
+## Contributors
+
+- **...**
+- **...**
\ No newline at end of file
diff --git a/.github/next_release.md b/.github/next_release.md
index 929a7ce6..f27af825 100644
--- a/.github/next_release.md
+++ b/.github/next_release.md
@@ -1,25 +1,22 @@
-We are pleased to announce the release 3.7.0 of the GUDHI library.
+We are pleased to announce the release 3.8.0 of the GUDHI library.
-As a major new feature, the GUDHI library now offers ...
+As a major new feature, the GUDHI library now offers Perslay, a TensorFlow model for the representations module.
We are now using GitHub to develop the GUDHI library; do not hesitate to [fork the GUDHI project on GitHub](https://github.com/GUDHI/gudhi-devel). From a user point of view, we recommend downloading the GUDHI user version (gudhi.3.X.X.tar.gz).
-Below is a list of changes made since GUDHI 3.6.0:
+Below is a list of changes made since GUDHI 3.7.1:
-- [Module](link)
- - ...
-
-- [Simplex tree](https://gudhi.inria.fr/python/latest/simplex_tree_ref.html)
- - New functions to initialize from a matrix or insert batches of simplices of the same dimension.
+- [Perslay](https://gudhi.inria.fr/python/latest/representations_tflow_itf_ref.html)
+ - TensorFlow model for the representations module
-- [Rips complex](https://gudhi.inria.fr/python/latest/rips_complex_user.html)
- - Construction now rejects positional arguments, you need to specify `points=X`.
+- [Hera version of Wasserstein distance](https://gudhi.inria.fr/python/latest/wasserstein_distance_user.html#hera)
+ - Now provides the optimal matching in its interface
-- Installation
- - c++17 is the new minimal standard to compile the library. This implies Visual Studio minimal version is now 2017.
+- [Module](link)
+ - **...**
- Miscellaneous
- - The [list of bugs that were solved since GUDHI-3.6.0](https://github.com/GUDHI/gudhi-devel/issues?q=label%3A3.7.0+is%3Aclosed) is available on GitHub.
+ - The [list of bugs that were solved since GUDHI-3.7.1](https://github.com/GUDHI/gudhi-devel/issues?q=label%3A3.8.0+is%3Aclosed) is available on GitHub.
All modules are distributed under the terms of the MIT license.
However, there are still GPL dependencies for many modules. We invite you to check our [license dedicated web page](https://gudhi.inria.fr/licensing/) for further details.
@@ -32,3 +29,7 @@ Feel free to [contact us](https://gudhi.inria.fr/contact/) in case you have any
For further information about downloading and installing the library ([C++](https://gudhi.inria.fr/doc/latest/installation.html) or [Python](https://gudhi.inria.fr/python/latest/installation.html)), please visit the [GUDHI web site](https://gudhi.inria.fr/).
+## Contributors
+
+- **...**
+- **...**
diff --git a/CMakeGUDHIVersion.txt b/CMakeGUDHIVersion.txt
index 1dab47ab..69c8e9fc 100644
--- a/CMakeGUDHIVersion.txt
+++ b/CMakeGUDHIVersion.txt
@@ -1,6 +1,6 @@
# Must conform to PEP 440 - https://www.python.org/dev/peps/pep-0440/#pre-releases
set (GUDHI_MAJOR_VERSION 3)
-set (GUDHI_MINOR_VERSION 7)
+set (GUDHI_MINOR_VERSION 8)
# GUDHI_PATCH_VERSION can be 'ZaN' for Alpha release, 'ZbN' for Beta release, 'ZrcN' for release candidate or 'Z' for a final release.
set (GUDHI_PATCH_VERSION 0a0)
set(GUDHI_VERSION ${GUDHI_MAJOR_VERSION}.${GUDHI_MINOR_VERSION}.${GUDHI_PATCH_VERSION})
diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt
index cda7f8c3..74d1c4c6 100644
--- a/src/python/CMakeLists.txt
+++ b/src/python/CMakeLists.txt
@@ -298,8 +298,8 @@ if(PYTHONINTERP_FOUND)
# Other .py files
file(COPY "gudhi/persistence_graphical_tools.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
file(COPY "gudhi/representations" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi/")
- file(COPY "gudhi/wasserstein" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
- file(COPY "gudhi/tensorflow" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
+ file(COPY "gudhi/tensorflow" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi/")
+ file(COPY "gudhi/wasserstein" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
file(COPY "gudhi/point_cloud" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
file(COPY "gudhi/clustering" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi" FILES_MATCHING PATTERN "*.py")
file(COPY "gudhi/weighted_rips_complex.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
@@ -579,6 +579,11 @@ if(PYTHONINTERP_FOUND)
add_gudhi_py_test(test_diff)
endif()
+ # Perslay
+ if(TENSORFLOW_FOUND AND SKLEARN_FOUND)
+ add_gudhi_py_test(test_perslay)
+ endif()
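+  # (once configured, this test can also be run on its own with `ctest -R test_perslay`)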
+
# Betti curves
if(SKLEARN_FOUND AND SCIPY_FOUND)
add_gudhi_py_test(test_betti_curve_representations)
diff --git a/src/python/doc/installation.rst b/src/python/doc/installation.rst
index 5491542f..7200b2f0 100644
--- a/src/python/doc/installation.rst
+++ b/src/python/doc/installation.rst
@@ -396,11 +396,13 @@ mathematics, science, and engineering.
TensorFlow
----------
+:class:`~gudhi.tensorflow.perslay.Perslay` from the :doc:`persistence representations </representations>` module
+requires `TensorFlow <https://www.tensorflow.org/>`_.
The :doc:`cubical complex </cubical_complex_tflow_itf_ref>`, :doc:`simplex tree </ls_simplex_tree_tflow_itf_ref>`
-and :doc:`Rips complex </rips_complex_tflow_itf_ref>` modules require `TensorFlow <https://www.tensorflow.org>`_
+and :doc:`Rips complex </rips_complex_tflow_itf_ref>` modules require `TensorFlow`_
for incorporating them in neural nets.
-`TensorFlow <https://www.tensorflow.org>`_ is also used in some automatic differentiation tests.
+`TensorFlow`_ is also used in some automatic differentiation tests.
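+
+A typical way to get it, assuming a pip-based environment, is simply::
+
+    pip install tensorflow
+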
Bug reports and contributions
*****************************
diff --git a/src/python/doc/representations.rst b/src/python/doc/representations.rst
index b0477197..5686974a 100644
--- a/src/python/doc/representations.rst
+++ b/src/python/doc/representations.rst
@@ -8,10 +8,16 @@ Representations manual
.. include:: representations_sum.inc
-This module, originally available at https://github.com/MathieuCarriere/sklearn-tda and named sklearn_tda, aims at bridging the gap between persistence diagrams and machine learning, by providing implementations of most of the vector representations for persistence diagrams in the literature, in a scikit-learn format. More specifically, it provides tools, using the scikit-learn standard interface, to compute distances and kernels on persistence diagrams, and to convert these diagrams into vectors in Euclidean space.
+This module aims at bridging the gap between persistence diagrams and machine learning, by providing implementations of most of the vector representations for persistence diagrams in the literature, in a scikit-learn format. More specifically, it provides tools, using the scikit-learn standard interface, to compute distances and kernels on persistence diagrams, and to convert these diagrams into vectors in Euclidean space. Moreover, this module contains `PersLay <http://proceedings.mlr.press/v108/carriere20a.html>`_, which is a general neural network layer for performing deep learning with persistence diagrams, implemented in TensorFlow.
A diagram is represented as a numpy array of shape (n,2), as can be obtained from :func:`~gudhi.SimplexTree.persistence_intervals_in_dimension` for instance. Points at infinity are represented as a numpy array of shape (n,1), storing only the birth time. The classes in this module can handle several persistence diagrams at once. In that case, the diagrams are provided as a list of numpy arrays. Note that it is not necessary for the diagrams to have the same number of points, i.e., for the corresponding arrays to have the same number of rows: all classes can handle arrays with different shapes.
+This `notebook <https://github.com/GUDHI/TDA-tutorial/blob/master/Tuto-GUDHI-representations.ipynb>`__ explains how to
+efficiently combine machine learning and topological data analysis with the
+:doc:`representations module<representations>` in a scikit-learn fashion. This `notebook <https://github.com/MathieuCarriere/tda-tutorials/blob/perslay/Tuto-GUDHI-perslay-expe.ipynb>`__
+and `this one <https://github.com/MathieuCarriere/tda-tutorials/blob/perslay/Tuto-GUDHI-perslay-visu.ipynb>`__ explain how to use PersLay.
+
+
Examples
--------
@@ -30,8 +36,6 @@ This example computes the first two Landscapes associated to a persistence diagr
l=Landscape(num_landscapes=2,resolution=10).fit_transform(diags)
print(l)
-The output is:
-
.. testoutput::
[[1.02851895 2.05703791 2.57129739 1.54277843 0.89995409 1.92847304
@@ -45,13 +49,71 @@ Various kernels
This small example is also provided
:download:`diagram_vectorizations_distances_kernels.py <../example/diagram_vectorizations_distances_kernels.py>`
-Machine Learning and Topological Data Analysis
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+PersLay
+^^^^^^^
-This `notebook <https://github.com/GUDHI/TDA-tutorial/blob/master/Tuto-GUDHI-representations.ipynb>`_ explains how to
-efficiently combine machine learning and topological data analysis with the
-:doc:`representations module<representations>`.
+.. testsetup:: perslay
+
+ import numpy
+ numpy.set_printoptions(precision=5)
+
+.. testcode:: perslay
+
+ import numpy as np
+ import tensorflow as tf
+ from sklearn.preprocessing import MinMaxScaler
+ import gudhi.representations as gdr
+ import gudhi.tensorflow.perslay as prsl
+
+ diagrams = [np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.]])]
+ diagrams = gdr.DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())]).fit_transform(diagrams)
+ diagrams = tf.RaggedTensor.from_tensor(tf.constant(diagrams, dtype=tf.float32))
+
+ rho = tf.identity
+ phi = prsl.GaussianPerslayPhi((5, 5), ((-.5, 1.5), (-.5, 1.5)), .1)
+ weight = prsl.PowerPerslayWeight(1.,0.)
+ perm_op = tf.math.reduce_sum
+
+ perslay = prsl.Perslay(phi=phi, weight=weight, perm_op=perm_op, rho=rho)
+ vectors = perslay(diagrams)
+ print(vectors)
+
+.. testcleanup:: perslay
+ numpy.set_printoptions(precision=8)
+
+.. testoutput:: perslay
+
+ tf.Tensor(
+ [[[[1.72661e-16]
+ [4.17060e-09]
+ [1.13369e-08]
+ [8.57388e-12]
+ [2.12439e-14]]
+
+ [[4.17151e-09]
+ [1.00741e-01]
+ [2.73843e-01]
+ [3.07242e-02]
+ [7.61575e-05]]
+
+ [[8.03829e-06]
+ [1.58027e+00]
+ [8.29970e-01]
+ [1.23954e+01]
+ [3.07241e-02]]
+
+ [[8.02694e-06]
+ [1.30657e+00]
+ [9.09230e+00]
+ [6.16648e-02]
+ [1.39492e-06]]
+
+ [[9.03313e-13]
+ [1.49548e-07]
+ [1.51460e-04]
+ [1.02051e-06]
+ [7.80935e-16]]]], shape=(1, 5, 5, 1), dtype=float32)
Preprocessing
-------------
@@ -80,3 +142,44 @@ Metrics
:members:
:special-members:
:show-inheritance:
+
+PersLay
+-------
+.. autoclass:: gudhi.tensorflow.perslay.Perslay
+ :members:
+ :special-members:
+ :show-inheritance:
+
+Weight functions
+^^^^^^^^^^^^^^^^
+.. autoclass:: gudhi.tensorflow.perslay.GaussianMixturePerslayWeight
+ :members:
+ :special-members:
+ :show-inheritance:
+
+.. autoclass:: gudhi.tensorflow.perslay.GridPerslayWeight
+ :members:
+ :special-members:
+ :show-inheritance:
+
+.. autoclass:: gudhi.tensorflow.perslay.PowerPerslayWeight
+ :members:
+ :special-members:
+ :show-inheritance:
+
+Phi functions
+^^^^^^^^^^^^^
+.. autoclass:: gudhi.tensorflow.perslay.FlatPerslayPhi
+ :members:
+ :special-members:
+ :show-inheritance:
+
+.. autoclass:: gudhi.tensorflow.perslay.GaussianPerslayPhi
+ :members:
+ :special-members:
+ :show-inheritance:
+
+.. autoclass:: gudhi.tensorflow.perslay.TentPerslayPhi
+ :members:
+ :special-members:
+ :show-inheritance:
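+
+As a variant of the tested example above, here is a minimal sketch (output not
+shown) that swaps in the tent transform and a power weight:
+
+.. code-block:: python
+
+    import numpy as np
+    import tensorflow as tf
+    import gudhi.tensorflow.perslay as prsl
+
+    diagrams = tf.RaggedTensor.from_tensor(tf.constant([[[0., 4.], [1., 2.]]], dtype=tf.float32))
+    phi = prsl.TentPerslayPhi(np.linspace(0., 4., 10, dtype=np.float32))
+    weight = prsl.PowerPerslayWeight(1., 1.)
+    perslay = prsl.Perslay(phi=phi, weight=weight, perm_op=tf.math.reduce_max, rho=tf.identity)
+    vectors = perslay(diagrams)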
diff --git a/src/python/doc/representations_sum.inc b/src/python/doc/representations_sum.inc
index 9515f044..f8c31ecf 100644
--- a/src/python/doc/representations_sum.inc
+++ b/src/python/doc/representations_sum.inc
@@ -1,14 +1,16 @@
.. table::
:widths: 30 40 30
- +------------------------------------------------------------------+----------------------------------------------------------------+-------------------------------------------------------------------------+
- | .. figure:: | Vectorizations, distances and kernels that work on persistence | :Author: Mathieu Carrière, Martin Royer, Gard Spreemann, Wojciech Reise |
- | img/sklearn-tda.png | diagrams, compatible with scikit-learn. | |
- | | | :Since: GUDHI 3.1.0 |
- | | | |
- | | | :License: MIT |
- | | | |
- | | | :Requires: `Scikit-learn <installation.html#scikit-learn>`_ |
- +------------------------------------------------------------------+----------------------------------------------------------------+-------------------------------------------------------------------------+
- | * :doc:`representations` |
- +------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+
+ +------------------------------------------------------------------+----------------------------------------------------------------+------------------------------------------------------------------------------------------------------------+
+ | .. figure:: | Vectorizations, distances and kernels that work on persistence | :Author: Mathieu Carrière, Martin Royer, Gard Spreemann, Wojciech Reise |
+ | img/sklearn-tda.png | diagrams, compatible with scikit-learn and TensorFlow. | |
+ | | | :Since: GUDHI 3.1.0 |
+ | | | |
+ | | | :License: MIT |
+ | | | |
+ | | | :Requires: `Scikit-learn <installation.html#scikit-learn>`_, `TensorFlow <installation.html#tensorflow>`_ |
+ | | | |
+ +------------------------------------------------------------------+----------------------------------------------------------------+------------------------------------------------------------------------------------------------------------+
+ | * :doc:`representations` |
+ +------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
diff --git a/src/python/doc/rips_complex_user.rst b/src/python/doc/rips_complex_user.rst
index c41a7803..a4e83462 100644
--- a/src/python/doc/rips_complex_user.rst
+++ b/src/python/doc/rips_complex_user.rst
@@ -34,9 +34,6 @@ A vertex name corresponds to the index of the point in the given range (aka. the
In this example, as edges (4,5), (4,6) and (5,6) are in the complex, simplex (4,5,6) is added with the filtration value
set with :math:`max(filtration(4,5), filtration(4,6), filtration(5,6))`. And so on for simplex (0,1,2,3).
-If the :doc:`RipsComplex <rips_complex_ref>` interfaces are not detailed enough for your need, please refer to
-rips_persistence_step_by_step.cpp C++ example, where the graph construction over the Simplex_tree is more detailed.
-
A Rips complex can easily become huge, even if we limit the length of the edges
and the dimension of the simplices. One easy trick, before building a Rips
complex on a point cloud, is to call :func:`~gudhi.sparsify_point_set` which removes points
@@ -55,6 +52,13 @@ construction of a :class:`~gudhi.RipsComplex` object asks it to build a sparse R
parameter :math:`\varepsilon=0.3`, while the default `sparse=None` builds the
regular Rips complex.
+Another option, which is especially useful if you want to compute persistent homology in "high" dimension (2 or more,
+sometimes even 1), is to build the Rips complex only up to dimension 1 (a graph), then use
+:func:`~gudhi.SimplexTree.collapse_edges` to reduce the size of this graph, and finally call
+:func:`~gudhi.SimplexTree.expansion` to get a simplicial complex of a suitable dimension to compute its homology. This
+trick gives the same persistence diagram as one would get with a plain use of `RipsComplex`, with a complex that is
+often significantly smaller and thus faster to process.
+
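+A minimal sketch of this trick (reusing the example points from the next section):
+
+.. code-block:: python
+
+    import gudhi
+    points = [[1, 1], [7, 0], [4, 6], [9, 6], [0, 14], [2, 19], [9, 17]]
+    stree = gudhi.RipsComplex(points=points, max_edge_length=12.0).create_simplex_tree(max_dimension=1)
+    stree.collapse_edges()
+    stree.expansion(2)
+    diag = stree.persistence()
+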
Point cloud
-----------
@@ -117,54 +121,44 @@ Notice that if we use
asking for a very sparse version (theory only gives some guarantee on the meaning of the output if `sparse<1`),
2 to 5 edges disappear, depending on the random vertex used to start the sparsification.
-Example from OFF file
-^^^^^^^^^^^^^^^^^^^^^
+Example step by step
+^^^^^^^^^^^^^^^^^^^^
-This example builds the :doc:`RipsComplex <rips_complex_ref>` from the given
-points in an OFF file, and max_edge_length value.
-Then it creates a :doc:`SimplexTree <simplex_tree_ref>` with it.
+While :doc:`RipsComplex <rips_complex_ref>` is convenient, for instance to build a simplicial complex in one line
+
+.. testcode::
-Finally, it is asked to display information about the Rips complex.
+ import gudhi
+ points = [[1, 1], [7, 0], [4, 6], [9, 6], [0, 14], [2, 19], [9, 17]]
+ cplx = gudhi.RipsComplex(points=points, max_edge_length=12.0).create_simplex_tree(max_dimension=2)
+you can achieve the same result without this class for more flexibility
.. testcode::
- import gudhi
- off_file = gudhi.__root_source_dir__ + '/data/points/alphacomplexdoc.off'
- point_cloud = gudhi.read_points_from_off_file(off_file = off_file)
- rips_complex = gudhi.RipsComplex(points=point_cloud, max_edge_length=12.0)
- simplex_tree = rips_complex.create_simplex_tree(max_dimension=1)
- result_str = 'Rips complex is of dimension ' + repr(simplex_tree.dimension()) + ' - ' + \
- repr(simplex_tree.num_simplices()) + ' simplices - ' + \
- repr(simplex_tree.num_vertices()) + ' vertices.'
- print(result_str)
- fmt = '%s -> %.2f'
- for filtered_value in simplex_tree.get_filtration():
- print(fmt % tuple(filtered_value))
+ import gudhi
+ from scipy.spatial.distance import cdist
+ points = [[1, 1], [7, 0], [4, 6], [9, 6], [0, 14], [2, 19], [9, 17]]
+ distance_matrix = cdist(points, points)
+ cplx = gudhi.SimplexTree.create_from_array(distance_matrix, max_filtration=12.0)
+ cplx.expansion(2)
-the program output is:
+or
-.. testoutput::
+.. testcode::
+
+ import gudhi
+ from scipy.spatial import cKDTree
+ points = [[1, 1], [7, 0], [4, 6], [9, 6], [0, 14], [2, 19], [9, 17]]
+ tree = cKDTree(points)
+ edges = tree.sparse_distance_matrix(tree, max_distance=12.0, output_type="coo_matrix")
+ cplx = gudhi.SimplexTree()
+ cplx.insert_edges_from_coo_matrix(edges)
+ cplx.expansion(2)
- Rips complex is of dimension 1 - 18 simplices - 7 vertices.
- [0] -> 0.00
- [1] -> 0.00
- [2] -> 0.00
- [3] -> 0.00
- [4] -> 0.00
- [5] -> 0.00
- [6] -> 0.00
- [2, 3] -> 5.00
- [4, 5] -> 5.39
- [0, 2] -> 5.83
- [0, 1] -> 6.08
- [1, 3] -> 6.32
- [1, 2] -> 6.71
- [5, 6] -> 7.28
- [2, 4] -> 8.94
- [0, 3] -> 9.43
- [4, 6] -> 9.49
- [3, 6] -> 11.00
+
+This way, you can easily add a call to :func:`~gudhi.SimplexTree.collapse_edges` before the expansion,
+use a different metric to compute the matrix, or other variations.
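+
+For instance, a sketch of such a variation, using the L1 metric from SciPy and an
+edge collapse before the expansion:
+
+.. code-block:: python
+
+    import gudhi
+    from scipy.spatial.distance import cdist
+    points = [[1, 1], [7, 0], [4, 6], [9, 6], [0, 14], [2, 19], [9, 17]]
+    distance_matrix = cdist(points, points, metric="cityblock")
+    cplx = gudhi.SimplexTree.create_from_array(distance_matrix, max_filtration=12.0)
+    cplx.collapse_edges()
+    cplx.expansion(2)
+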
Distance matrix
---------------
@@ -223,54 +217,7 @@ until dimension 1 - one skeleton graph in other words), the output is:
[4, 6] -> 9.49
[3, 6] -> 11.00
-Example from csv file
-^^^^^^^^^^^^^^^^^^^^^
-
-This example builds the :doc:`RipsComplex <rips_complex_ref>` from the given
-distance matrix in a csv file, and max_edge_length value.
-Then it creates a :doc:`SimplexTree <simplex_tree_ref>` with it.
-
-Finally, it is asked to display information about the Rips complex.
-
-
-.. testcode::
-
- import gudhi
- distance_matrix = gudhi.read_lower_triangular_matrix_from_csv_file(csv_file=gudhi.__root_source_dir__ + \
- '/data/distance_matrix/full_square_distance_matrix.csv')
- rips_complex = gudhi.RipsComplex(distance_matrix=distance_matrix, max_edge_length=12.0)
- simplex_tree = rips_complex.create_simplex_tree(max_dimension=1)
- result_str = 'Rips complex is of dimension ' + repr(simplex_tree.dimension()) + ' - ' + \
- repr(simplex_tree.num_simplices()) + ' simplices - ' + \
- repr(simplex_tree.num_vertices()) + ' vertices.'
- print(result_str)
- fmt = '%s -> %.2f'
- for filtered_value in simplex_tree.get_filtration():
- print(fmt % tuple(filtered_value))
-
-the program output is:
-
-.. testoutput::
-
- Rips complex is of dimension 1 - 18 simplices - 7 vertices.
- [0] -> 0.00
- [1] -> 0.00
- [2] -> 0.00
- [3] -> 0.00
- [4] -> 0.00
- [5] -> 0.00
- [6] -> 0.00
- [2, 3] -> 5.00
- [4, 5] -> 5.39
- [0, 2] -> 5.83
- [0, 1] -> 6.08
- [1, 3] -> 6.32
- [1, 2] -> 6.71
- [5, 6] -> 7.28
- [2, 4] -> 8.94
- [0, 3] -> 9.43
- [4, 6] -> 9.49
- [3, 6] -> 11.00
+If this lower triangular matrix is stored in a CSV file, like `data/distance_matrix/full_square_distance_matrix.csv` in the GUDHI distribution, you can read it with :func:`~gudhi.read_lower_triangular_matrix_from_csv_file`.
Correlation matrix
------------------
diff --git a/src/python/gudhi/bottleneck.cc b/src/python/gudhi/bottleneck.cc
index 8a3d669a..040e6d37 100644
--- a/src/python/gudhi/bottleneck.cc
+++ b/src/python/gudhi/bottleneck.cc
@@ -9,18 +9,20 @@
*/
#include <gudhi/Bottleneck.h>
-
+#include <optional>
#include <pybind11_diagram_utils.h>
+#include <pybind11/stl.h>
+
+// Indices are added internally in bottleneck_distance; they are not needed in the input.
+static auto make_point(double x, double y, py::ssize_t) { return std::pair(x, y); };
// For compatibility with older versions, we want to support e=None.
-// In C++17, the recommended way is std::optional<double>.
-double bottleneck(Dgm d1, Dgm d2, py::object epsilon)
+double bottleneck(Dgm d1, Dgm d2, std::optional<double> epsilon)
{
- double e = (std::numeric_limits<double>::min)();
- if (!epsilon.is_none()) e = epsilon.cast<double>();
- // I *think* the call to request() has to be before releasing the GIL.
- auto diag1 = numpy_to_range_of_pairs(d1);
- auto diag2 = numpy_to_range_of_pairs(d2);
+ double e = epsilon.value_or((std::numeric_limits<double>::min)());
+ // I *think* the call to request() in numpy_to_range_of_pairs has to be before releasing the GIL.
+ auto diag1 = numpy_to_range_of_pairs(d1, make_point);
+ auto diag2 = numpy_to_range_of_pairs(d2, make_point);
py::gil_scoped_release release;
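On the Python side the behavior is unchanged; a minimal usage sketch (diagram values are illustrative):

```python
import numpy as np
import gudhi

d1 = np.array([[0.0, 4.0], [1.0, 2.0]])
d2 = np.array([[0.0, 4.5]])
print(gudhi.bottleneck_distance(d1, d2))       # e=None still means exact computation
print(gudhi.bottleneck_distance(d1, d2, 0.1))  # with additive approximation e
```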
diff --git a/src/python/gudhi/hera/bottleneck.cc b/src/python/gudhi/hera/bottleneck.cc
index ec461f7c..9826252c 100644
--- a/src/python/gudhi/hera/bottleneck.cc
+++ b/src/python/gudhi/hera/bottleneck.cc
@@ -16,13 +16,16 @@
using py::ssize_t;
#endif
-#include <hera/bottleneck.h> // Hera
+#include <hera/bottleneck.h>
+
+// Indices are added internally in bottleneck_distance; they are not needed in the input.
+static auto make_point(double x, double y, py::ssize_t) { return std::pair(x, y); };
double bottleneck_distance(Dgm d1, Dgm d2, double delta)
{
- // I *think* the call to request() has to be before releasing the GIL.
- auto diag1 = numpy_to_range_of_pairs(d1);
- auto diag2 = numpy_to_range_of_pairs(d2);
+ // I *think* the call to request() in numpy_to_range_of_pairs has to be before releasing the GIL.
+ auto diag1 = numpy_to_range_of_pairs(d1, make_point);
+ auto diag2 = numpy_to_range_of_pairs(d2, make_point);
py::gil_scoped_release release;
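Similarly for the Hera variant, a sketch of the Python call (`delta` is the relative error):

```python
import numpy as np
from gudhi.hera import bottleneck_distance

d1 = np.array([[0.0, 4.0], [1.0, 2.0]])
d2 = np.array([[0.0, 4.5]])
print(bottleneck_distance(d1, d2, 0.01))
```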
diff --git a/src/python/gudhi/hera/wasserstein.cc b/src/python/gudhi/hera/wasserstein.cc
index 3516352e..41e84f7b 100644
--- a/src/python/gudhi/hera/wasserstein.cc
+++ b/src/python/gudhi/hera/wasserstein.cc
@@ -16,27 +16,118 @@
using py::ssize_t;
#endif
-#include <hera/wasserstein.h> // Hera
+#include <hera/wasserstein.h>
+#include <gudhi/Debug_utils.h>
-double wasserstein_distance(
+// Unlike bottleneck, for wasserstein, we need to add the index ourselves (if we want the matching)
+static auto make_hera_point(double x, double y, py::ssize_t i) { return hera::DiagramPoint<double>(x, y, i); };
+
+py::object wasserstein_distance(
Dgm d1, Dgm d2,
double wasserstein_power, double internal_p,
- double delta)
+ double delta, bool return_matching)
{
- // I *think* the call to request() has to be before releasing the GIL.
- auto diag1 = numpy_to_range_of_pairs(d1);
- auto diag2 = numpy_to_range_of_pairs(d2);
-
- py::gil_scoped_release release;
-
- hera::AuctionParams<double> params;
- params.wasserstein_power = wasserstein_power;
- // hera encodes infinity as -1...
- if(std::isinf(internal_p)) internal_p = hera::get_infinity<double>();
- params.internal_p = internal_p;
- params.delta = delta;
- // The extra parameters are purposely not exposed for now.
- return hera::wasserstein_dist(diag1, diag2, params);
+ // I *think* the call to request() in numpy_to_range_of_pairs has to be before releasing the GIL.
+ auto diag1 = numpy_to_range_of_pairs(d1, make_hera_point);
+ auto diag2 = numpy_to_range_of_pairs(d2, make_hera_point);
+ int n1 = boost::size(diag1);
+ int n2 = boost::size(diag2);
+ hera::AuctionResult<double> res;
+ double dist;
+
+ { // No Python allowed in this section
+ py::gil_scoped_release release;
+
+ hera::AuctionParams<double> params;
+ params.wasserstein_power = wasserstein_power;
+ // hera encodes infinity as -1...
+ if(std::isinf(internal_p)) internal_p = hera::get_infinity<double>();
+ params.internal_p = internal_p;
+ params.delta = delta;
+ if(return_matching) {
+ params.return_matching = true;
+ params.match_inf_points = true;
+ }
+ // The extra parameters are purposely not exposed for now.
+ res = hera::wasserstein_cost_detailed(diag1, diag2, params);
+ dist = std::pow(res.cost, 1./params.wasserstein_power);
+ }
+
+ if(!return_matching)
+ return py::cast(dist);
+
+ if(dist == std::numeric_limits<double>::infinity())
+ return py::make_tuple(dist, py::none());
+
+ // bug in Hera, matching_a_to_b_ is empty if one diagram is empty or both diagrams contain the same points
+ if(res.matching_a_to_b_.size() == 0) {
+ if(n1 == 0) { // diag1 is empty
+ py::array_t<int> matching({{ n2, 2 }}, nullptr);
+ auto m = matching.mutable_unchecked();
+ for(int j=0; j<n2; ++j){
+ m(j, 0) = -1;
+ m(j, 1) = j;
+ }
+ return py::make_tuple(dist, matching);
+ }
+ if(n2 == 0) { // diag2 is empty
+ py::array_t<int> matching({{ n1, 2 }}, nullptr);
+ auto m = matching.mutable_unchecked();
+ for(int i=0; i<n1; ++i){
+ m(i, 0) = i;
+ m(i, 1) = -1;
+ }
+ return py::make_tuple(dist, matching);
+ }
+ // The only remaining case should be that the 2 diagrams are identical, but possibly shuffled
+ GUDHI_CHECK(n1==n2, "unexpected bug in Hera?");
+ std::vector v1(boost::begin(diag1), boost::end(diag1));
+ std::vector v2(boost::begin(diag2), boost::end(diag2));
+ std::sort(v1.begin(), v1.end());
+ std::sort(v2.begin(), v2.end());
+ py::array_t<int> matching({{ n1, 2 }}, nullptr);
+ auto m = matching.mutable_unchecked();
+ for(int i=0; i<n1; ++i){
+ GUDHI_CHECK(v1[i][0]==v2[i][0] && v1[i][1]==v2[i][1], "unexpected bug in Hera?");
+ m(i, 0) = v1[i].get_id();
+ m(i, 1) = v2[i].get_id();
+ }
+ return py::make_tuple(dist, matching);
+
+ }
+
+ // bug in Hera, diagonal points are ignored and don't appear in matching_a_to_b_
+ for(auto p : diag1)
+ if(p[0] == p[1]) { auto id = p.get_id(); res.matching_a_to_b_[id] = -id-1; }
+ for(auto p : diag2)
+ if(p[0] == p[1]) { auto id = p.get_id(); res.matching_a_to_b_[-id-1] = id; }
+
+ py::array_t<int> matching({{ n1 + n2, 2 }}, nullptr);
+ auto m = matching.mutable_unchecked();
+ int cur = 0;
+ for(auto x : res.matching_a_to_b_){
+ if(x.first < 0) {
+ if(x.second < 0) {
+ } else {
+ m(cur, 0) = -1;
+ m(cur, 1) = x.second;
+ ++cur;
+ }
+ } else {
+ if(x.second < 0) {
+ m(cur, 0) = x.first;
+ m(cur, 1) = -1;
+ ++cur;
+ } else {
+ m(cur, 0) = x.first;
+ m(cur, 1) = x.second;
+ ++cur;
+ }
+ }
+ }
+  // n1+n2 rows was an overestimate (only attained if everything matches to the diagonal), so we return matching[:cur,:]
+ py::array_t<int> ret({{ cur, 2 }}, {{ matching.strides()[0], matching.strides()[1] }}, matching.data(), matching);
+ return py::make_tuple(dist, ret);
}
PYBIND11_MODULE(wasserstein, m) {
@@ -45,6 +136,7 @@ PYBIND11_MODULE(wasserstein, m) {
py::arg("order") = 1,
py::arg("internal_p") = std::numeric_limits<double>::infinity(),
py::arg("delta") = .01,
+ py::arg("matching") = false,
R"pbdoc(
Compute the Wasserstein distance between two diagrams.
Points at infinity are supported.
@@ -55,8 +147,9 @@ PYBIND11_MODULE(wasserstein, m) {
order (float): Wasserstein exponent W_q
internal_p (float): Internal Minkowski norm L^p in R^2
delta (float): Relative error 1+delta
+ matching (bool): if ``True``, computes and returns the optimal matching between X and Y, encoded as a (n x 2) np.array [...[i,j]...], meaning the i-th point in X is matched to the j-th point in Y, with the convention that (-1) represents the diagonal. If the distance between two diagrams is +inf (which happens if the cardinalities of essential parts differ) and the matching is requested, it will be set to ``None`` (any matching is optimal).
Returns:
- float: Approximate Wasserstein distance W_q(X,Y)
+ float|Tuple[float,numpy.array|None]: Approximate Wasserstein distance W_q(X,Y), and optionally the corresponding matching
)pbdoc");
}
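A sketch of the new matching interface from Python (diagram values are illustrative; `-1` encodes the diagonal):

```python
import numpy as np
from gudhi.hera import wasserstein_distance

X = np.array([[0.0, 4.0], [1.0, 2.0]])
Y = np.array([[0.0, 4.5]])
dist, matching = wasserstein_distance(X, Y, order=1, internal_p=2, matching=True)
# each row [i, j] matches point i of X with point j of Y; -1 stands for the diagonal
print(dist, matching)
```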
diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index d52185ef..ce74aee5 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -106,7 +106,7 @@ class PersistenceImage(BaseEstimator, TransformerMixin):
"""
return self.fit_transform([diag])[0,:]
-def _automatic_sample_range(sample_range, X, y):
+def _automatic_sample_range(sample_range, X):
"""
Compute and return the sample range from the persistence diagrams if one of the sample_range values is numpy.nan.
@@ -119,7 +119,7 @@ def _automatic_sample_range(sample_range, X, y):
nan_in_range = np.isnan(sample_range)
if nan_in_range.any():
try:
- pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
+ pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X)
[mx,my] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]]
[Mx,My] = [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
return np.where(nan_in_range, np.array([mx, My]), sample_range)
@@ -129,7 +129,7 @@ def _automatic_sample_range(sample_range, X, y):
return sample_range
-def _trim_on_edges(x, are_endpoints_nan):
+def _trim_endpoints(x, are_endpoints_nan):
if are_endpoints_nan[0]:
x = x[1:]
if are_endpoints_nan[1]:
@@ -137,11 +137,26 @@ def _trim_on_edges(x, are_endpoints_nan):
return x
+def _grid_from_sample_range(self, X):
+ sample_range = np.array(self.sample_range)
+ self.nan_in_range = np.isnan(sample_range)
+ self.new_resolution = self.resolution
+ if not self.keep_endpoints:
+ self.new_resolution += self.nan_in_range.sum()
+ self.sample_range_fixed = _automatic_sample_range(sample_range, X)
+ self.grid_ = np.linspace(self.sample_range_fixed[0], self.sample_range_fixed[1], self.new_resolution)
+ if not self.keep_endpoints:
+ self.grid_ = _trim_endpoints(self.grid_, self.nan_in_range)
+
+
class Landscape(BaseEstimator, TransformerMixin):
"""
This is a class for computing persistence landscapes from a list of persistence diagrams. A persistence landscape is a collection of 1D piecewise-linear functions computed from the rank function associated to the persistence diagram. These piecewise-linear functions are then sampled evenly on a given range and the corresponding vectors of samples are concatenated and returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details.
+
+ Attributes:
+ grid_ (1d array): The grid on which the landscapes are computed.
"""
- def __init__(self, num_landscapes=5, resolution=100, sample_range=[np.nan, np.nan]):
+ def __init__(self, num_landscapes=5, resolution=100, sample_range=[np.nan, np.nan], *, keep_endpoints=False):
"""
Constructor for the Landscape class.
@@ -149,10 +164,10 @@ class Landscape(BaseEstimator, TransformerMixin):
num_landscapes (int): number of piecewise-linear functions to output (default 5).
resolution (int): number of samples for all piecewise-linear functions (default 100).
sample_range ([double, double]): minimum and maximum of all piecewise-linear function domains, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
+ keep_endpoints (bool): when computing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting; the default is to use a slightly smaller range.
"""
self.num_landscapes, self.resolution, self.sample_range = num_landscapes, resolution, sample_range
- self.nan_in_range = np.isnan(np.array(self.sample_range))
- self.new_resolution = self.resolution + self.nan_in_range.sum()
+ self.keep_endpoints = keep_endpoints
def fit(self, X, y=None):
"""
@@ -162,9 +177,7 @@ class Landscape(BaseEstimator, TransformerMixin):
X (list of n x 2 numpy arrays): input persistence diagrams.
y (n x 1 array): persistence diagram labels (unused).
"""
- self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
- self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution)
- self.im_range = _trim_on_edges(self.im_range, self.nan_in_range)
+ _grid_from_sample_range(self, X)
return self
def transform(self, X):
@@ -179,7 +192,7 @@ class Landscape(BaseEstimator, TransformerMixin):
"""
Xfit = []
- x_values = self.im_range
+ x_values = self.grid_
for diag in X:
midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2.
tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0)
@@ -213,8 +226,11 @@ class Landscape(BaseEstimator, TransformerMixin):
class Silhouette(BaseEstimator, TransformerMixin):
"""
This is a class for computing persistence silhouettes from a list of persistence diagrams. A persistence silhouette is computed by taking a weighted average of the collection of 1D piecewise-linear functions given by the persistence landscapes, and then by evenly sampling this average on a given range. Finally, the corresponding vector of samples is returned. See https://arxiv.org/abs/1312.0308 for more details.
+
+ Attributes:
+ grid_ (1d array): The grid on which the silhouette is computed.
"""
- def __init__(self, weight=lambda x: 1, resolution=100, sample_range=[np.nan, np.nan]):
+ def __init__(self, weight=lambda x: 1, resolution=100, sample_range=[np.nan, np.nan], *, keep_endpoints=False):
"""
Constructor for the Silhouette class.
@@ -222,10 +238,10 @@ class Silhouette(BaseEstimator, TransformerMixin):
weight (function): weight function for the persistence diagram points (default constant function, ie lambda x: 1). This function must be defined on 2D points, ie on lists or numpy arrays of the form [p_x,p_y].
resolution (int): number of samples for the weighted average (default 100).
sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
+ keep_endpoints (bool): when computing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting; the default is to use a slightly smaller range.
"""
self.weight, self.resolution, self.sample_range = weight, resolution, sample_range
- self.nan_in_range = np.isnan(np.array(self.sample_range))
- self.new_resolution = self.resolution + self.nan_in_range.sum()
+ self.keep_endpoints = keep_endpoints
def fit(self, X, y=None):
"""
@@ -235,9 +251,7 @@ class Silhouette(BaseEstimator, TransformerMixin):
X (list of n x 2 numpy arrays): input persistence diagrams.
y (n x 1 array): persistence diagram labels (unused).
"""
- self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
- self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution)
- self.im_range = _trim_on_edges(self.im_range, self.nan_in_range)
+ _grid_from_sample_range(self, X)
return self
def transform(self, X):
@@ -251,7 +265,7 @@ class Silhouette(BaseEstimator, TransformerMixin):
numpy array with shape (number of diagrams) x (**resolution**): output persistence silhouettes.
"""
Xfit = []
- x_values = self.im_range
+ x_values = self.grid_
for diag in X:
midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2.
@@ -280,36 +294,39 @@ class Silhouette(BaseEstimator, TransformerMixin):
class BettiCurve(BaseEstimator, TransformerMixin):
"""
Compute Betti curves from persistence diagrams. There are several modes of operation: with a given resolution (with or without a sample_range), with a predefined grid, and with none of the previous. With a predefined grid, the class computes the Betti numbers at those grid points. Without a predefined grid, if the resolution is set to None, it can be fit to a list of persistence diagrams and produce a grid that consists of (at least) the filtration values at which at least one of those persistence diagrams changes Betti numbers, and then compute the Betti numbers at those grid points. In the latter mode, the exact Betti curve is computed for the entire real line. Otherwise, if the resolution is given, the Betti curve is obtained by sampling evenly using either the given sample_range or based on the persistence diagrams.
- """
- def __init__(self, resolution=100, sample_range=[np.nan, np.nan], predefined_grid=None):
- """
- Constructor for the BettiCurve class.
+ Examples
+ --------
+ If pd is a persistence diagram and xs is a nonempty grid of finite values such that xs[0] >= pd.min(), then the results of:
- Parameters:
- resolution (int): number of sample for the piecewise-constant function (default 100).
- sample_range ([double, double]): minimum and maximum of the piecewise-constant function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
- predefined_grid (1d array or None, default=None): Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are ok. If None (default), and resolution is given, the grid will be uniform from x_min to x_max in 'resolution' steps, otherwise a grid will be computed that captures all changes in Betti numbers in the provided data.
+ >>> bc = BettiCurve(predefined_grid=xs) # doctest: +SKIP
+ >>> result = bc(pd) # doctest: +SKIP
- Attributes:
- grid_ (1d array): The grid on which the Betti numbers are computed. If predefined_grid was specified, `grid_` will always be that grid, independently of data. If not, the grid is fitted to capture all filtration values at which the Betti numbers change.
+ and
- Examples
- --------
- If pd is a persistence diagram and xs is a nonempty grid of finite values such that xs[0] >= pd.min(), then the results of:
+ >>> from scipy.interpolate import interp1d # doctest: +SKIP
+ >>> bc = BettiCurve(resolution=None, predefined_grid=None) # doctest: +SKIP
+ >>> bettis = bc.fit_transform([pd]) # doctest: +SKIP
+ >>> interp = interp1d(bc.grid_, bettis[0, :], kind="previous", fill_value="extrapolate") # doctest: +SKIP
+ >>> result = np.array(interp(xs), dtype=int) # doctest: +SKIP
- >>> bc = BettiCurve(predefined_grid=xs) # doctest: +SKIP
- >>> result = bc(pd) # doctest: +SKIP
+ are the same.
- and
+ Attributes
+ ----------
+ grid_ : 1d array
+ The grid on which the Betti numbers are computed. If predefined_grid was specified, `grid_` will always be that grid, independently of data. If not and resolution is None, the grid is fitted to capture all filtration values at which the Betti numbers change.
+ """
- >>> from scipy.interpolate import interp1d # doctest: +SKIP
- >>> bc = BettiCurve(resolution=None, predefined_grid=None) # doctest: +SKIP
- >>> bettis = bc.fit_transform([pd]) # doctest: +SKIP
- >>> interp = interp1d(bc.grid_, bettis[0, :], kind="previous", fill_value="extrapolate") # doctest: +SKIP
- >>> result = np.array(interp(xs), dtype=int) # doctest: +SKIP
+ def __init__(self, resolution=100, sample_range=[np.nan, np.nan], predefined_grid=None, *, keep_endpoints=False):
+ """
+ Constructor for the BettiCurve class.
- are the same.
+ Parameters:
+ resolution (int): number of samples for the piecewise-constant function (default 100), or None for the exact curve.
+ sample_range ([double, double]): minimum and maximum of the piecewise-constant function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
+ predefined_grid (1d array or None, default=None): Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are ok. If None (default), and resolution is given, the grid will be uniform from x_min to x_max in 'resolution' steps, otherwise a grid will be computed that captures all changes in Betti numbers in the provided data.
+ keep_endpoints (bool): when computing `sample_range` (fixed `resolution`, no `predefined_grid`), use the exact extremities. This is mostly useful for plotting; the default is to use a slightly smaller range.
"""
if (predefined_grid is not None) and (not isinstance(predefined_grid, np.ndarray)):
@@ -318,6 +335,7 @@ class BettiCurve(BaseEstimator, TransformerMixin):
self.predefined_grid = predefined_grid
self.resolution = resolution
self.sample_range = sample_range
+ self.keep_endpoints = keep_endpoints
def is_fitted(self):
return hasattr(self, "grid_")
@@ -336,8 +354,7 @@ class BettiCurve(BaseEstimator, TransformerMixin):
events = np.unique(np.concatenate([pd.flatten() for pd in X] + [[-np.inf]], axis=0))
self.grid_ = np.array(events)
else:
- self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
- self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution)
+ _grid_from_sample_range(self, X)
else:
self.grid_ = self.predefined_grid # Get the predefined grid from user
@@ -436,8 +453,11 @@ class BettiCurve(BaseEstimator, TransformerMixin):
class Entropy(BaseEstimator, TransformerMixin):
"""
This is a class for computing persistence entropy. Persistence entropy is a statistic for persistence diagrams inspired by Shannon entropy. This statistic can also be used to compute a feature vector, called the entropy summary function. See https://arxiv.org/pdf/1803.08304.pdf for more details. Note that a previous implementation was contributed by Manuel Soriano-Trigueros.
+
+ Attributes:
+ grid_ (1d array): In vector mode, the grid on which the entropy summary function is computed.
"""
- def __init__(self, mode="scalar", normalized=True, resolution=100, sample_range=[np.nan, np.nan]):
+ def __init__(self, mode="scalar", normalized=True, resolution=100, sample_range=[np.nan, np.nan], *, keep_endpoints=False):
"""
Constructor for the Entropy class.
@@ -446,8 +466,10 @@ class Entropy(BaseEstimator, TransformerMixin):
normalized (bool): whether to normalize the entropy summary function (default True). Used only if **mode** = "vector".
resolution (int): number of samples for the entropy summary function (default 100). Used only if **mode** = "vector".
sample_range ([double, double]): minimum and maximum of the entropy summary function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. Used only if **mode** = "vector".
+ keep_endpoints (bool): when computing `sample_range`, use the exact extremities. This is mostly useful for plotting; the default is to use a slightly smaller range.
"""
self.mode, self.normalized, self.resolution, self.sample_range = mode, normalized, resolution, sample_range
+ self.keep_endpoints = keep_endpoints
def fit(self, X, y=None):
"""
@@ -457,7 +479,9 @@ class Entropy(BaseEstimator, TransformerMixin):
X (list of n x 2 numpy arrays): input persistence diagrams.
y (n x 1 array): persistence diagram labels (unused).
"""
- self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
+ if self.mode == "vector":
+ _grid_from_sample_range(self, X)
+ self.step_ = self.grid_[1] - self.grid_[0]
return self
def transform(self, X):
@@ -471,8 +495,6 @@ class Entropy(BaseEstimator, TransformerMixin):
numpy array with shape (number of diagrams) x (1 if **mode** = "scalar" else **resolution**): output entropy.
"""
num_diag, Xfit = len(X), []
- x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution)
- step_x = x_values[1] - x_values[0]
new_X = BirthPersistenceTransform().fit_transform(X)
for i in range(num_diag):
@@ -487,8 +509,8 @@ class Entropy(BaseEstimator, TransformerMixin):
ent = np.zeros(self.resolution)
for j in range(num_pts_in_diag):
[px,py] = orig_diagram[j,:2]
- min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
- max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
+ min_idx = np.clip(np.ceil((px - self.sample_range_fixed[0]) / self.step_).astype(int), 0, self.resolution)
+ max_idx = np.clip(np.ceil((py - self.sample_range_fixed[0]) / self.step_).astype(int), 0, self.resolution)
ent[min_idx:max_idx]-=p[j]*np.log(p[j])
if self.normalized:
ent = ent / np.linalg.norm(ent, ord=1)
@@ -688,17 +710,17 @@ class Atol(BaseEstimator, TransformerMixin):
>>> b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]])
>>> c = np.array([[3, 2, -1], [1, 2, -1]])
>>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006))
- >>> atol_vectoriser.fit(X=[a, b, c]).centers # doctest: +SKIP
- >>> # array([[ 2. , 0.66666667, 3.33333333],
- >>> # [ 2.6 , 2.8 , -0.4 ]])
+ >>> atol_vectoriser.fit(X=[a, b, c]).centers
+ array([[ 2.6 , 2.8 , -0.4 ],
+ [ 2. , 0.66666667, 3.33333333]])
>>> atol_vectoriser(a)
- >>> # array([1.18168665, 0.42375966]) # doctest: +SKIP
+ array([0.42375966, 1.18168665])
>>> atol_vectoriser(c)
- >>> # array([0.02062512, 1.25157463]) # doctest: +SKIP
- >>> atol_vectoriser.transform(X=[a, b, c]) # doctest: +SKIP
- >>> # array([[1.18168665, 0.42375966],
- >>> # [0.29861028, 1.06330156],
- >>> # [0.02062512, 1.25157463]])
+ array([1.25157463, 0.02062512])
+ >>> atol_vectoriser.transform(X=[a, b, c])
+ array([[0.42375966, 1.18168665],
+ [1.06330156, 0.29861028],
+ [1.25157463, 0.02062512]])
"""
# Note the example above must be up to date with the one in tests called test_atol_doc
def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"):
@@ -749,6 +771,8 @@ class Atol(BaseEstimator, TransformerMixin):
measures_concat = np.concatenate(X)
self.quantiser.fit(X=measures_concat, sample_weight=sample_weight)
self.centers = self.quantiser.cluster_centers_
+ # Hack, but some people are unhappy if the order depends on the version of sklearn
+ self.centers = self.centers[np.lexsort(self.centers.T)]
if self.quantiser.n_clusters == 1:
dist_centers = pairwise.pairwise_distances(measures_concat)
np.fill_diagonal(dist_centers, 0)
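A sketch of how the new `keep_endpoints` flag and the fitted `grid_` attribute interact (diagram values are illustrative):

```python
import numpy as np
from gudhi.representations import Landscape, Entropy

diags = [np.array([[0.0, 4.0], [1.0, 2.0], [3.0, 8.0]])]

ls = Landscape(num_landscapes=2, resolution=10, keep_endpoints=True).fit(diags)
print(ls.grid_)  # 10 evenly spaced samples, including the exact extremities

es = Entropy(mode="vector", resolution=10).fit(diags)
print(es.grid_, es.step_)  # grid of the entropy summary function and its step
```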
diff --git a/src/python/gudhi/tensorflow/perslay.py b/src/python/gudhi/tensorflow/perslay.py
new file mode 100644
index 00000000..9976c5f3
--- /dev/null
+++ b/src/python/gudhi/tensorflow/perslay.py
@@ -0,0 +1,284 @@
+# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+# Author(s): Mathieu Carrière
+#
+# Copyright (C) 2021 Inria
+#
+# Modification(s):
+# - YYYY/MM Author: Description of the modification
+
+import tensorflow as tf
+import math
+
+class GridPerslayWeight(tf.keras.layers.Layer):
+ """
+ This is a class for computing a differentiable weight function for persistence diagram points. This function is defined from an array that contains its values on a 2D grid.
+ """
+ def __init__(self, grid, grid_bnds, **kwargs):
+ """
+ Constructor for the GridPerslayWeight class.
+
+ Parameters:
+ grid (n x n numpy array): grid of values.
+ grid_bnds (2 x 2 numpy array): boundaries of the grid, of the form [[min_x, max_x], [min_y, max_y]].
+ """
+ super().__init__(dynamic=True, **kwargs)
+ self.grid = tf.Variable(initial_value=grid, trainable=True)
+ self.grid_bnds = grid_bnds
+
+ def build(self, input_shape):
+ return self
+
+ def call(self, diagrams):
+ """
+ Apply GridPerslayWeight on a ragged tensor containing a list of persistence diagrams.
+
+ Parameters:
+ diagrams (n x None x 2): ragged tensor containing n persistence diagrams. The second dimension is ragged since persistence diagrams can have different numbers of points.
+
+ Returns:
+ weight (n x None): ragged tensor containing the weights of the points in the n persistence diagrams. The second dimension is ragged since persistence diagrams can have different numbers of points.
+ """
+ grid_shape = self.grid.shape
+ indices = []
+ for dim in range(2):
+ [m,M] = self.grid_bnds[dim]
+ coords = tf.expand_dims(diagrams[:,:,dim],-1)
+ ids = grid_shape[dim]*(coords-m)/(M-m)
+ indices.append(tf.cast(ids, tf.int32))
+ weight = tf.gather_nd(params=self.grid, indices=tf.concat(indices, axis=2))
+ return weight
+
+class GaussianMixturePerslayWeight(tf.keras.layers.Layer):
+ """
+ This is a class for computing a differentiable weight function for persistence diagram points. This function is defined from a mixture of Gaussian functions.
+ """
+ def __init__(self, gaussians, **kwargs):
+ """
+ Constructor for the GaussianMixturePerslayWeight class.
+
+ Parameters:
+ gaussians (4 x n numpy array): parameters of the n Gaussian functions, of the form transpose([[mu_x^1, mu_y^1, sigma_x^1, sigma_y^1], ..., [mu_x^n, mu_y^n, sigma_x^n, sigma_y^n]]).
+ """
+ super().__init__(dynamic=True, **kwargs)
+ self.W = tf.Variable(initial_value=gaussians, trainable=True)
+
+ def build(self, input_shape):
+ return self
+
+ def call(self, diagrams):
+ """
+ Apply GaussianMixturePerslayWeight on a ragged tensor containing a list of persistence diagrams.
+
+ Parameters:
+ diagrams (n x None x 2): ragged tensor containing n persistence diagrams. The second dimension is ragged since persistence diagrams can have different numbers of points.
+
+ Returns:
+ weight (n x None): ragged tensor containing the weights of the points in the n persistence diagrams. The second dimension is ragged since persistence diagrams can have different numbers of points.
+ """
+ means = tf.expand_dims(tf.expand_dims(self.W[:2,:],0),0)
+ variances = tf.expand_dims(tf.expand_dims(self.W[2:,:],0),0)
+ diagrams = tf.expand_dims(diagrams, -1)
+ dists = tf.math.multiply(tf.math.square(diagrams-means), 1/tf.math.square(variances))
+ weight = tf.math.reduce_sum(tf.math.exp(tf.math.reduce_sum(-dists, axis=2)), axis=2)
+ return weight
+
+class PowerPerslayWeight(tf.keras.layers.Layer):
+ """
+ This is a class for computing a differentiable weight function for persistence diagram points. This function is defined as a constant multiplied by the distance to the diagonal of the persistence diagram point raised to some power.
+ """
+ def __init__(self, constant, power, **kwargs):
+ """
+ Constructor for the PowerPerslayWeight class.
+
+ Parameters:
+ constant (float): constant value.
+ power (float): power applied to the distance to the diagonal.
+ """
+ super().__init__(dynamic=True, **kwargs)
+ self.constant = tf.Variable(initial_value=constant, trainable=True)
+ self.power = power
+
+ def build(self, input_shape):
+ return self
+
+ def call(self, diagrams):
+ """
+ Apply PowerPerslayWeight on a ragged tensor containing a list of persistence diagrams.
+
+ Parameters:
+ diagrams (n x None x 2): ragged tensor containing n persistence diagrams. The second dimension is ragged since persistence diagrams can have different numbers of points.
+
+ Returns:
+ weight (n x None): ragged tensor containing the weights of the points in the n persistence diagrams. The second dimension is ragged since persistence diagrams can have different numbers of points.
+ """
+ weight = self.constant * tf.math.pow(tf.math.abs(diagrams[:,:,1]-diagrams[:,:,0]), self.power)
+ return weight
+
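+# Example (illustrative): PowerPerslayWeight(1., 2.) gives a point (b, d) the weight
+# 1. * |d - b|**2; |d - b| is proportional to the point's distance to the diagonal.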
+
+class GaussianPerslayPhi(tf.keras.layers.Layer):
+ """
+ This is a class for computing a transformation function for persistence diagram points. This function turns persistence diagram points into 2D Gaussian functions centered on the points, that are then evaluated on a regular 2D grid.
+ """
+ def __init__(self, image_size, image_bnds, variance, **kwargs):
+ """
+ Constructor for the GaussianPerslayPhi class.
+
+ Parameters:
+ image_size (int numpy array): number of grid elements on each grid axis, of the form [n_x, n_y].
+ image_bnds (2 x 2 numpy array): boundaries of the grid, of the form [[min_x, max_x], [min_y, max_y]].
+ variance (float): variance of the Gaussian functions.
+ """
+ super().__init__(dynamic=True, **kwargs)
+ self.image_size = image_size
+ self.image_bnds = image_bnds
+ self.variance = tf.Variable(initial_value=variance, trainable=True)
+
+ def build(self, input_shape):
+ return self
+
+ def call(self, diagrams):
+ """
+ Apply GaussianPerslayPhi on a ragged tensor containing a list of persistence diagrams.
+
+ Parameters:
+ diagrams (n x None x 2): ragged tensor containing n persistence diagrams. The second dimension is ragged since persistence diagrams can have different numbers of points.
+
+ Returns:
+ output (n x None x image_size x image_size x 1): ragged tensor containing the evaluations on the 2D grid of the 2D Gaussian functions corresponding to the persistence diagram points, in the form of a 2D image with 1 channel that can be processed with, e.g., convolutional layers. The second dimension is ragged since persistence diagrams can have different numbers of points.
+ output_shape (int numpy array): shape of the output tensor.
+ """
+ diagrams_d = tf.concat([diagrams[:,:,0:1], diagrams[:,:,1:2]-diagrams[:,:,0:1]], axis=2)
+ step = [(self.image_bnds[i][1]-self.image_bnds[i][0])/self.image_size[i] for i in range(2)]
+ coords = [tf.range(self.image_bnds[i][0], self.image_bnds[i][1], step[i]) for i in range(2)]
+ M = tf.meshgrid(*coords)
+ mu = tf.concat([tf.expand_dims(tens, 0) for tens in M], axis=0)
+ for _ in range(2):
+ diagrams_d = tf.expand_dims(diagrams_d,-1)
+ dists = tf.math.square(diagrams_d-mu) / (2*tf.math.square(self.variance))
+ gauss = tf.math.exp(tf.math.reduce_sum(-dists, axis=2)) / (2*math.pi*tf.math.square(self.variance))
+ output = tf.expand_dims(gauss,-1)
+ output_shape = M[0].shape + tuple([1])
+ return output, output_shape
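+
+# Sketch of the computation above: a point (b,d) is first mapped to (b,d-b)
+# (birth/persistence coordinates), then a Gaussian bump centered at that location
+# is sampled on the image_size grid over image_bnds, giving one single-channel
+# image per diagram point.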
+
+class TentPerslayPhi(tf.keras.layers.Layer):
+ """
+ This is a class for computing a transformation function for persistence diagram points. This function turns persistence diagram points into 1D tent functions centered on the points (linearly increasing from zero to half of the bar length over the first half of the bar, linearly decreasing over the second half, and zero elsewhere), which are then evaluated on a regular 1D grid.
+ """
+ def __init__(self, samples, **kwargs):
+ """
+ Constructor for the TentPerslayPhi class.
+
+ Parameters:
+ samples (float numpy array): grid elements on which to evaluate the tent functions, of the form [x_1, ..., x_n].
+ """
+ super().__init__(dynamic=True, **kwargs)
+ self.samples = tf.Variable(initial_value=samples, trainable=True)
+
+ def build(self, input_shape):
+ return self
+
+ def call(self, diagrams):
+ """
+ Apply TentPerslayPhi on a ragged tensor containing a list of persistence diagrams.
+
+ Parameters:
+ diagrams (n x None x 2): ragged tensor containing n persistence diagrams. The second dimension is ragged since persistence diagrams can have different numbers of points.
+
+ Returns:
+ output (n x None x num_samples): ragged tensor containing the evaluations on the 1D grid of the 1D tent functions corresponding to the persistence diagram points. The second dimension is ragged since persistence diagrams can have different numbers of points.
+ output_shape (int numpy array): shape of the output tensor.
+ """
+ samples_d = tf.expand_dims(tf.expand_dims(self.samples,0),0)
+ xs, ys = diagrams[:,:,0:1], diagrams[:,:,1:2]
+ output = tf.math.maximum(.5*(ys-xs) - tf.math.abs(samples_d-.5*(ys+xs)), tf.constant([0.]))
+ output_shape = self.samples.shape
+ return output, output_shape
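+
+# Worked example, following the formula in call() above: a point (b,d) yields the
+# tent t -> max(0, (d-b)/2 - |t-(b+d)/2|), which peaks at the bar midpoint with
+# height half the bar length, sampled at each value of `samples`.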
+
+class FlatPerslayPhi(tf.keras.layers.Layer):
+ """
+ This is a class for computing a transformation function for persistence diagram points. This function turns persistence diagram points into 1D step functions that evaluate to one on the bar corresponding to the point and to zero elsewhere (the step is smoothed with a sigmoid so as to remain differentiable), and that are then evaluated on a regular 1D grid.
+ """
+ def __init__(self, samples, theta, **kwargs):
+ """
+ Constructor for the FlatPerslayPhi class.
+
+ Parameters:
+ samples (float numpy array): grid elements on which to evaluate the constant functions, of the form [x_1, ..., x_n].
+ theta (float): sigmoid parameter used to smooth the step function into a differentiable one. The larger theta is, the closer the output is to a true step function.
+ """
+ super().__init__(dynamic=True, **kwargs)
+ self.samples = tf.Variable(initial_value=samples, trainable=True)
+ self.theta = tf.Variable(initial_value=theta, trainable=True)
+
+ def build(self, input_shape):
+ return self
+
+ def call(self, diagrams):
+ """
+ Apply FlatPerslayPhi on a ragged tensor containing a list of persistence diagrams.
+
+ Parameters:
+ diagrams (n x None x 2): ragged tensor containing n persistence diagrams. The second dimension is ragged since persistence diagrams can have different numbers of points.
+
+ Returns:
+ output (n x None x num_samples): ragged tensor containing the evaluations on the 1D grid of the 1D constant functions corresponding to the persistence diagram points. The second dimension is ragged since persistence diagrams can have different numbers of points.
+ output_shape (int numpy array): shape of the output tensor.
+ """
+ samples_d = tf.expand_dims(tf.expand_dims(self.samples,0),0)
+ xs, ys = diagrams[:,:,0:1], diagrams[:,:,1:2]
+ output = 1./(1.+tf.math.exp(-self.theta*(.5*(ys-xs)-tf.math.abs(samples_d-.5*(ys+xs)))))
+ output_shape = self.samples.shape
+ return output, output_shape
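+
+# Worked example, following the formula in call() above: a point (b,d) yields
+# t -> sigmoid(theta*((d-b)/2 - |t-(b+d)/2|)), close to 1 for t inside the bar
+# [b,d] and close to 0 outside; larger theta gives a sharper step.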
+
+class Perslay(tf.keras.layers.Layer):
+ """
+ This is a TensorFlow layer for vectorizing persistence diagrams in a differentiable way within a neural network. It implements the PersLay equation; see `the corresponding article <http://proceedings.mlr.press/v108/carriere20a.html>`_.
+ """
+ def __init__(self, weight, phi, perm_op, rho, **kwargs):
+ """
+ Constructor for the Perslay class.
+
+ Parameters:
+ weight (function): weight function for the persistence diagram points. Can be either :class:`~gudhi.tensorflow.perslay.GridPerslayWeight`, :class:`~gudhi.tensorflow.perslay.GaussianMixturePerslayWeight`, :class:`~gudhi.tensorflow.perslay.PowerPerslayWeight`, or a custom TensorFlow function that takes persistence diagrams as argument (represented as an (n x None x 2) ragged tensor, where n is the number of diagrams).
+ phi (function): transformation function for the persistence diagram points. Can be either :class:`~gudhi.tensorflow.perslay.GaussianPerslayPhi`, :class:`~gudhi.tensorflow.perslay.TentPerslayPhi`, :class:`~gudhi.tensorflow.perslay.FlatPerslayPhi`, or a custom TensorFlow class (that can have trainable parameters) with a method `call` that takes persistence diagrams as argument (represented as an (n x None x 2) ragged tensor, where n is the number of diagrams).
+ perm_op (function): permutation invariant function, such as `tf.math.reduce_sum`, `tf.math.reduce_mean`, `tf.math.reduce_max`, `tf.math.reduce_min`, or a custom TensorFlow function that takes two arguments: a tensor and an axis on which to apply the permutation invariant operation. If perm_op is a string of the form "topk" for some integer k (e.g., "top3"), the operation is computed as `tf.math.top_k` with parameter `int(k)`.
+ rho (function): postprocessing function that is applied after the permutation invariant operation. Can be any TensorFlow layer.
+ """
+ super().__init__(dynamic=True, **kwargs)
+ self.weight = weight
+ self.phi = phi
+ self.perm_op = perm_op
+ self.rho = rho
+
+ def build(self, input_shape):
+ return self
+
+ def call(self, diagrams):
+ """
+ Apply Perslay on a ragged tensor containing a list of persistence diagrams.
+
+ Parameters:
+ diagrams (n x None x 2): ragged tensor containing n persistence diagrams. The second dimension is ragged since persistence diagrams can have different numbers of points.
+
+ Returns:
+ vector (n x output_shape): tensor containing the vectorizations of the persistence diagrams.
+ """
+ vector, dim = self.phi(diagrams)
+ weight = self.weight(diagrams)
+ for _ in range(len(dim)):
+ weight = tf.expand_dims(weight, -1)
+ vector = tf.math.multiply(vector, weight)
+
+ permop = self.perm_op
+ if isinstance(permop, str) and permop[:3] == 'top':
+ k = int(permop[3:])
+ vector = vector.to_tensor(default_value=-1e10)
+ vector = tf.math.top_k(tf.transpose(vector, perm=[0, 2, 1]), k=k).values
+ vector = tf.reshape(vector, [-1,k*dim[0]])
+ else:
+ vector = permop(vector, axis=1)
+
+ vector = self.rho(vector)
+
+ return vector
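For orientation, a minimal end-to-end use of the new layer (a sketch mirroring the tests added below; the input is the ragged (n x None x 2) tensor documented in `Perslay.call`):

```python
import numpy as np
import tensorflow as tf
from gudhi.tensorflow.perslay import Perslay, TentPerslayPhi, PowerPerslayWeight

# One diagram with four points, as a ragged (1 x None x 2) tensor.
diagrams = tf.RaggedTensor.from_tensor(
    tf.constant([[[0., .5], [.125, .25], [.375, 1.], [.75, 1.]]], dtype=tf.float32))

perslay = Perslay(phi=TentPerslayPhi(np.arange(-1., 2., .1, dtype=np.float32)),
                  weight=PowerPerslayWeight(1., 0.),
                  perm_op=tf.math.reduce_sum,
                  rho=tf.identity)
vectors = perslay(diagrams)  # shape (1, 30): one vector per diagram
```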
diff --git a/src/python/include/pybind11_diagram_utils.h b/src/python/include/pybind11_diagram_utils.h
index 2d5194f4..5cb7c48b 100644
--- a/src/python/include/pybind11_diagram_utils.h
+++ b/src/python/include/pybind11_diagram_utils.h
@@ -17,16 +17,9 @@
namespace py = pybind11;
typedef py::array_t<double> Dgm;
-// Get m[i,0] and m[i,1] as a pair
-static auto pairify(void* p, py::ssize_t h, py::ssize_t w) {
- return [=](py::ssize_t i){
- char* birth = (char*)p + i * h;
- char* death = birth + w;
- return std::make_pair(*(double*)birth, *(double*)death);
- };
-}
-
-inline auto numpy_to_range_of_pairs(py::array_t<double> dgm) {
+// build_point(double birth, double death, ssize_t index) -> Point
+template<class BuildPoint>
+inline auto numpy_to_range_of_pairs(py::array_t<double> dgm, BuildPoint build_point) {
py::buffer_info buf = dgm.request();
// shape (n,2) or (0) for empty
if((buf.ndim!=2 || buf.shape[1]!=2) && (buf.ndim!=1 || buf.shape[0]!=0))
@@ -34,6 +27,16 @@ inline auto numpy_to_range_of_pairs(py::array_t<double> dgm) {
// In the case of shape (0), avoid reading non-existing strides[1] even if we won't use it.
py::ssize_t stride1 = buf.ndim == 2 ? buf.strides[1] : 0;
auto cnt = boost::counting_range<py::ssize_t>(0, buf.shape[0]);
- return boost::adaptors::transform(cnt, pairify(buf.ptr, buf.strides[0], stride1));
+
+ char* p = static_cast<char*>(buf.ptr);
+ auto h = buf.strides[0];
+ auto w = stride1;
+ // Build a point from m[i,0], m[i,1] and the row index i
+ auto pairify = [=](py::ssize_t i){
+ char* birth = p + i * h;
+ char* death = birth + w;
+ return build_point(*(double*)birth, *(double*)death, i);
+ };
+ return boost::adaptors::transform(cnt, pairify);
// Be careful that the returned range cannot contain references to dead temporaries.
}
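The refactored helper threads a `build_point(birth, death, index)` callback through the range construction, so callers (e.g., the hera bindings changed in this patch) can build points that keep their original row index, which is what matchings need. A Python sketch of the same pattern, for readability (the C++ version returns a lazily transformed range rather than a list):

```python
import numpy as np

def numpy_to_range_of_pairs(dgm, build_point):
    """Python sketch of the C++ helper: map row i of an (n, 2) array
    to build_point(birth, death, i)."""
    dgm = np.asarray(dgm, dtype=float)
    if dgm.size and (dgm.ndim != 2 or dgm.shape[1] != 2):
        raise ValueError("Diagram must be an array of size n x 2")
    return (build_point(b, d, i) for i, (b, d) in enumerate(dgm.reshape(-1, 2)))

# A build_point that keeps the index lets downstream code report matchings:
points = list(numpy_to_range_of_pairs(np.array([[2., 3.], [4., 6.]]),
                                      lambda b, d, i: (b, d, i)))
# [(2.0, 3.0, 0), (4.0, 6.0, 1)]
```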
diff --git a/src/python/test/test_perslay.py b/src/python/test/test_perslay.py
new file mode 100644
index 00000000..06497712
--- /dev/null
+++ b/src/python/test/test_perslay.py
@@ -0,0 +1,147 @@
+import numpy as np
+import tensorflow as tf
+from sklearn.preprocessing import MinMaxScaler
+from gudhi.tensorflow.perslay import *
+import gudhi.representations as gdr
+
+def test_gaussian_perslay():
+
+ diagrams = [np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.]])]
+ diagrams = gdr.DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())]).fit_transform(diagrams)
+ diagrams = tf.RaggedTensor.from_tensor(tf.constant(diagrams, dtype=tf.float32))
+
+ rho = tf.identity
+ phi = GaussianPerslayPhi((5, 5), ((-.5, 1.5), (-.5, 1.5)), .1)
+ weight = PowerPerslayWeight(1.,0.)
+ perm_op = tf.math.reduce_sum
+
+ perslay = Perslay(phi=phi, weight=weight, perm_op=perm_op, rho=rho)
+ vectors = perslay(diagrams)
+
+ assert np.linalg.norm(vectors.numpy() - np.array(
+[[[[1.7266072e-16],
+ [4.1706043e-09],
+ [1.1336876e-08],
+ [8.5738821e-12],
+ [2.1243891e-14]],
+
+ [[4.1715076e-09],
+ [1.0074080e-01],
+ [2.7384272e-01],
+ [3.0724244e-02],
+ [7.6157507e-05]],
+
+ [[8.0382870e-06],
+ [1.5802664e+00],
+ [8.2997030e-01],
+ [1.2395413e+01],
+ [3.0724116e-02]],
+
+ [[8.0269419e-06],
+ [1.3065740e+00],
+ [9.0923014e+00],
+ [6.1664842e-02],
+ [1.3949171e-06]],
+
+ [[9.0331329e-13],
+ [1.4954816e-07],
+ [1.5145997e-04],
+ [1.0205092e-06],
+ [7.8093526e-16]]]])) <= 1e-7
+
+def test_tent_perslay():
+
+ diagrams = [np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.]])]
+ diagrams = gdr.DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())]).fit_transform(diagrams)
+ diagrams = tf.RaggedTensor.from_tensor(tf.constant(diagrams, dtype=tf.float32))
+
+ rho = tf.identity
+ phi = TentPerslayPhi(np.array(np.arange(-1.,2.,.1), dtype=np.float32))
+ weight = PowerPerslayWeight(1.,0.)
+ perm_op = 'top3'
+
+ perslay = Perslay(phi=phi, weight=weight, perm_op=perm_op, rho=rho)
+ vectors = perslay(diagrams)
+
+ assert np.linalg.norm(vectors-np.array([[0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0.09999999, 0., 0.,
+ 0.2, 0.05, 0., 0.19999999, 0., 0.,
+ 0.09999999, 0.02500001, 0., 0.125, 0., 0.,
+ 0.22500002, 0., 0., 0.3, 0., 0.,
+ 0.19999999, 0.05000001, 0., 0.10000002, 0.10000002, 0.,
+ 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0. ]])) <= 1e-7
+
+def test_flat_perslay():
+
+ diagrams = [np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.]])]
+ diagrams = gdr.DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())]).fit_transform(diagrams)
+ diagrams = tf.RaggedTensor.from_tensor(tf.constant(diagrams, dtype=tf.float32))
+
+ rho = tf.identity
+ phi = FlatPerslayPhi(np.array(np.arange(-1.,2.,.1), dtype=np.float32), 100.)
+ weight = PowerPerslayWeight(1.,0.)
+ perm_op = tf.math.reduce_sum
+
+ perslay = Perslay(phi=phi, weight=weight, perm_op=perm_op, rho=rho)
+ vectors = perslay(diagrams)
+
+ assert np.linalg.norm(vectors-np.array([[0.0000000e+00, 0.0000000e+00, 1.8048651e-35, 3.9754645e-31, 8.7565101e-27,
+ 1.9287571e-22, 4.2483860e-18, 9.3576392e-14, 2.0611652e-09, 4.5398087e-05,
+ 5.0000376e-01, 1.0758128e+00, 1.9933071e+00, 1.0072457e+00, 1.9240967e+00,
+ 1.4999963e+00, 1.0000458e+00, 1.0066929e+00, 1.9933071e+00, 1.9999092e+00,
+ 1.0000000e+00, 9.0795562e-05, 4.1222914e-09, 1.8715316e-13, 8.4967405e-18,
+ 3.8574998e-22, 1.7512956e-26, 7.9508388e-31, 3.6097302e-35, 0.0000000e+00]])) <= 1e-7
+
+def test_gmix_weight():
+
+ diagrams = [np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.]])]
+ diagrams = gdr.DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())]).fit_transform(diagrams)
+ diagrams = tf.RaggedTensor.from_tensor(tf.constant(diagrams, dtype=tf.float32))
+
+ rho = tf.identity
+ phi = FlatPerslayPhi(np.array(np.arange(-1.,2.,.1), dtype=np.float32), 100.)
+ weight = GaussianMixturePerslayWeight(np.array([[.5],[.5],[5],[5]], dtype=np.float32))
+ perm_op = tf.math.reduce_sum
+
+ perslay = Perslay(phi=phi, weight=weight, perm_op=perm_op, rho=rho)
+ vectors = perslay(diagrams)
+
+ assert np.linalg.norm(vectors-np.array([[0.0000000e+00, 0.0000000e+00, 1.7869064e-35, 3.9359080e-31, 8.6693818e-27,
+ 1.9095656e-22, 4.2061142e-18, 9.2645292e-14, 2.0406561e-09, 4.4946366e-05,
+ 4.9502861e-01, 1.0652492e+00, 1.9753191e+00, 9.9723548e-01, 1.9043801e+00,
+ 1.4844525e+00, 9.8947650e-01, 9.9604094e-01, 1.9703994e+00, 1.9769192e+00,
+ 9.8850453e-01, 8.9751818e-05, 4.0749040e-09, 1.8500175e-13, 8.3990662e-18,
+ 3.8131562e-22, 1.7311636e-26, 7.8594399e-31, 3.5682349e-35, 0.0000000e+00]])) <= 1e-7
+
+def test_grid_weight():
+
+ diagrams = [np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.]])]
+ diagrams = gdr.DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())]).fit_transform(diagrams)
+ diagrams = tf.RaggedTensor.from_tensor(tf.constant(diagrams, dtype=tf.float32))
+
+ rho = tf.identity
+ phi = FlatPerslayPhi(np.array(np.arange(-1.,2.,.1), dtype=np.float32), 100.)
+ weight = GridPerslayWeight(np.array(np.random.uniform(size=[100,100]),dtype=np.float32),((-0.01, 1.01),(-0.01, 1.01)))
+ perm_op = tf.math.reduce_sum
+
+ perslay = Perslay(phi=phi, weight=weight, perm_op=perm_op, rho=rho)
+ vectors = perslay(diagrams)
+
+ assert np.linalg.norm(vectors-np.array([[0.0000000e+00, 0.0000000e+00, 1.5124093e-37, 3.3314498e-33, 7.3379791e-29,
+ 1.6163036e-24, 3.5601592e-20, 7.8417273e-16, 1.7272621e-11, 3.8043717e-07,
+ 4.1902456e-03, 1.7198652e-02, 1.2386327e-01, 9.2694648e-03, 1.9515079e-01,
+ 2.0629172e-01, 2.0210314e-01, 2.0442720e-01, 5.4709727e-01, 5.4939687e-01,
+ 2.7471092e-01, 2.4942532e-05, 1.1324385e-09, 5.1413016e-14, 2.3341474e-18,
+ 1.0596973e-22, 4.8110000e-27, 2.1841823e-31, 9.9163230e-36, 0.0000000e+00]])) <= 1e-7
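Note on the `'top3'` permutation op exercised in `test_tent_perslay`: as implemented in `Perslay.call`, a `"topk"` string keeps, for each output coordinate of phi, the k largest values across the diagram points (after padding the ragged tensor with a large negative default), so the resulting vector has k * num_samples entries. A minimal sketch of that reduction on a plain tensor (the values are made up for illustration):

```python
import tensorflow as tf

# Hypothetical phi output for one diagram: 4 points x 5 samples.
vector = tf.constant([[[0., 1., 2., 0., 0.],
                       [0., 3., 1., 0., 0.],
                       [0., 2., 4., 1., 0.],
                       [0., 0., 1., 2., 0.]]])
k = 3
top = tf.math.top_k(tf.transpose(vector, perm=[0, 2, 1]), k=k).values
flat = tf.reshape(top, [-1, k * 5])  # shape (1, 15): top-3 per sample, concatenated
```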
diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py
index 58caab21..f4ffbdc1 100755
--- a/src/python/test/test_representations.py
+++ b/src/python/test/test_representations.py
@@ -161,7 +161,7 @@ def test_entropy_miscalculation():
return -np.dot(l, np.log(l))
sce = Entropy(mode="scalar")
assert [[pe(diag_ex)]] == sce.fit_transform([diag_ex])
- sce = Entropy(mode="vector", resolution=4, normalized=False)
+ sce = Entropy(mode="vector", resolution=4, normalized=False, keep_endpoints=True)
pef = [-1/4*np.log(1/4)-1/4*np.log(1/4)-1/2*np.log(1/2),
-1/4*np.log(1/4)-1/4*np.log(1/4)-1/2*np.log(1/2),
-1/2*np.log(1/2),
@@ -170,7 +170,7 @@ def test_entropy_miscalculation():
sce = Entropy(mode="vector", resolution=4, normalized=True)
pefN = (sce.fit_transform([diag_ex]))[0]
area = np.linalg.norm(pefN, ord=1)
- assert area==1
+ assert area==pytest.approx(1)
def test_kernel_empty_diagrams():
empty_diag = np.empty(shape = [0, 2])
@@ -249,5 +249,21 @@ def test_landscape_nan_range():
dgm = np.array([[2., 6.], [3., 5.]])
lds = Landscape(num_landscapes=2, resolution=9, sample_range=[np.nan, 6.])
lds_dgm = lds(dgm)
- assert (lds.sample_range[0] == 2) & (lds.sample_range[1] == 6)
+ assert (lds.sample_range_fixed[0] == 2) & (lds.sample_range_fixed[1] == 6)
assert lds.new_resolution == 10
+
+def test_endpoints():
+ diags = [ np.array([[2., 3.]]) ]
+ for vec in [ Landscape(), Silhouette(), BettiCurve(), Entropy(mode="vector") ]:
+ vec.fit(diags)
+ assert vec.grid_[0] > 2 and vec.grid_[-1] < 3
+ for vec in [ Landscape(keep_endpoints=True), Silhouette(keep_endpoints=True), BettiCurve(keep_endpoints=True), Entropy(mode="vector", keep_endpoints=True)]:
+ vec.fit(diags)
+ assert vec.grid_[0] == 2 and vec.grid_[-1] == 3
+ vec = BettiCurve(resolution=None)
+ vec.fit(diags)
+ assert np.equal(vec.grid_, [-np.inf, 2., 3.]).all()
+
+def test_get_params():
+ for vec in [ Landscape(), Silhouette(), BettiCurve(), Entropy(mode="vector") ]:
+ vec.get_params()
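For reference, the behaviour pinned down by `test_endpoints`: by default the vectorizers place their sample grid strictly inside the range of the diagram, `keep_endpoints=True` pins the first and last grid values to the range endpoints, and `BettiCurve(resolution=None)` infers an exact grid from the data. A minimal sketch:

```python
import numpy as np
from gudhi.representations import Landscape, BettiCurve

diags = [np.array([[2., 3.]])]

ld = Landscape(keep_endpoints=True).fit(diags)
print(ld.grid_[0], ld.grid_[-1])  # 2.0 3.0, per test_endpoints above

bc = BettiCurve(resolution=None).fit(diags)
print(bc.grid_)  # [-inf  2.  3.], the exact change points
```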
diff --git a/src/python/test/test_wasserstein_distance.py b/src/python/test/test_wasserstein_distance.py
index a76b6ce7..42bf3299 100755
--- a/src/python/test/test_wasserstein_distance.py
+++ b/src/python/test/test_wasserstein_distance.py
@@ -97,6 +97,13 @@ def test_warn_infty():
assert (m is None)
+def _to_set(X):
+ return { (i, j) for i, j in X }
+
+def _same_permuted(X, Y):
+ return _to_set(X) == _to_set(Y)
+
+
def _basic_wasserstein(wasserstein_distance, delta, test_infinity=True, test_matching=True):
diag1 = np.array([[2.7, 3.7], [9.6, 14.0], [34.2, 34.974]])
diag2 = np.array([[2.8, 4.45], [9.5, 14.1]])
@@ -141,15 +148,16 @@ def _basic_wasserstein(wasserstein_distance, delta, test_infinity=True, test_mat
if test_matching:
match = wasserstein_distance(emptydiag, emptydiag, matching=True, internal_p=1., order=2)[1]
- assert np.array_equal(match, [])
+ # Accept [] or np.array of shape (2, 0)
+ assert len(match) == 0
match = wasserstein_distance(emptydiag, emptydiag, matching=True, internal_p=np.inf, order=2.24)[1]
- assert np.array_equal(match, [])
+ assert len(match) == 0
match = wasserstein_distance(emptydiag, diag2, matching=True, internal_p=np.inf, order=2.)[1]
- assert np.array_equal(match , [[-1, 0], [-1, 1]])
+ assert _same_permuted(match, [[-1, 0], [-1, 1]])
match = wasserstein_distance(diag2, emptydiag, matching=True, internal_p=np.inf, order=2.24)[1]
- assert np.array_equal(match , [[0, -1], [1, -1]])
+ assert _same_permuted(match, [[0, -1], [1, -1]])
match = wasserstein_distance(diag1, diag2, matching=True, internal_p=2., order=2.)[1]
- assert np.array_equal(match, [[0, 0], [1, 1], [2, -1]])
+ assert _same_permuted(match, [[0, 0], [1, 1], [2, -1]])
if test_matching and test_infinity:
diag7 = np.array([[0, 3], [4, np.inf], [5, np.inf]])
@@ -158,7 +166,7 @@ def _basic_wasserstein(wasserstein_distance, delta, test_infinity=True, test_mat
diag10 = np.array([[0,1], [-np.inf, -np.inf], [np.inf, np.inf]])
match = wasserstein_distance(diag5, diag6, matching=True, internal_p=2., order=2.)[1]
- assert np.array_equal(match, [[0, -1], [-1,0], [-1, 1], [1, 2]])
+ assert _same_permuted(match, [[0, -1], [-1,0], [-1, 1], [1, 2]])
match = wasserstein_distance(diag5, diag7, matching=True, internal_p=2., order=2.)[1]
assert (match is None)
cost, match = wasserstein_distance(diag7, emptydiag, matching=True, internal_p=2., order=2.3)
@@ -172,10 +180,10 @@ def _basic_wasserstein(wasserstein_distance, delta, test_infinity=True, test_mat
assert (match is None)
cost, match = wasserstein_distance(diag9, diag10, matching=True, internal_p=1., order=1.)
assert (cost == 1)
- assert (match == [[0, -1],[1, -1],[-1, 0], [-1, 1], [-1, 2]]) # type 4 and 5 are match to the diag anyway.
+ assert _same_permuted(match, [[0, -1],[1, -1],[-1, 0], [-1, 1], [-1, 2]]) # types 4 and 5 are matched to the diagonal anyway.
cost, match = wasserstein_distance(diag9, emptydiag, matching=True, internal_p=2., order=2.)
assert (cost == 0.)
- assert (match == [[0, -1], [1, -1]])
+ assert _same_permuted(match, [[0, -1], [1, -1]])
def hera_wrap(**extra):
@@ -196,6 +204,6 @@ def test_wasserstein_distance_pot():
def test_wasserstein_distance_hera():
- _basic_wasserstein(hera_wrap(delta=1e-12), 1e-12, test_matching=False)
- _basic_wasserstein(hera_wrap(delta=.1), .1, test_matching=False)
+ _basic_wasserstein(hera_wrap(delta=1e-12), 1e-12, test_matching=True)
+ _basic_wasserstein(hera_wrap(delta=.1), .1, test_matching=True)
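With matchings now tested for hera as well, note that a matching is a set of pairs [i, j] (with -1 standing for the diagonal) whose order may differ between backends, hence the `_same_permuted` set comparison above. A usage sketch with the diagrams from `_basic_wasserstein`:

```python
import numpy as np
from gudhi.wasserstein import wasserstein_distance

diag1 = np.array([[2.7, 3.7], [9.6, 14.0], [34.2, 34.974]])
diag2 = np.array([[2.8, 4.45], [9.5, 14.1]])

cost, match = wasserstein_distance(diag1, diag2, matching=True, internal_p=2., order=2.)
# match pairs point i of diag1 with point j of diag2; -1 means "matched to the
# diagonal", e.g. {(0, 0), (1, 1), (2, -1)} up to ordering, as asserted above.
```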