Diffstat (limited to 'src/python')
-rw-r--r--  src/python/CMakeLists.txt                            |  64
-rw-r--r--  src/python/doc/representations_sum.inc               |  22
-rw-r--r--  src/python/gudhi/hera/bottleneck.cc                  |   2
-rw-r--r--  src/python/gudhi/hera/wasserstein.cc                 |  10
-rw-r--r--  src/python/gudhi/representations/vector_methods.py   | 151
-rw-r--r--  src/python/gudhi/simplex_tree.pxd                    |   3
-rw-r--r--  src/python/gudhi/simplex_tree.pyx                    |  19
-rw-r--r--  src/python/include/Alpha_complex_factory.h           |   4
-rw-r--r--  src/python/setup.py.in                               |   4
-rw-r--r--  src/python/test/test_persistence_graphical_tools.py  |   5
-rwxr-xr-x  src/python/test/test_representations.py              |  16
-rwxr-xr-x  src/python/test/test_wasserstein_distance.py         |   9
12 files changed, 180 insertions, 129 deletions
diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt
index 32ec13bd..39e2acd4 100644
--- a/src/python/CMakeLists.txt
+++ b/src/python/CMakeLists.txt
@@ -44,7 +44,7 @@ function( add_gudhi_debug_info DEBUG_INFO )
endfunction( add_gudhi_debug_info )
if(PYTHONINTERP_FOUND)
- if(PYBIND11_FOUND AND CYTHON_FOUND)
+ if(NUMPY_FOUND AND PYBIND11_FOUND AND CYTHON_FOUND)
add_gudhi_debug_info("Pybind11 version ${PYBIND11_VERSION}")
# PyBind11 modules
set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'bottleneck', ")
@@ -163,10 +163,10 @@ if(PYTHONINTERP_FOUND)
set(GUDHI_PYBIND11_MODULES "${GUDHI_PYBIND11_MODULES}'clustering/_tomato', ")
set(GUDHI_PYBIND11_MODULES "${GUDHI_PYBIND11_MODULES}'hera/wasserstein', ")
set(GUDHI_PYBIND11_MODULES "${GUDHI_PYBIND11_MODULES}'hera/bottleneck', ")
+ set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'nerve_gic', ")
if (NOT CGAL_VERSION VERSION_LESS 4.11.0)
set(GUDHI_PYBIND11_MODULES "${GUDHI_PYBIND11_MODULES}'datasets/generators/_points', ")
set(GUDHI_PYBIND11_MODULES "${GUDHI_PYBIND11_MODULES}'bottleneck', ")
- set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'nerve_gic', ")
endif ()
if (NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0)
set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'subsampling', ")
@@ -432,38 +432,38 @@ if(PYTHONINTERP_FOUND)
${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/example/bottleneck_basic_example.py")
add_gudhi_py_test(test_bottleneck_distance)
+ endif (NOT CGAL_VERSION VERSION_LESS 4.11.0)
- # Cover complex
- file(COPY ${CMAKE_SOURCE_DIR}/data/points/human.off DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/)
- file(COPY ${CMAKE_SOURCE_DIR}/data/points/COIL_database/lucky_cat.off DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/)
- file(COPY ${CMAKE_SOURCE_DIR}/data/points/COIL_database/lucky_cat_PCA1 DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/)
- add_test(NAME cover_complex_nerve_example_py_test
- WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
- COMMAND ${CMAKE_COMMAND} -E env "${GUDHI_PYTHON_PATH_ENV}"
- ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/example/nerve_of_a_covering.py"
- -f human.off -c 2 -r 10 -g 0.3)
+ # Cover complex
+ file(COPY ${CMAKE_SOURCE_DIR}/data/points/human.off DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/)
+ file(COPY ${CMAKE_SOURCE_DIR}/data/points/COIL_database/lucky_cat.off DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/)
+ file(COPY ${CMAKE_SOURCE_DIR}/data/points/COIL_database/lucky_cat_PCA1 DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/)
+ add_test(NAME cover_complex_nerve_example_py_test
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+ COMMAND ${CMAKE_COMMAND} -E env "${GUDHI_PYTHON_PATH_ENV}"
+ ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/example/nerve_of_a_covering.py"
+ -f human.off -c 2 -r 10 -g 0.3)
- add_test(NAME cover_complex_coordinate_gic_example_py_test
- WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
- COMMAND ${CMAKE_COMMAND} -E env "${GUDHI_PYTHON_PATH_ENV}"
- ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/example/coordinate_graph_induced_complex.py"
- -f human.off -c 0 -v)
+ add_test(NAME cover_complex_coordinate_gic_example_py_test
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+ COMMAND ${CMAKE_COMMAND} -E env "${GUDHI_PYTHON_PATH_ENV}"
+ ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/example/coordinate_graph_induced_complex.py"
+ -f human.off -c 0 -v)
- add_test(NAME cover_complex_functional_gic_example_py_test
- WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
- COMMAND ${CMAKE_COMMAND} -E env "${GUDHI_PYTHON_PATH_ENV}"
- ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/example/functional_graph_induced_complex.py"
- -o lucky_cat.off
- -f lucky_cat_PCA1 -v)
+ add_test(NAME cover_complex_functional_gic_example_py_test
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+ COMMAND ${CMAKE_COMMAND} -E env "${GUDHI_PYTHON_PATH_ENV}"
+ ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/example/functional_graph_induced_complex.py"
+ -o lucky_cat.off
+ -f lucky_cat_PCA1 -v)
- add_test(NAME cover_complex_voronoi_gic_example_py_test
- WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
- COMMAND ${CMAKE_COMMAND} -E env "${GUDHI_PYTHON_PATH_ENV}"
- ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/example/voronoi_graph_induced_complex.py"
- -f human.off -n 700 -v)
+ add_test(NAME cover_complex_voronoi_gic_example_py_test
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+ COMMAND ${CMAKE_COMMAND} -E env "${GUDHI_PYTHON_PATH_ENV}"
+ ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/example/voronoi_graph_induced_complex.py"
+ -f human.off -n 700 -v)
- add_gudhi_py_test(test_cover_complex)
- endif (NOT CGAL_VERSION VERSION_LESS 4.11.0)
+ add_gudhi_py_test(test_cover_complex)
if (NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 5.1.0)
# Alpha
@@ -623,10 +623,10 @@ if(PYTHONINTERP_FOUND)
# Set missing or not modules
set(GUDHI_MODULES ${GUDHI_MODULES} "python" CACHE INTERNAL "GUDHI_MODULES")
- else(PYBIND11_FOUND AND CYTHON_FOUND)
- message("++ Python module will not be compiled because cython and/or pybind11 was/were not found")
+ else(NUMPY_FOUND AND PYBIND11_FOUND AND CYTHON_FOUND)
+ message("++ Python module will not be compiled because numpy and/or cython and/or pybind11 was/were not found")
set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python" CACHE INTERNAL "GUDHI_MISSING_MODULES")
- endif(PYBIND11_FOUND AND CYTHON_FOUND)
+ endif(NUMPY_FOUND AND PYBIND11_FOUND AND CYTHON_FOUND)
else(PYTHONINTERP_FOUND)
message("++ Python module will not be compiled because no Python interpreter was found")
set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python" CACHE INTERNAL "GUDHI_MISSING_MODULES")
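With this change, NumPy joins Cython and pybind11 as a hard configure-time requirement for the Python module; if any of the three is missing, "python" lands in GUDHI_MISSING_MODULES. A minimal sketch (not part of the GUDHI build system) for checking all three from the interpreter CMake would use:

import importlib

# The three Python-side build dependencies gated by the CMake change above.
for dep in ("numpy", "cython", "pybind11"):
    try:
        importlib.import_module(dep)
        print(f"{dep}: found")
    except ImportError:
        print(f"{dep}: missing -- the GUDHI Python module will not be compiled")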
diff --git a/src/python/doc/representations_sum.inc b/src/python/doc/representations_sum.inc
index 4298aea9..9515f044 100644
--- a/src/python/doc/representations_sum.inc
+++ b/src/python/doc/representations_sum.inc
@@ -1,14 +1,14 @@
.. table::
:widths: 30 40 30
- +------------------------------------------------------------------+----------------------------------------------------------------+-------------------------------------------------------------+
- | .. figure:: | Vectorizations, distances and kernels that work on persistence | :Author: Mathieu Carrière, Martin Royer |
- | img/sklearn-tda.png | diagrams, compatible with scikit-learn. | |
- | | | :Since: GUDHI 3.1.0 |
- | | | |
- | | | :License: MIT |
- | | | |
- | | | :Requires: `Scikit-learn <installation.html#scikit-learn>`_ |
- +------------------------------------------------------------------+----------------------------------------------------------------+-------------------------------------------------------------+
- | * :doc:`representations` |
- +------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------+
+ +------------------------------------------------------------------+----------------------------------------------------------------+-------------------------------------------------------------------------+
+ | .. figure:: | Vectorizations, distances and kernels that work on persistence | :Author: Mathieu Carrière, Martin Royer, Gard Spreemann, Wojciech Reise |
+ | img/sklearn-tda.png | diagrams, compatible with scikit-learn. | |
+ | | | :Since: GUDHI 3.1.0 |
+ | | | |
+ | | | :License: MIT |
+ | | | |
+ | | | :Requires: `Scikit-learn <installation.html#scikit-learn>`_ |
+ +------------------------------------------------------------------+----------------------------------------------------------------+-------------------------------------------------------------------------+
+ | * :doc:`representations` |
+ +------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+
diff --git a/src/python/gudhi/hera/bottleneck.cc b/src/python/gudhi/hera/bottleneck.cc
index 0cb562ce..ec461f7c 100644
--- a/src/python/gudhi/hera/bottleneck.cc
+++ b/src/python/gudhi/hera/bottleneck.cc
@@ -16,7 +16,7 @@
using py::ssize_t;
#endif
-#include <bottleneck.h> // Hera
+#include <hera/bottleneck.h> // Hera
double bottleneck_distance(Dgm d1, Dgm d2, double delta)
{
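The include now uses the hera/ prefix to match Hera's installed header layout; the Python-facing API is unchanged. A usage sketch of the binding defined above, where delta is the relative approximation error (0 requests the exact distance):

import numpy as np
from gudhi.hera import bottleneck_distance

d1 = np.array([[0.0, 1.0], [2.0, 5.0]])
d2 = np.array([[0.0, 1.1], [2.2, 4.9]])
# delta > 0 lets Hera use its faster approximate algorithm.
print(bottleneck_distance(d1, d2, 0.01))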
diff --git a/src/python/gudhi/hera/wasserstein.cc b/src/python/gudhi/hera/wasserstein.cc
index fa0cf8aa..3516352e 100644
--- a/src/python/gudhi/hera/wasserstein.cc
+++ b/src/python/gudhi/hera/wasserstein.cc
@@ -8,10 +8,16 @@
* - YYYY/MM Author: Description of the modification
*/
-#include <wasserstein.h> // Hera
-
#include <pybind11_diagram_utils.h>
+#ifdef _MSC_VER
+// https://github.com/grey-narn/hera/issues/3
+// ssize_t is a non-standard type (well, posix)
+using py::ssize_t;
+#endif
+
+#include <hera/wasserstein.h> // Hera
+
double wasserstein_distance(
Dgm d1, Dgm d2,
double wasserstein_power, double internal_p,
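Same hera/ include fix here, with the MSVC ssize_t workaround moved before the Hera header that needs it. A usage sketch of the binding; the keyword names order/internal_p/delta are assumed from the GUDHI documentation (the C++ side above calls the exponent wasserstein_power):

import numpy as np
from gudhi.hera import wasserstein_distance

d1 = np.array([[0.0, 1.0], [2.0, 5.0]])
d2 = np.array([[0.0, 1.2]])
# order: Wasserstein exponent; internal_p: ground-metric norm; delta: relative error.
print(wasserstein_distance(d1, d2, order=1.0, internal_p=2.0, delta=0.01))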
diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index a169aee8..745fe1e5 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -13,8 +13,13 @@ import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.exceptions import NotFittedError
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler
-from sklearn.neighbors import DistanceMetric
from sklearn.metrics import pairwise
+try:
+ # New location since 1.0
+ from sklearn.metrics import DistanceMetric
+except ImportError:
+ # Will be removed in 1.3
+ from sklearn.neighbors import DistanceMetric
from .preprocessing import DiagramScaler, BirthPersistenceTransform
@@ -101,7 +106,7 @@ class PersistenceImage(BaseEstimator, TransformerMixin):
"""
return self.fit_transform([diag])[0,:]
-def _automatic_sample_range(sample_range, X, y):
+def _automatic_sample_range(sample_range, X):
"""
Computes and returns the sample range from the persistence diagrams if one of the sample_range values is numpy.nan.
@@ -114,7 +119,7 @@ def _automatic_sample_range(sample_range, X, y):
nan_in_range = np.isnan(sample_range)
if nan_in_range.any():
try:
- pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
+ pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X)
[mx,my] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]]
[Mx,My] = [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
return np.where(nan_in_range, np.array([mx, My]), sample_range)
@@ -124,7 +129,7 @@ def _automatic_sample_range(sample_range, X, y):
return sample_range
-def _trim_on_edges(x, are_endpoints_nan):
+def _trim_endpoints(x, are_endpoints_nan):
if are_endpoints_nan[0]:
x = x[1:]
if are_endpoints_nan[1]:
@@ -132,11 +137,26 @@ def _trim_on_edges(x, are_endpoints_nan):
return x
+def _grid_from_sample_range(self, X):
+ sample_range = np.array(self.sample_range_init)
+ self.nan_in_range = np.isnan(sample_range)
+ self.new_resolution = self.resolution
+ if not self.keep_endpoints:
+ self.new_resolution += self.nan_in_range.sum()
+ self.sample_range = _automatic_sample_range(sample_range, X)
+ self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution)
+ if not self.keep_endpoints:
+ self.grid_ = _trim_endpoints(self.grid_, self.nan_in_range)
+
+
class Landscape(BaseEstimator, TransformerMixin):
"""
This is a class for computing persistence landscapes from a list of persistence diagrams. A persistence landscape is a collection of 1D piecewise-linear functions computed from the rank function associated to the persistence diagram. These piecewise-linear functions are then sampled evenly on a given range and the corresponding vectors of samples are concatenated and returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details.
+
+ Attributes:
+ grid_ (1d array): The grid on which the landscapes are computed.
"""
- def __init__(self, num_landscapes=5, resolution=100, sample_range=[np.nan, np.nan]):
+ def __init__(self, num_landscapes=5, resolution=100, sample_range=[np.nan, np.nan], *, keep_endpoints=False):
"""
Constructor for the Landscape class.
@@ -144,10 +164,10 @@ class Landscape(BaseEstimator, TransformerMixin):
num_landscapes (int): number of piecewise-linear functions to output (default 5).
resolution (int): number of samples for all piecewise-linear functions (default 100).
sample_range ([double, double]): minimum and maximum of all piecewise-linear function domains, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
+ keep_endpoints (bool): when computing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting; the default is to use a slightly smaller range.
"""
- self.num_landscapes, self.resolution, self.sample_range = num_landscapes, resolution, sample_range
- self.nan_in_range = np.isnan(np.array(self.sample_range))
- self.new_resolution = self.resolution + self.nan_in_range.sum()
+ self.num_landscapes, self.resolution, self.sample_range_init = num_landscapes, resolution, sample_range
+ self.keep_endpoints = keep_endpoints
def fit(self, X, y=None):
"""
@@ -157,9 +177,7 @@ class Landscape(BaseEstimator, TransformerMixin):
X (list of n x 2 numpy arrays): input persistence diagrams.
y (n x 1 array): persistence diagram labels (unused).
"""
- self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
- self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution)
- self.im_range = _trim_on_edges(self.im_range, self.nan_in_range)
+ _grid_from_sample_range(self, X)
return self
def transform(self, X):
@@ -174,7 +192,7 @@ class Landscape(BaseEstimator, TransformerMixin):
"""
Xfit = []
- x_values = self.im_range
+ x_values = self.grid_
for diag in X:
midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2.
tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0)
@@ -208,8 +226,11 @@ class Landscape(BaseEstimator, TransformerMixin):
class Silhouette(BaseEstimator, TransformerMixin):
"""
This is a class for computing persistence silhouettes from a list of persistence diagrams. A persistence silhouette is computed by taking a weighted average of the collection of 1D piecewise-linear functions given by the persistence landscapes, and then by evenly sampling this average on a given range. Finally, the corresponding vector of samples is returned. See https://arxiv.org/abs/1312.0308 for more details.
+
+ Attributes:
+ grid_ (1d array): The grid on which the silhouette is computed.
"""
- def __init__(self, weight=lambda x: 1, resolution=100, sample_range=[np.nan, np.nan]):
+ def __init__(self, weight=lambda x: 1, resolution=100, sample_range=[np.nan, np.nan], *, keep_endpoints=False):
"""
Constructor for the Silhouette class.
@@ -217,10 +238,10 @@ class Silhouette(BaseEstimator, TransformerMixin):
weight (function): weight function for the persistence diagram points (default constant function, ie lambda x: 1). This function must be defined on 2D points, ie on lists or numpy arrays of the form [p_x,p_y].
resolution (int): number of samples for the weighted average (default 100).
sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
+ keep_endpoints (bool): when computing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting; the default is to use a slightly smaller range.
"""
- self.weight, self.resolution, self.sample_range = weight, resolution, sample_range
- self.nan_in_range = np.isnan(np.array(self.sample_range))
- self.new_resolution = self.resolution + self.nan_in_range.sum()
+ self.weight, self.resolution, self.sample_range_init = weight, resolution, sample_range
+ self.keep_endpoints = keep_endpoints
def fit(self, X, y=None):
"""
@@ -230,9 +251,7 @@ class Silhouette(BaseEstimator, TransformerMixin):
X (list of n x 2 numpy arrays): input persistence diagrams.
y (n x 1 array): persistence diagram labels (unused).
"""
- self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
- self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution)
- self.im_range = _trim_on_edges(self.im_range, self.nan_in_range)
+ _grid_from_sample_range(self, X)
return self
def transform(self, X):
@@ -246,7 +265,7 @@ class Silhouette(BaseEstimator, TransformerMixin):
numpy array with shape (number of diagrams) x (**resolution**): output persistence silhouettes.
"""
Xfit = []
- x_values = self.im_range
+ x_values = self.grid_
for diag in X:
midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2.
@@ -275,36 +294,39 @@ class Silhouette(BaseEstimator, TransformerMixin):
class BettiCurve(BaseEstimator, TransformerMixin):
"""
Compute Betti curves from persistence diagrams. There are several modes of operation: with a given resolution (with or without a sample_range), with a predefined grid, and with none of the previous. With a predefined grid, the class computes the Betti numbers at those grid points. Without a predefined grid, if the resolution is set to None, it can be fit to a list of persistence diagrams and produce a grid that consists of (at least) the filtration values at which at least one of those persistence diagrams changes Betti numbers, and then compute the Betti numbers at those grid points. In the latter mode, the exact Betti curve is computed for the entire real line. Otherwise, if the resolution is given, the Betti curve is obtained by sampling evenly using either the given sample_range or based on the persistence diagrams.
- """
- def __init__(self, resolution=100, sample_range=[np.nan, np.nan], predefined_grid=None):
- """
- Constructor for the BettiCurve class.
+ Examples
+ --------
+ If pd is a persistence diagram and xs is a nonempty grid of finite values such that xs[0] >= pd.min(), then the results of:
- Parameters:
- resolution (int): number of sample for the piecewise-constant function (default 100).
- sample_range ([double, double]): minimum and maximum of the piecewise-constant function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
- predefined_grid (1d array or None, default=None): Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are ok. If None (default), and resolution is given, the grid will be uniform from x_min to x_max in 'resolution' steps, otherwise a grid will be computed that captures all changes in Betti numbers in the provided data.
+ >>> bc = BettiCurve(predefined_grid=xs) # doctest: +SKIP
+ >>> result = bc(pd) # doctest: +SKIP
- Attributes:
- grid_ (1d array): The grid on which the Betti numbers are computed. If predefined_grid was specified, `grid_` will always be that grid, independently of data. If not, the grid is fitted to capture all filtration values at which the Betti numbers change.
+ and
- Examples
- --------
- If pd is a persistence diagram and xs is a nonempty grid of finite values such that xs[0] >= pd.min(), then the results of:
+ >>> from scipy.interpolate import interp1d # doctest: +SKIP
+ >>> bc = BettiCurve(resolution=None, predefined_grid=None) # doctest: +SKIP
+ >>> bettis = bc.fit_transform([pd]) # doctest: +SKIP
+ >>> interp = interp1d(bc.grid_, bettis[0, :], kind="previous", fill_value="extrapolate") # doctest: +SKIP
+ >>> result = np.array(interp(xs), dtype=int) # doctest: +SKIP
- >>> bc = BettiCurve(predefined_grid=xs) # doctest: +SKIP
- >>> result = bc(pd) # doctest: +SKIP
+ are the same.
- and
+ Attributes
+ ----------
+ grid_ : 1d array
+ The grid on which the Betti numbers are computed. If predefined_grid was specified, `grid_` will always be that grid, independently of data. If not and resolution is None, the grid is fitted to capture all filtration values at which the Betti numbers change.
+ """
- >>> from scipy.interpolate import interp1d # doctest: +SKIP
- >>> bc = BettiCurve(resolution=None, predefined_grid=None) # doctest: +SKIP
- >>> bettis = bc.fit_transform([pd]) # doctest: +SKIP
- >>> interp = interp1d(bc.grid_, bettis[0, :], kind="previous", fill_value="extrapolate") # doctest: +SKIP
- >>> result = np.array(interp(xs), dtype=int) # doctest: +SKIP
+ def __init__(self, resolution=100, sample_range=[np.nan, np.nan], predefined_grid=None, *, keep_endpoints=False):
+ """
+ Constructor for the BettiCurve class.
- are the same.
+ Parameters:
+ resolution (int): number of samples for the piecewise-constant function (default 100), or None for the exact curve.
+ sample_range ([double, double]): minimum and maximum of the piecewise-constant function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
+ predefined_grid (1d array or None, default=None): Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are ok. If None (default), and resolution is given, the grid will be uniform from x_min to x_max in 'resolution' steps, otherwise a grid will be computed that captures all changes in Betti numbers in the provided data.
+ keep_endpoints (bool): when computing `sample_range` (fixed `resolution`, no `predefined_grid`), use the exact extremities. This is mostly useful for plotting; the default is to use a slightly smaller range.
"""
if (predefined_grid is not None) and (not isinstance(predefined_grid, np.ndarray)):
@@ -312,7 +334,8 @@ class BettiCurve(BaseEstimator, TransformerMixin):
self.predefined_grid = predefined_grid
self.resolution = resolution
- self.sample_range = sample_range
+ self.sample_range_init = sample_range
+ self.keep_endpoints = keep_endpoints
def is_fitted(self):
return hasattr(self, "grid_")
@@ -331,8 +354,7 @@ class BettiCurve(BaseEstimator, TransformerMixin):
events = np.unique(np.concatenate([pd.flatten() for pd in X] + [[-np.inf]], axis=0))
self.grid_ = np.array(events)
else:
- self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
- self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution)
+ _grid_from_sample_range(self, X)
else:
self.grid_ = self.predefined_grid # Get the predefined grid from user
@@ -431,8 +453,11 @@ class BettiCurve(BaseEstimator, TransformerMixin):
class Entropy(BaseEstimator, TransformerMixin):
"""
This is a class for computing persistence entropy. Persistence entropy is a statistic for persistence diagrams inspired from Shannon entropy. This statistic can also be used to compute a feature vector, called the entropy summary function. See https://arxiv.org/pdf/1803.08304.pdf for more details. Note that a previous implementation was contributed by Manuel Soriano-Trigueros.
+
+ Attributes:
+ grid_ (1d array): In vector mode, the grid on which the entropy summary function is computed.
"""
- def __init__(self, mode="scalar", normalized=True, resolution=100, sample_range=[np.nan, np.nan]):
+ def __init__(self, mode="scalar", normalized=True, resolution=100, sample_range=[np.nan, np.nan], *, keep_endpoints=False):
"""
Constructor for the Entropy class.
@@ -441,8 +466,10 @@ class Entropy(BaseEstimator, TransformerMixin):
normalized (bool): whether to normalize the entropy summary function (default True). Used only if **mode** = "vector".
resolution (int): number of samples for the entropy summary function (default 100). Used only if **mode** = "vector".
sample_range ([double, double]): minimum and maximum of the entropy summary function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. Used only if **mode** = "vector".
+ keep_endpoints (bool): when computing `sample_range`, use the exact extremities. This is mostly useful for plotting; the default is to use a slightly smaller range.
"""
- self.mode, self.normalized, self.resolution, self.sample_range = mode, normalized, resolution, sample_range
+ self.mode, self.normalized, self.resolution, self.sample_range_init = mode, normalized, resolution, sample_range
+ self.keep_endpoints = keep_endpoints
def fit(self, X, y=None):
"""
@@ -452,7 +479,9 @@ class Entropy(BaseEstimator, TransformerMixin):
X (list of n x 2 numpy arrays): input persistence diagrams.
y (n x 1 array): persistence diagram labels (unused).
"""
- self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
+ if self.mode == "vector":
+ _grid_from_sample_range(self, X)
+ self.step_ = self.grid_[1] - self.grid_[0]
return self
def transform(self, X):
@@ -466,8 +495,6 @@ class Entropy(BaseEstimator, TransformerMixin):
numpy array with shape (number of diagrams) x (1 if **mode** = "scalar" else **resolution**): output entropy.
"""
num_diag, Xfit = len(X), []
- x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution)
- step_x = x_values[1] - x_values[0]
new_X = BirthPersistenceTransform().fit_transform(X)
for i in range(num_diag):
@@ -482,8 +509,8 @@ class Entropy(BaseEstimator, TransformerMixin):
ent = np.zeros(self.resolution)
for j in range(num_pts_in_diag):
[px,py] = orig_diagram[j,:2]
- min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
- max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
+ min_idx = np.clip(np.ceil((px - self.sample_range[0]) / self.step_).astype(int), 0, self.resolution)
+ max_idx = np.clip(np.ceil((py - self.sample_range[0]) / self.step_).astype(int), 0, self.resolution)
ent[min_idx:max_idx]-=p[j]*np.log(p[j])
if self.normalized:
ent = ent / np.linalg.norm(ent, ord=1)
@@ -683,17 +710,17 @@ class Atol(BaseEstimator, TransformerMixin):
>>> b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]])
>>> c = np.array([[3, 2, -1], [1, 2, -1]])
>>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006))
- >>> atol_vectoriser.fit(X=[a, b, c]).centers # doctest: +SKIP
- >>> # array([[ 2. , 0.66666667, 3.33333333],
- >>> # [ 2.6 , 2.8 , -0.4 ]])
+ >>> atol_vectoriser.fit(X=[a, b, c]).centers
+ array([[ 2.6 , 2.8 , -0.4 ],
+ [ 2. , 0.66666667, 3.33333333]])
>>> atol_vectoriser(a)
- >>> # array([1.18168665, 0.42375966]) # doctest: +SKIP
+ array([0.42375966, 1.18168665])
>>> atol_vectoriser(c)
- >>> # array([0.02062512, 1.25157463]) # doctest: +SKIP
- >>> atol_vectoriser.transform(X=[a, b, c]) # doctest: +SKIP
- >>> # array([[1.18168665, 0.42375966],
- >>> # [0.29861028, 1.06330156],
- >>> # [0.02062512, 1.25157463]])
+ array([1.25157463, 0.02062512])
+ >>> atol_vectoriser.transform(X=[a, b, c])
+ array([[0.42375966, 1.18168665],
+ [1.06330156, 0.29861028],
+ [1.25157463, 0.02062512]])
"""
# Note the example above must be up to date with the one in tests called test_atol_doc
def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"):
@@ -744,6 +771,8 @@ class Atol(BaseEstimator, TransformerMixin):
measures_concat = np.concatenate(X)
self.quantiser.fit(X=measures_concat, sample_weight=sample_weight)
self.centers = self.quantiser.cluster_centers_
+ # Hack, but some people are unhappy if the order depends on the version of sklearn
+ self.centers = self.centers[np.lexsort(self.centers.T)]
if self.quantiser.n_clusters == 1:
dist_centers = pairwise.pairwise_distances(measures_concat)
np.fill_diagonal(dist_centers, 0)
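The net effect of the refactoring above: Landscape, Silhouette, BettiCurve and Entropy now share _grid_from_sample_range, expose the fitted grid as grid_, and accept a keyword-only keep_endpoints. A short sketch of the behavior, mirroring the new test_endpoints test below:

import numpy as np
from gudhi.representations import Landscape

diags = [np.array([[2.0, 3.0]])]

# Default: the exact extremities, where a landscape is always 0, are trimmed.
ls = Landscape(resolution=10).fit(diags)
assert ls.grid_[0] > 2.0 and ls.grid_[-1] < 3.0

# keep_endpoints=True keeps them; mostly useful for plotting.
ls = Landscape(resolution=10, keep_endpoints=True).fit(diags)
assert ls.grid_[0] == 2.0 and ls.grid_[-1] == 3.0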
diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd
index f86f1232..5309c6fa 100644
--- a/src/python/gudhi/simplex_tree.pxd
+++ b/src/python/gudhi/simplex_tree.pxd
@@ -56,7 +56,8 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi":
int upper_bound_dimension() nogil
bool find_simplex(vector[int] simplex) nogil
bool insert(vector[int] simplex, double filtration) nogil
- void insert_matrix(double* filtrations, int n, int stride0, int stride1, double max_filtration) nogil
+ void insert_matrix(double* filtrations, int n, int stride0, int stride1, double max_filtration) nogil except +
+ void insert_batch_vertices(vector[int] v, double f) nogil except +
vector[pair[vector[int], double]] get_star(vector[int] simplex) nogil
vector[pair[vector[int], double]] get_cofaces(vector[int] simplex, int dimension) nogil
void expansion(int max_dim) nogil except +
diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx
index 24b970c4..4cf176f5 100644
--- a/src/python/gudhi/simplex_tree.pyx
+++ b/src/python/gudhi/simplex_tree.pyx
@@ -12,6 +12,7 @@ from libc.stdint cimport intptr_t, int32_t, int64_t
import numpy as np
cimport gudhi.simplex_tree
cimport cython
+from numpy.math cimport INFINITY
__author__ = "Vincent Rouvreau"
__copyright__ = "Copyright (C) 2016 Inria"
@@ -239,7 +240,7 @@ cdef class SimplexTree:
@staticmethod
@cython.boundscheck(False)
- def create_from_array(filtrations, double max_filtration=np.inf):
+ def create_from_array(filtrations, double max_filtration=INFINITY):
"""Creates a new, empty complex and inserts vertices and edges. The vertices are numbered from 0 to n-1, and
the filtration values are encoded in the array, with the diagonal representing the vertices. It is the
caller's responsibility to ensure that this defines a filtration, which can be achieved with either::
@@ -281,6 +282,8 @@ cdef class SimplexTree:
.. seealso:: :func:`insert_batch`
"""
+ # Without this, it could be slow if we end up inserting vertices in a bad order (flat_map).
+ self.get_ptr().insert_batch_vertices(np.unique(np.stack((edges.row, edges.col))), INFINITY)
# TODO: optimize this?
for edge in zip(edges.row, edges.col, edges.data):
self.get_ptr().insert((edge[0], edge[1]), edge[2])
@@ -303,8 +306,7 @@ cdef class SimplexTree:
:param filtrations: the filtration values.
:type filtrations: numpy.array of shape (n,)
"""
- # This may be slow if we end up inserting vertices in a bad order (flat_map).
- # We could first insert the vertices from np.unique(vertex_array), or leave it to the caller.
+ cdef vector[int] vertices = np.unique(vertex_array)
cdef Py_ssize_t k = vertex_array.shape[0]
cdef Py_ssize_t n = vertex_array.shape[1]
assert filtrations.shape[0] == n, 'inconsistent sizes for vertex_array and filtrations'
@@ -312,6 +314,9 @@ cdef class SimplexTree:
cdef Py_ssize_t j
cdef vector[int] v
with nogil:
+ # Without this, it could be slow if we end up inserting vertices in a bad order (flat_map).
+ # NaN currently does the wrong thing
+ self.get_ptr().insert_batch_vertices(vertices, INFINITY)
for i in range(n):
for j in range(k):
v.push_back(vertex_array[j, i])
@@ -466,7 +471,7 @@ cdef class SimplexTree:
"""
return self.get_ptr().prune_above_filtration(filtration)
- def expansion(self, max_dim):
+ def expansion(self, max_dimension):
"""Expands the simplex tree containing only its one skeleton
until dimension max_dim.
@@ -480,10 +485,10 @@ cdef class SimplexTree:
The simplex tree must contain no simplex of dimension bigger than
1 when calling the method.
- :param max_dim: The maximal dimension.
- :type max_dim: int
+ :param max_dimension: The maximal dimension.
+ :type max_dimension: int
"""
- cdef int maxdim = max_dim
+ cdef int maxdim = max_dimension
with nogil:
self.get_ptr().expansion(maxdim)
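insert_batch now pre-inserts the unique vertices with +inf filtration before the main loop, avoiding pathological insertion orders in the underlying flat_map; the placeholders are lowered when the real simplices arrive. The user-facing API is unchanged; a usage sketch:

import numpy as np
from gudhi import SimplexTree

st = SimplexTree()
# vertex_array has shape (k, n): n simplices with k vertices each. Here, 3 edges.
vertex_array = np.array([[0, 1, 0],
                         [1, 2, 2]])
st.insert_batch(vertex_array, np.array([0.1, 0.2, 0.3]))
print(list(st.get_simplices()))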
diff --git a/src/python/include/Alpha_complex_factory.h b/src/python/include/Alpha_complex_factory.h
index 3d20aa8f..41eb72c1 100644
--- a/src/python/include/Alpha_complex_factory.h
+++ b/src/python/include/Alpha_complex_factory.h
@@ -106,7 +106,7 @@ class Exact_alpha_complex_dD final : public Abstract_alpha_complex {
return alpha_complex_.create_complex(*simplex_tree, max_alpha_square, exact_version_, default_filtration_value);
}
- virtual std::size_t num_vertices() const {
+ virtual std::size_t num_vertices() const override {
return alpha_complex_.num_vertices();
}
@@ -141,7 +141,7 @@ class Inexact_alpha_complex_dD final : public Abstract_alpha_complex {
return alpha_complex_.create_complex(*simplex_tree, max_alpha_square, false, default_filtration_value);
}
- virtual std::size_t num_vertices() const {
+ virtual std::size_t num_vertices() const override {
return alpha_complex_.num_vertices();
}
diff --git a/src/python/setup.py.in b/src/python/setup.py.in
index 2c67c2c5..6eb0db42 100644
--- a/src/python/setup.py.in
+++ b/src/python/setup.py.in
@@ -48,10 +48,6 @@ ext_modules = cythonize(ext_modules, compiler_directives={'language_level': '3'}
for module in pybind11_modules:
my_include_dirs = include_dirs + [pybind11.get_include(False), pybind11.get_include(True)]
- if module == 'hera/wasserstein':
- my_include_dirs = ['@HERA_WASSERSTEIN_INCLUDE_DIR@'] + my_include_dirs
- elif module == 'hera/bottleneck':
- my_include_dirs = ['@HERA_BOTTLENECK_INCLUDE_DIR@'] + my_include_dirs
ext_modules.append(Extension(
'gudhi.' + module.replace('/', '.'),
sources = [source_dir + module + '.cc'],
diff --git a/src/python/test/test_persistence_graphical_tools.py b/src/python/test/test_persistence_graphical_tools.py
index c19836b7..0e2ac3f8 100644
--- a/src/python/test/test_persistence_graphical_tools.py
+++ b/src/python/test/test_persistence_graphical_tools.py
@@ -12,6 +12,7 @@ import gudhi as gd
import numpy as np
import matplotlib as plt
import pytest
+import warnings
def test_array_handler():
@@ -71,13 +72,13 @@ def test_limit_to_max_intervals():
(0, (0.0, 0.106382)),
]
# check no warnings if max_intervals equals to the diagrams number
- with pytest.warns(None) as record:
+ with warnings.catch_warnings():
+ warnings.simplefilter("error")
truncated_diags = gd.persistence_graphical_tools._limit_to_max_intervals(
diags, 10, key=lambda life_time: life_time[1][1] - life_time[1][0]
)
# check diagrams are not sorted
assert truncated_diags == diags
- assert len(record) == 0
# check warning if max_intervals lower than the diagrams number
with pytest.warns(UserWarning) as record:
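pytest.warns(None) was deprecated in pytest 7 as a way to assert that no warning is raised; the replacement above escalates any warning inside the block to an error instead. A generic helper sketch of the same pattern (not from the GUDHI test suite):

import warnings

def assert_no_warning(fn, *args, **kwargs):
    # Any warning emitted by fn becomes an exception, failing the test.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        return fn(*args, **kwargs)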
diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py
index 58caab21..ae0362f8 100755
--- a/src/python/test/test_representations.py
+++ b/src/python/test/test_representations.py
@@ -161,7 +161,7 @@ def test_entropy_miscalculation():
return -np.dot(l, np.log(l))
sce = Entropy(mode="scalar")
assert [[pe(diag_ex)]] == sce.fit_transform([diag_ex])
- sce = Entropy(mode="vector", resolution=4, normalized=False)
+ sce = Entropy(mode="vector", resolution=4, normalized=False, keep_endpoints=True)
pef = [-1/4*np.log(1/4)-1/4*np.log(1/4)-1/2*np.log(1/2),
-1/4*np.log(1/4)-1/4*np.log(1/4)-1/2*np.log(1/2),
-1/2*np.log(1/2),
@@ -170,7 +170,7 @@ def test_entropy_miscalculation():
sce = Entropy(mode="vector", resolution=4, normalized=True)
pefN = (sce.fit_transform([diag_ex]))[0]
area = np.linalg.norm(pefN, ord=1)
- assert area==1
+ assert area==pytest.approx(1)
def test_kernel_empty_diagrams():
empty_diag = np.empty(shape = [0, 2])
@@ -251,3 +251,15 @@ def test_landscape_nan_range():
lds_dgm = lds(dgm)
assert (lds.sample_range[0] == 2) & (lds.sample_range[1] == 6)
assert lds.new_resolution == 10
+
+def test_endpoints():
+ diags = [ np.array([[2., 3.]]) ]
+ for vec in [ Landscape(), Silhouette(), BettiCurve(), Entropy(mode="vector") ]:
+ vec.fit(diags)
+ assert vec.grid_[0] > 2 and vec.grid_[-1] < 3
+ for vec in [ Landscape(keep_endpoints=True), Silhouette(keep_endpoints=True), BettiCurve(keep_endpoints=True), Entropy(mode="vector", keep_endpoints=True)]:
+ vec.fit(diags)
+ assert vec.grid_[0] == 2 and vec.grid_[-1] == 3
+ vec = BettiCurve(resolution=None)
+ vec.fit(diags)
+ assert np.equal(vec.grid_, [-np.inf, 2., 3.]).all()
diff --git a/src/python/test/test_wasserstein_distance.py b/src/python/test/test_wasserstein_distance.py
index 3a004d77..a76b6ce7 100755
--- a/src/python/test/test_wasserstein_distance.py
+++ b/src/python/test/test_wasserstein_distance.py
@@ -90,10 +90,11 @@ def test_get_essential_parts():
def test_warn_infty():
- assert _warn_infty(matching=False)==np.inf
- c, m = _warn_infty(matching=True)
- assert (c == np.inf)
- assert (m is None)
+ with pytest.warns(UserWarning):
+ assert _warn_infty(matching=False)==np.inf
+ c, m = _warn_infty(matching=True)
+ assert (c == np.inf)
+ assert (m is None)
def _basic_wasserstein(wasserstein_distance, delta, test_infinity=True, test_matching=True):