Diffstat (limited to 'src/python')
-rw-r--r--  src/python/doc/bottleneck_distance_user.rst          4
-rw-r--r--  src/python/doc/cubical_complex_user.rst             22
-rw-r--r--  src/python/doc/representations.rst                  64
-rw-r--r--  src/python/doc/wasserstein_distance_user.rst         7
-rw-r--r--  src/python/gudhi/representations/vector_methods.py  17
-rw-r--r--  src/python/gudhi/simplex_tree.pyx                   41
-rw-r--r--  src/python/gudhi/subsampling.pyx                     2
-rw-r--r--  src/python/include/Alpha_complex_factory.h           9
-rwxr-xr-x  src/python/test/test_bottleneck_distance.py         12
-rwxr-xr-x  src/python/test/test_representations.py             20
-rwxr-xr-x  src/python/test/test_subsampling.py                 16
11 files changed, 134 insertions, 80 deletions
diff --git a/src/python/doc/bottleneck_distance_user.rst b/src/python/doc/bottleneck_distance_user.rst
index 6c6e08d9..7baa76cc 100644
--- a/src/python/doc/bottleneck_distance_user.rst
+++ b/src/python/doc/bottleneck_distance_user.rst
@@ -47,7 +47,7 @@ The following example explains how the distance is computed:
:figclass: align-center
The point (0, 13) is at distance 6.5 from the diagonal and more
- specifically from the point (6.5, 6.5)
+ specifically from the point (6.5, 6.5).
Basic example
@@ -72,6 +72,6 @@ The output is:
.. testoutput::
- Bottleneck distance approximation = 0.81
+ Bottleneck distance approximation = 0.72
Bottleneck distance value = 0.75
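
For reference, a minimal sketch of the documented call; the diagrams and the approximation parameter below are illustrative, not taken from the patched example:

    import gudhi

    diag1 = [[2.7, 3.7], [9.6, 14.0], [34.2, 34.974]]
    diag2 = [[2.8, 4.45], [9.5, 14.1]]

    # A positive third argument requests an approximation within that additive
    # error, which is why the approximate output can differ from the exact value.
    print("Bottleneck distance approximation =", gudhi.bottleneck_distance(diag1, diag2, 0.1))
    print("Bottleneck distance value =", gudhi.bottleneck_distance(diag1, diag2))
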
diff --git a/src/python/doc/cubical_complex_user.rst b/src/python/doc/cubical_complex_user.rst
index 3fd4e27a..6a211347 100644
--- a/src/python/doc/cubical_complex_user.rst
+++ b/src/python/doc/cubical_complex_user.rst
@@ -47,8 +47,8 @@ be a set of two elements).
For further details and theory of cubical complexes, please consult :cite:`kaczynski2004computational` as well as the
following paper :cite:`peikert2012topological`.
-Data structure.
----------------
+Data structure
+--------------
The implementation of Cubical complex provides a representation of complexes that occupy a rectangular region in
:math:`\mathbb{R}^n`. This extra assumption allows for a memory efficient way of storing cubical complexes in a form
@@ -77,8 +77,8 @@ Knowing the sizes of the bitmap, by a series of modulo operation, we can determi
present in the product that gives the cube :math:`C`. In a similar way, we can compute the boundary and the coboundary of
each cube. Further details can be found in the literature.
-Input Format.
--------------
+Input Format
+------------
In the current implementation, filtration is given at the maximal cubes, and it is then extended by the lower star
filtration to all cubes. There are a number of constructors that can be used to construct a cubical complex by users
@@ -108,8 +108,8 @@ the program output is:
Cubical complex is of dimension 2 - 49 simplices.
-Periodic boundary conditions.
------------------------------
+Periodic boundary conditions
+----------------------------
Often one would like to impose periodic boundary conditions to the cubical complex (cf.
:doc:`periodic_cubical_complex_ref`).
@@ -154,7 +154,13 @@ the program output is:
Periodic cubical complex is of dimension 2 - 42 simplices.
-Examples.
----------
+Examples
+--------
End-user programs are available in the python/example/ folder.
+
+Tutorial
+--------
+
+This `notebook <https://github.com/GUDHI/TDA-tutorial/blob/master/Tuto-GUDHI-cubical-complexes.ipynb>`_
+explains how to represent sublevel sets of functions using cubical complexes.
\ No newline at end of file
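
A minimal sketch of the bitmap constructor this page documents; the filtration values are illustrative, but a 3x3 bitmap does yield the 49 cubes quoted in the program output above:

    import gudhi

    # Filtration values on the 9 top-dimensional cells of a 3x3 bitmap; lower-
    # dimensional cubes inherit values through the lower-star extension.
    cc = gudhi.CubicalComplex(dimensions=[3, 3],
                              top_dimensional_cells=[1, 2, 3, 4, 5, 6, 7, 8, 9])
    print(f"Cubical complex is of dimension {cc.dimension()} - {cc.num_simplices()} simplices.")
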
diff --git a/src/python/doc/representations.rst b/src/python/doc/representations.rst
index 041e3247..b0477197 100644
--- a/src/python/doc/representations.rst
+++ b/src/python/doc/representations.rst
@@ -12,11 +12,45 @@ This module, originally available at https://github.com/MathieuCarriere/sklearn-
A diagram is represented as a numpy array of shape (n,2), as can be obtained from :func:`~gudhi.SimplexTree.persistence_intervals_in_dimension` for instance. Points at infinity are represented as a numpy array of shape (n,1), storing only the birth time. The classes in this module can handle several persistence diagrams at once. In that case, the diagrams are provided as a list of numpy arrays. Note that it is not necessary for the diagrams to have the same number of points, i.e., for the corresponding arrays to have the same number of rows: all classes can handle arrays with different shapes.
-A small example is provided
+Examples
+--------
-.. only:: builder_html
+Landscapes
+^^^^^^^^^^
- * :download:`diagram_vectorizations_distances_kernels.py <../example/diagram_vectorizations_distances_kernels.py>`
+This example computes the first two Landscapes associated to a persistence diagram with four points. The landscapes are evaluated on ten samples, leading to two vectors with ten coordinates each, that are eventually concatenated in order to produce a single vector representation.
+
+.. testcode::
+
+ import numpy as np
+ from gudhi.representations import Landscape
+ # A single diagram with 4 points
+ D = np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.]])
+ diags = [D]
+ l=Landscape(num_landscapes=2,resolution=10).fit_transform(diags)
+ print(l)
+
+The output is:
+
+.. testoutput::
+
+ [[1.02851895 2.05703791 2.57129739 1.54277843 0.89995409 1.92847304
+ 2.95699199 3.08555686 2.05703791 1.02851895 0. 0.64282435
+ 0. 0. 0.51425948 0. 0. 0.
+ 0.77138922 1.02851895]]
+
+Various kernels
+^^^^^^^^^^^^^^^
+
+A small example is also provided for download:
+:download:`diagram_vectorizations_distances_kernels.py <../example/diagram_vectorizations_distances_kernels.py>`
+
+Machine Learning and Topological Data Analysis
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This `notebook <https://github.com/GUDHI/TDA-tutorial/blob/master/Tuto-GUDHI-representations.ipynb>`_ explains how to
+efficiently combine machine learning and topological data analysis with the
+:doc:`representations module<representations>`.
Preprocessing
@@ -46,27 +80,3 @@ Metrics
:members:
:special-members:
:show-inheritance:
-
-Basic example
--------------
-
-This example computes the first two Landscapes associated to a persistence diagram with four points. The landscapes are evaluated on ten samples, leading to two vectors with ten coordinates each, that are eventually concatenated in order to produce a single vector representation.
-
-.. testcode::
-
- import numpy as np
- from gudhi.representations import Landscape
- # A single diagram with 4 points
- D = np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.]])
- diags = [D]
- l=Landscape(num_landscapes=2,resolution=10).fit_transform(diags)
- print(l)
-
-The output is:
-
-.. testoutput::
-
- [[1.02851895 2.05703791 2.57129739 1.54277843 0.89995409 1.92847304
- 2.95699199 3.08555686 2.05703791 1.02851895 0. 0.64282435
- 0. 0. 0.51425948 0. 0. 0.
- 0.77138922 1.02851895]]
diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst
index 96ec7872..9ffc2759 100644
--- a/src/python/doc/wasserstein_distance_user.rst
+++ b/src/python/doc/wasserstein_distance_user.rst
@@ -175,3 +175,10 @@ The output is:
[[0.27916667 0.55416667]
[0.7375 0.7625 ]
[0.2375 0.2625 ]]
+
+Tutorial
+********
+
+This
+`notebook <https://github.com/GUDHI/TDA-tutorial/blob/master/Tuto-GUDHI-Barycenters-of-persistence-diagrams.ipynb>`_
+presents the concept of barycenter, or Fréchet mean, of a family of persistence diagrams.
\ No newline at end of file
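
A hedged sketch of the barycenter call behind the output quoted above; the toy diagrams here are illustrative, so the printed estimate will not match those numbers exactly:

    import numpy as np
    from gudhi.wasserstein.barycenter import lagrangian_barycenter

    dg1 = np.array([[0.2, 0.5]])
    dg2 = np.array([[0.2, 0.7]])
    dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]])
    # init=0 starts the Fréchet-mean estimate from the first diagram.
    print(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3], init=0))
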
diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 5ca127f6..cdcb1fde 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -323,22 +323,15 @@ class BettiCurve(BaseEstimator, TransformerMixin):
Returns:
numpy array with shape (number of diagrams) x (**resolution**): output Betti curves.
"""
- num_diag, Xfit = len(X), []
+ Xfit = []
x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution)
step_x = x_values[1] - x_values[0]
- for i in range(num_diag):
-
- diagram, num_pts_in_diag = X[i], X[i].shape[0]
-
+ for diagram in X:
+ diagram_int = np.clip(np.ceil((diagram[:,:2] - self.sample_range[0]) / step_x), 0, self.resolution).astype(int)
bc = np.zeros(self.resolution)
- for j in range(num_pts_in_diag):
- [px,py] = diagram[j,:2]
- min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
- max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
- for k in range(min_idx, max_idx):
- bc[k] += 1
-
+ for interval in diagram_int:
+ bc[interval[0]:interval[1]] += 1
Xfit.append(np.reshape(bc,[1,-1]))
Xfit = np.concatenate(Xfit, 0)
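
The rewrite above replaces the per-point Python loops with one clip-and-slice per diagram: births and deaths are converted to sample indices in a single vectorized call, and each interval increments a slice of the curve. A hedged usage sketch with illustrative values:

    import numpy as np
    from gudhi.representations import BettiCurve

    # Two intervals sampled on 10 values over [0, 10]; each interval adds 1
    # to the curve between its clipped birth and death indices.
    diag = np.array([[1.0, 4.0], [2.0, 6.0]])
    print(BettiCurve(resolution=10, sample_range=[0.0, 10.0]).fit_transform([diag]))
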
diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx
index 813dc5c2..cdd2e87b 100644
--- a/src/python/gudhi/simplex_tree.pyx
+++ b/src/python/gudhi/simplex_tree.pyx
@@ -389,37 +389,39 @@ cdef class SimplexTree:
self.get_ptr().reset_filtration(filtration, min_dim)
def extend_filtration(self):
- """ Extend filtration for computing extended persistence. This function only uses the
- filtration values at the 0-dimensional simplices, and computes the extended persistence
- diagram induced by the lower-star filtration computed with these values.
+ """ Extend filtration for computing extended persistence. This function only uses the filtration values at the
+ 0-dimensional simplices, and computes the extended persistence diagram induced by the lower-star filtration
+ computed with these values.
.. note::
- Note that after calling this function, the filtration
- values are actually modified within the simplex tree.
- The function :func:`extended_persistence`
- retrieves the original values.
+ Note that after calling this function, the filtration values are actually modified within the simplex tree.
+ The function :func:`extended_persistence` retrieves the original values.
.. note::
- Note that this code creates an extra vertex internally, so you should make sure that
- the simplex tree does not contain a vertex with the largest possible value (i.e., 4294967295).
+ Note that this code creates an extra vertex internally, so you should make sure that the simplex tree does
+ not contain a vertex with the largest possible value (i.e., 4294967295).
+
+ This `notebook <https://github.com/GUDHI/TDA-tutorial/blob/master/Tuto-GUDHI-extended-persistence.ipynb>`_
+ explains how to compute an extension of persistence called extended persistence.
"""
self.get_ptr().compute_extended_filtration()
def extended_persistence(self, homology_coeff_field=11, min_persistence=0):
- """This function retrieves good values for extended persistence, and separate the diagrams
- into the Ordinary, Relative, Extended+ and Extended- subdiagrams.
+ """This function retrieves good values for extended persistence, and separate the diagrams into the Ordinary,
+ Relative, Extended+ and Extended- subdiagrams.
- :param homology_coeff_field: The homology coefficient field. Must be a
- prime number. Default value is 11.
+ :param homology_coeff_field: The homology coefficient field. Must be a prime number. Default value is 11.
:type homology_coeff_field: int
- :param min_persistence: The minimum persistence value (i.e., the absolute value of the difference between the persistence diagram point coordinates) to take into
- account (strictly greater than min_persistence). Default value is
- 0.0.
- Sets min_persistence to -1.0 to see all values.
+ :param min_persistence: The minimum persistence value (i.e., the absolute value of the difference between the
+ persistence diagram point coordinates) to take into account (strictly greater than min_persistence).
+ Default value is 0.0. Set min_persistence to -1.0 to see all values.
:type min_persistence: float
- :returns: A list of four persistence diagrams in the format described in :func:`persistence`. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. See https://link.springer.com/article/10.1007/s10208-008-9027-z and/or section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes.
+ :returns: A list of four persistence diagrams in the format described in :func:`persistence`. The first one is
+ Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-.
+ See https://link.springer.com/article/10.1007/s10208-008-9027-z and/or section 2.2 in
+ https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes.
.. note::
@@ -430,6 +432,9 @@ cdef class SimplexTree:
The coordinates of the persistence diagram points might be a little different than the
original filtration values due to the internal transformation (scaling to [-2,-1]) that is
performed on these values during the computation of extended persistence.
+
+ This `notebook <https://github.com/GUDHI/TDA-tutorial/blob/master/Tuto-GUDHI-extended-persistence.ipynb>`_
+ explains how to compute an extension of persistence called extended persistence.
"""
cdef vector[pair[int, pair[double, double]]] persistence_result
if self.pcohptr != NULL:
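
A hedged sketch of the two methods documented above, on a toy lower-star filtration (vertex values illustrative):

    import gudhi

    st = gudhi.SimplexTree()
    # Only the vertex filtration values matter: extend_filtration() rebuilds
    # the rest, and extended_persistence() then splits the four subdiagrams.
    st.insert([0], 0.0)
    st.insert([1], 1.0)
    st.insert([2], 2.0)
    st.insert([0, 1], 1.0)
    st.insert([1, 2], 2.0)
    st.extend_filtration()
    ordinary, relative, ext_plus, ext_minus = st.extended_persistence(min_persistence=-1.0)
    print(ordinary, relative, ext_plus, ext_minus)
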
diff --git a/src/python/gudhi/subsampling.pyx b/src/python/gudhi/subsampling.pyx
index b11d07e5..46f32335 100644
--- a/src/python/gudhi/subsampling.pyx
+++ b/src/python/gudhi/subsampling.pyx
@@ -105,7 +105,7 @@ def pick_n_random_points(points=None, off_file='', nb_points=0):
def sparsify_point_set(points=None, off_file='', min_squared_dist=0.0):
"""Outputs a subset of the input points so that the squared distance
- between any two points is greater than or equal to min_squared_dist.
+ between any two points is greater than min_squared_dist.
:param points: The input point set.
:type points: Iterable[Iterable[float]]
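
The docstring fix above makes the bound strict, which the updated tests below exercise with thresholds just above and below the exact squared distances. A sketch under the assumption of unit-square corners (illustrative):

    import gudhi

    points = [[0, 1], [0, 0], [1, 0], [1, 1]]
    # With a strict bound, a threshold just above 1 prunes neighbours at
    # squared distance exactly 1, keeping only the two opposite corners.
    print(gudhi.sparsify_point_set(points=points, min_squared_dist=1.001))
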
diff --git a/src/python/include/Alpha_complex_factory.h b/src/python/include/Alpha_complex_factory.h
index d699ad9b..3405fdd6 100644
--- a/src/python/include/Alpha_complex_factory.h
+++ b/src/python/include/Alpha_complex_factory.h
@@ -48,11 +48,14 @@ static CgalPointType pt_cython_to_cgal(std::vector<double> const& vec) {
class Abstract_alpha_complex {
public:
virtual std::vector<double> get_point(int vh) = 0;
+
virtual bool create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square,
bool default_filtration_value) = 0;
+
+ virtual ~Abstract_alpha_complex() = default;
};
-class Exact_Alphacomplex_dD : public Abstract_alpha_complex {
+class Exact_Alphacomplex_dD final : public Abstract_alpha_complex {
private:
using Kernel = CGAL::Epeck_d<CGAL::Dynamic_dimension_tag>;
using Point = typename Kernel::Point_d;
@@ -78,7 +81,7 @@ class Exact_Alphacomplex_dD : public Abstract_alpha_complex {
Alpha_complex<Kernel> alpha_complex_;
};
-class Inexact_Alphacomplex_dD : public Abstract_alpha_complex {
+class Inexact_Alphacomplex_dD final : public Abstract_alpha_complex {
private:
using Kernel = CGAL::Epick_d<CGAL::Dynamic_dimension_tag>;
using Point = typename Kernel::Point_d;
@@ -104,7 +107,7 @@ class Inexact_Alphacomplex_dD : public Abstract_alpha_complex {
};
template <complexity Complexity>
-class Alphacomplex_3D : public Abstract_alpha_complex {
+class Alphacomplex_3D final : public Abstract_alpha_complex {
private:
using Point = typename Alpha_complex_3d<Complexity, false, false>::Bare_point_3;
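
These factory classes back the Python-level alpha complex; a hedged sketch of how the exact and inexact variants are selected from Python, assuming the precision argument of gudhi.AlphaComplex:

    import gudhi

    # 'fast' routes to the inexact (Epick_d) factory, 'safe'/'exact' to the
    # exact (Epeck_d) one; all are destroyed through the now-virtual destructor.
    ac = gudhi.AlphaComplex(points=[[0, 0], [1, 0], [0, 1], [1, 1]], precision='fast')
    st = ac.create_simplex_tree()
    print(st.num_simplices())
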
diff --git a/src/python/test/test_bottleneck_distance.py b/src/python/test/test_bottleneck_distance.py
index 6915bea8..07fcc9cc 100755
--- a/src/python/test/test_bottleneck_distance.py
+++ b/src/python/test/test_bottleneck_distance.py
@@ -25,3 +25,15 @@ def test_basic_bottleneck():
assert gudhi.bottleneck_distance(diag1, diag2, 0.1) == pytest.approx(0.75, abs=0.1)
assert gudhi.hera.bottleneck_distance(diag1, diag2, 0) == 0.75
assert gudhi.hera.bottleneck_distance(diag1, diag2, 0.1) == pytest.approx(0.75, rel=0.1)
+
+ import numpy as np
+
+ # Translating both diagrams along the diagonal should not affect the distance,
+ # checks that negative numbers are not an issue
+ diag1 = np.array(diag1) - 100
+ diag2 = np.array(diag2) - 100
+
+ assert gudhi.bottleneck_distance(diag1, diag2) == 0.75
+ assert gudhi.bottleneck_distance(diag1, diag2, 0.1) == pytest.approx(0.75, abs=0.1)
+ assert gudhi.hera.bottleneck_distance(diag1, diag2, 0) == 0.75
+ assert gudhi.hera.bottleneck_distance(diag1, diag2, 0.1) == pytest.approx(0.75, rel=0.1)
diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py
index e5c211a0..43c914f3 100755
--- a/src/python/test/test_representations.py
+++ b/src/python/test/test_representations.py
@@ -39,11 +39,11 @@ def test_multiple():
d2 = BottleneckDistance(epsilon=0.00001).fit_transform(l1)
d3 = pairwise_persistence_diagram_distances(l1, l1b, e=0.00001, n_jobs=4)
assert d1 == pytest.approx(d2)
- assert d3 == pytest.approx(d2, abs=1e-5) # Because of 0 entries (on the diagonal)
+ assert d3 == pytest.approx(d2, abs=1e-5) # Because of 0 entries (on the diagonal)
d1 = pairwise_persistence_diagram_distances(l1, l2, metric="wasserstein", order=2, internal_p=2)
d2 = WassersteinDistance(order=2, internal_p=2, n_jobs=4).fit(l2).transform(l1)
print(d1.shape, d2.shape)
- assert d1 == pytest.approx(d2, rel=.02)
+ assert d1 == pytest.approx(d2, rel=0.02)
def test_dummy_atol():
@@ -53,8 +53,22 @@ def test_dummy_atol():
for weighting_method in ["cloud", "iidproba"]:
for contrast in ["gaussian", "laplacian", "indicator"]:
- atol_vectoriser = Atol(quantiser=KMeans(n_clusters=1, random_state=202006), weighting_method=weighting_method, contrast=contrast)
+ atol_vectoriser = Atol(
+ quantiser=KMeans(n_clusters=1, random_state=202006),
+ weighting_method=weighting_method,
+ contrast=contrast,
+ )
atol_vectoriser.fit([a, b, c])
atol_vectoriser(a)
atol_vectoriser.transform(X=[a, b, c])
+
+from gudhi.representations.vector_methods import BettiCurve
+
+
+def test_infinity():
+ a = np.array([[1.0, 8.0], [2.0, np.inf], [3.0, 4.0]])
+ c = BettiCurve(20, [0.0, 10.0])(a)
+ assert c[1] == 0
+ assert c[7] == 3
+ assert c[9] == 2
diff --git a/src/python/test/test_subsampling.py b/src/python/test/test_subsampling.py
index 31f64e32..4019852e 100755
--- a/src/python/test/test_subsampling.py
+++ b/src/python/test/test_subsampling.py
@@ -141,12 +141,16 @@ def test_simple_sparsify_points():
# assert gudhi.sparsify_point_set(points = [], min_squared_dist = 0.0) == []
# assert gudhi.sparsify_point_set(points = [], min_squared_dist = 10.0) == []
assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=0.0) == point_set
- assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=1.0) == point_set
- assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=2.0) == [
+ assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=0.999) == point_set
+ assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=1.001) == [
[0, 1],
[1, 0],
]
- assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=2.01) == [[0, 1]]
+ assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=1.999) == [
+ [0, 1],
+ [1, 0],
+ ]
+ assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=2.001) == [[0, 1]]
assert (
len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=0.0))
@@ -157,11 +161,11 @@ def test_simple_sparsify_points():
== 5
)
assert (
- len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=40.0))
+ len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=40.1))
== 4
)
assert (
- len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=90.0))
+ len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=89.9))
== 3
)
assert (
@@ -169,7 +173,7 @@ def test_simple_sparsify_points():
== 2
)
assert (
- len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=325.0))
+ len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=324.9))
== 2
)
assert (