summaryrefslogtreecommitdiff
path: root/src/python/gudhi
diff options
context:
space:
mode:
authorVincent Rouvreau <vincent.rouvreau@inria.fr>2022-03-08 10:40:01 +0100
committerVincent Rouvreau <vincent.rouvreau@inria.fr>2022-03-08 10:40:01 +0100
commit6cb016c0ceff231c001928f641d344fc92c44b73 (patch)
tree5521dc1396347616a3644f96e6a9f96845c593e1 /src/python/gudhi
parent69168e8ed24165ab89ea1c57bc21dd994c93dd8e (diff)
parentbbff86f1218fc7bc9976353901aa94cfa54792f6 (diff)
Merge master and resolve commits
Diffstat (limited to 'src/python/gudhi')
-rw-r--r--src/python/gudhi/__init__.py.in4
-rw-r--r--src/python/gudhi/alpha_complex.pyx62
-rw-r--r--src/python/gudhi/clustering/tomato.py4
-rw-r--r--src/python/gudhi/cubical_complex.pyx12
-rw-r--r--src/python/gudhi/datasets/__init__.py0
-rw-r--r--src/python/gudhi/datasets/generators/__init__.py0
-rw-r--r--src/python/gudhi/datasets/generators/_points.cc121
-rw-r--r--src/python/gudhi/datasets/generators/points.py59
-rw-r--r--src/python/gudhi/periodic_cubical_complex.pyx12
-rw-r--r--src/python/gudhi/point_cloud/knn.py12
-rw-r--r--src/python/gudhi/representations/vector_methods.py257
-rw-r--r--src/python/gudhi/rips_complex.pyx8
-rw-r--r--src/python/gudhi/simplex_tree.pxd16
-rw-r--r--src/python/gudhi/simplex_tree.pyx156
-rw-r--r--src/python/gudhi/subsampling.pyx23
-rw-r--r--src/python/gudhi/wasserstein/wasserstein.py222
-rw-r--r--src/python/gudhi/weighted_rips_complex.py6
17 files changed, 783 insertions, 191 deletions
diff --git a/src/python/gudhi/__init__.py.in b/src/python/gudhi/__init__.py.in
index 79e12fbc..3043201a 100644
--- a/src/python/gudhi/__init__.py.in
+++ b/src/python/gudhi/__init__.py.in
@@ -23,6 +23,10 @@ __all__ = [@GUDHI_PYTHON_MODULES@ @GUDHI_PYTHON_MODULES_EXTRA@]
__available_modules = ''
__missing_modules = ''
+# For unitary tests purpose
+# could use "if 'collapse_edges' in gudhi.__all__" when collapse edges will have a python module
+__GUDHI_USE_EIGEN3 = @GUDHI_USE_EIGEN3@
+
# Try to import * from gudhi.__module_name for default modules.
# Extra modules require an explicit import by the user (mostly because of
# unusual dependencies, but also to avoid cluttering namespace gudhi and
diff --git a/src/python/gudhi/alpha_complex.pyx b/src/python/gudhi/alpha_complex.pyx
index ea128743..a4888914 100644
--- a/src/python/gudhi/alpha_complex.pyx
+++ b/src/python/gudhi/alpha_complex.pyx
@@ -16,7 +16,7 @@ from libcpp.utility cimport pair
from libcpp.string cimport string
from libcpp cimport bool
from libc.stdint cimport intptr_t
-import os
+import warnings
from gudhi.simplex_tree cimport *
from gudhi.simplex_tree import SimplexTree
@@ -28,66 +28,72 @@ __license__ = "GPL v3"
cdef extern from "Alpha_complex_interface.h" namespace "Gudhi":
cdef cppclass Alpha_complex_interface "Gudhi::alpha_complex::Alpha_complex_interface":
- Alpha_complex_interface(vector[vector[double]] points, bool fast_version, bool exact_version) nogil except +
+ Alpha_complex_interface(vector[vector[double]] points, vector[double] weights, bool fast_version, bool exact_version) nogil except +
vector[double] get_point(int vertex) nogil except +
void create_simplex_tree(Simplex_tree_interface_full_featured* simplex_tree, double max_alpha_square, bool default_filtration_value) nogil except +
# AlphaComplex python interface
cdef class AlphaComplex:
- """AlphaComplex is a simplicial complex constructed from the finite cells
- of a Delaunay Triangulation.
+ """AlphaComplex is a simplicial complex constructed from the finite cells of a Delaunay Triangulation.
- The filtration value of each simplex is computed as the square of the
- circumradius of the simplex if the circumsphere is empty (the simplex is
- then said to be Gabriel), and as the minimum of the filtration values of
- the codimension 1 cofaces that make it not Gabriel otherwise.
+ The filtration value of each simplex is computed as the square of the circumradius of the simplex if the
+ circumsphere is empty (the simplex is then said to be Gabriel), and as the minimum of the filtration values of the
+ codimension 1 cofaces that make it not Gabriel otherwise.
- All simplices that have a filtration value strictly greater than a given
- alpha squared value are not inserted into the complex.
+ All simplices that have a filtration value strictly greater than a given alpha squared value are not inserted into
+ the complex.
.. note::
- When Alpha_complex is constructed with an infinite value of alpha, the
- complex is a Delaunay complex.
-
+ When Alpha_complex is constructed with an infinite value of alpha, the complex is a Delaunay complex.
"""
cdef Alpha_complex_interface * this_ptr
# Fake constructor that does nothing but documenting the constructor
- def __init__(self, points=None, off_file='', precision='safe'):
+ def __init__(self, points=[], off_file='', weights=None, precision='safe'):
"""AlphaComplex constructor.
:param points: A list of points in d-Dimension.
- :type points: list of list of double
-
- Or
+ :type points: Iterable[Iterable[float]]
- :param off_file: An OFF file style name.
+ :param off_file: **[deprecated]** An `OFF file style <fileformats.html#off-file-format>`_ name.
+ If an `off_file` is given with `points` as arguments, only points from the file are taken into account.
:type off_file: string
+ :param weights: A list of weights. If set, the number of weights must correspond to the number of points.
+ :type weights: Iterable[float]
+
:param precision: Alpha complex precision can be 'fast', 'safe' or 'exact'. Default is 'safe'.
:type precision: string
+
+ :raises FileNotFoundError: **[deprecated]** If `off_file` is set but not found.
+ :raises ValueError: In case of inconsistency between the number of points and weights.
"""
# The real cython constructor
- def __cinit__(self, points = None, off_file = '', precision = 'safe'):
+ def __cinit__(self, points = [], off_file = '', weights=None, precision = 'safe'):
assert precision in ['fast', 'safe', 'exact'], "Alpha complex precision can only be 'fast', 'safe' or 'exact'"
cdef bool fast = precision == 'fast'
cdef bool exact = precision == 'exact'
- cdef vector[vector[double]] pts
if off_file:
- if os.path.isfile(off_file):
- points = read_points_from_off_file(off_file = off_file)
- else:
- print("file " + off_file + " not found.")
- if points is None:
- # Empty Alpha construction
- points=[]
+ warnings.warn("off_file is a deprecated parameter, please consider using gudhi.read_points_from_off_file",
+ DeprecationWarning)
+ points = read_points_from_off_file(off_file = off_file)
+
+ # weights are set but is inconsistent with the number of points
+ if weights != None and len(weights) != len(points):
+ raise ValueError("Inconsistency between the number of points and weights")
+
+ # need to copy the points to use them without the gil
+ cdef vector[vector[double]] pts
+ cdef vector[double] wgts
pts = points
+ if weights != None:
+ wgts = weights
with nogil:
- self.this_ptr = new Alpha_complex_interface(pts, fast, exact)
+ self.this_ptr = new Alpha_complex_interface(pts, wgts, fast, exact)
def __dealloc__(self):
if self.this_ptr != NULL:
diff --git a/src/python/gudhi/clustering/tomato.py b/src/python/gudhi/clustering/tomato.py
index fbba3cc8..d0e9995c 100644
--- a/src/python/gudhi/clustering/tomato.py
+++ b/src/python/gudhi/clustering/tomato.py
@@ -271,7 +271,7 @@ class Tomato:
l = self.max_weight_per_cc_.min()
r = self.max_weight_per_cc_.max()
if self.diagram_.size > 0:
- plt.plot(self.diagram_[:, 0], self.diagram_[:, 1], "ro")
+ plt.plot(self.diagram_[:, 0], self.diagram_[:, 1], "o", color="red")
l = min(l, self.diagram_[:, 1].min())
r = max(r, self.diagram_[:, 0].max())
if l == r:
@@ -283,7 +283,7 @@ class Tomato:
l, r = -1.0, 1.0
plt.plot([l, r], [l, r])
plt.plot(
- self.max_weight_per_cc_, numpy.full(self.max_weight_per_cc_.shape, 1.1 * l - 0.1 * r), "ro", color="green"
+ self.max_weight_per_cc_, numpy.full(self.max_weight_per_cc_.shape, 1.1 * l - 0.1 * r), "o", color="green"
)
plt.show()
diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx
index 28fbe3af..8e244bb8 100644
--- a/src/python/gudhi/cubical_complex.pyx
+++ b/src/python/gudhi/cubical_complex.pyx
@@ -35,7 +35,7 @@ cdef extern from "Cubical_complex_interface.h" namespace "Gudhi":
cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi":
cdef cppclass Cubical_complex_persistence_interface "Gudhi::Persistent_cohomology_interface<Gudhi::Cubical_complex::Cubical_complex_interface<>>":
Cubical_complex_persistence_interface(Bitmap_cubical_complex_base_interface * st, bool persistence_dim_max) nogil
- void compute_persistence(int homology_coeff_field, double min_persistence) nogil
+ void compute_persistence(int homology_coeff_field, double min_persistence) nogil except+
vector[pair[int, pair[double, double]]] get_persistence() nogil
vector[vector[int]] cofaces_of_cubical_persistence_pairs() nogil
vector[int] betti_numbers() nogil
@@ -147,7 +147,7 @@ cdef class CubicalComplex:
:func:`persistence` returns.
:param homology_coeff_field: The homology coefficient field. Must be a
- prime number
+ prime number. Default value is 11. Max is 46337.
:type homology_coeff_field: int.
:param min_persistence: The minimum persistence value to take into
account (strictly greater than min_persistence). Default value is
@@ -169,7 +169,7 @@ cdef class CubicalComplex:
"""This function computes and returns the persistence of the complex.
:param homology_coeff_field: The homology coefficient field. Must be a
- prime number
+ prime number. Default value is 11. Max is 46337.
:type homology_coeff_field: int.
:param min_persistence: The minimum persistence value to take into
account (strictly greater than min_persistence). Default value is
@@ -281,4 +281,8 @@ cdef class CubicalComplex:
launched first.
"""
assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()"
- return np.array(self.pcohptr.intervals_in_dimension(dimension))
+ piid = np.array(self.pcohptr.intervals_in_dimension(dimension))
+ # Workaround https://github.com/GUDHI/gudhi-devel/issues/507
+ if len(piid) == 0:
+ return np.empty(shape = [0, 2])
+ return piid
diff --git a/src/python/gudhi/datasets/__init__.py b/src/python/gudhi/datasets/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/python/gudhi/datasets/__init__.py
diff --git a/src/python/gudhi/datasets/generators/__init__.py b/src/python/gudhi/datasets/generators/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/python/gudhi/datasets/generators/__init__.py
diff --git a/src/python/gudhi/datasets/generators/_points.cc b/src/python/gudhi/datasets/generators/_points.cc
new file mode 100644
index 00000000..82fea25b
--- /dev/null
+++ b/src/python/gudhi/datasets/generators/_points.cc
@@ -0,0 +1,121 @@
+/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ * Author(s): Hind Montassif
+ *
+ * Copyright (C) 2021 Inria
+ *
+ * Modification(s):
+ * - YYYY/MM Author: Description of the modification
+ */
+
+#include <pybind11/pybind11.h>
+#include <pybind11/numpy.h>
+
+#include <gudhi/random_point_generators.h>
+#include <gudhi/Debug_utils.h>
+
+#include <CGAL/Epick_d.h>
+
+namespace py = pybind11;
+
+
+typedef CGAL::Epick_d< CGAL::Dynamic_dimension_tag > Kern;
+
+py::array_t<double> generate_points_on_sphere(size_t n_samples, int ambient_dim, double radius, std::string sample) {
+
+ if (sample != "random") {
+ throw pybind11::value_error("This sample type is not supported");
+ }
+
+ py::array_t<double> points({n_samples, (size_t)ambient_dim});
+
+ py::buffer_info buf = points.request();
+ double *ptr = static_cast<double *>(buf.ptr);
+
+ GUDHI_CHECK(n_samples == buf.shape[0], "Py array first dimension not matching n_samples on sphere");
+ GUDHI_CHECK(ambient_dim == buf.shape[1], "Py array second dimension not matching the ambient space dimension");
+
+
+ std::vector<typename Kern::Point_d> points_generated;
+
+ {
+ py::gil_scoped_release release;
+ points_generated = Gudhi::generate_points_on_sphere_d<Kern>(n_samples, ambient_dim, radius);
+ }
+
+ for (size_t i = 0; i < n_samples; i++)
+ for (int j = 0; j < ambient_dim; j++)
+ ptr[i*ambient_dim+j] = points_generated[i][j];
+
+ return points;
+}
+
+py::array_t<double> generate_points_on_torus(size_t n_samples, int dim, std::string sample) {
+
+ if ( (sample != "random") && (sample != "grid")) {
+ throw pybind11::value_error("This sample type is not supported");
+ }
+
+ std::vector<typename Kern::Point_d> points_generated;
+
+ {
+ py::gil_scoped_release release;
+ points_generated = Gudhi::generate_points_on_torus_d<Kern>(n_samples, dim, sample);
+ }
+
+ size_t npoints = points_generated.size();
+
+ GUDHI_CHECK(2*dim == points_generated[0].size(), "Py array second dimension not matching the double torus dimension");
+
+ py::array_t<double> points({npoints, (size_t)2*dim});
+
+ py::buffer_info buf = points.request();
+ double *ptr = static_cast<double *>(buf.ptr);
+
+ for (size_t i = 0; i < npoints; i++)
+ for (int j = 0; j < 2*dim; j++)
+ ptr[i*(2*dim)+j] = points_generated[i][j];
+
+ return points;
+}
+
+PYBIND11_MODULE(_points, m) {
+ m.attr("__license__") = "LGPL v3";
+
+ m.def("sphere", &generate_points_on_sphere,
+ py::arg("n_samples"), py::arg("ambient_dim"),
+ py::arg("radius") = 1., py::arg("sample") = "random",
+ R"pbdoc(
+ Generate random i.i.d. points uniformly on a (d-1)-sphere in R^d
+
+ :param n_samples: The number of points to be generated.
+ :type n_samples: integer
+ :param ambient_dim: The ambient dimension d.
+ :type ambient_dim: integer
+ :param radius: The radius. Default value is `1.`.
+ :type radius: float
+ :param sample: The sample type. Default and only available value is `"random"`.
+ :type sample: string
+ :returns: the generated points on a sphere.
+ )pbdoc");
+
+ m.def("ctorus", &generate_points_on_torus,
+ py::arg("n_samples"), py::arg("dim"), py::arg("sample") = "random",
+ R"pbdoc(
+ Generate random i.i.d. points on a d-torus in R^2d or as a grid
+
+ :param n_samples: The number of points to be generated.
+ :type n_samples: integer
+ :param dim: The dimension of the torus on which points would be generated in R^2*dim.
+ :type dim: integer
+ :param sample: The sample type. Available values are: `"random"` and `"grid"`. Default value is `"random"`.
+ :type sample: string
+ :returns: the generated points on a torus.
+
+ The shape of returned numpy array is:
+
+ If sample is 'random': (n_samples, 2*dim).
+
+ If sample is 'grid': (⌊n_samples**(1./dim)⌋**dim, 2*dim), where shape[0] is rounded down to the closest perfect 'dim'th power.
+ )pbdoc");
+}
diff --git a/src/python/gudhi/datasets/generators/points.py b/src/python/gudhi/datasets/generators/points.py
new file mode 100644
index 00000000..9bb2799d
--- /dev/null
+++ b/src/python/gudhi/datasets/generators/points.py
@@ -0,0 +1,59 @@
+# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+# Author(s): Hind Montassif
+#
+# Copyright (C) 2021 Inria
+#
+# Modification(s):
+# - YYYY/MM Author: Description of the modification
+
+import numpy as np
+
+from ._points import ctorus
+from ._points import sphere
+
+def _generate_random_points_on_torus(n_samples, dim):
+
+ # Generate random angles of size n_samples*dim
+ alpha = 2*np.pi*np.random.rand(n_samples*dim)
+
+ # Based on angles, construct points of size n_samples*dim on a circle and reshape the result in a n_samples*2*dim array
+ array_points = np.column_stack([np.cos(alpha), np.sin(alpha)]).reshape(-1, 2*dim)
+
+ return array_points
+
+def _generate_grid_points_on_torus(n_samples, dim):
+
+ # Generate points on a dim-torus as a grid
+ n_samples_grid = int((n_samples+.5)**(1./dim)) # add .5 to avoid rounding down with numerical approximations
+ alpha = np.linspace(0, 2*np.pi, n_samples_grid, endpoint=False)
+
+ array_points = np.column_stack([np.cos(alpha), np.sin(alpha)])
+ array_points_idx = np.empty([n_samples_grid]*dim + [dim], dtype=int)
+ for i, x in enumerate(np.ix_(*([np.arange(n_samples_grid)]*dim))):
+ array_points_idx[...,i] = x
+ return array_points[array_points_idx].reshape(-1, 2*dim)
+
+def torus(n_samples, dim, sample='random'):
+ """
+ Generate points on a flat dim-torus in R^2dim either randomly or on a grid
+
+ :param n_samples: The number of points to be generated.
+ :param dim: The dimension of the torus on which points would be generated in R^2*dim.
+ :param sample: The sample type of the generated points. Can be 'random' or 'grid'.
+ :returns: numpy array containing the generated points on a torus.
+
+ The shape of returned numpy array is:
+
+ If sample is 'random': (n_samples, 2*dim).
+
+ If sample is 'grid': (⌊n_samples**(1./dim)⌋**dim, 2*dim), where shape[0] is rounded down to the closest perfect 'dim'th power.
+ """
+ if sample == 'random':
+ # Generate points randomly
+ return _generate_random_points_on_torus(n_samples, dim)
+ elif sample == 'grid':
+ # Generate points on a grid
+ return _generate_grid_points_on_torus(n_samples, dim)
+ else:
+ raise ValueError("Sample type '{}' is not supported".format(sample))
diff --git a/src/python/gudhi/periodic_cubical_complex.pyx b/src/python/gudhi/periodic_cubical_complex.pyx
index d353d2af..6c21e902 100644
--- a/src/python/gudhi/periodic_cubical_complex.pyx
+++ b/src/python/gudhi/periodic_cubical_complex.pyx
@@ -32,7 +32,7 @@ cdef extern from "Cubical_complex_interface.h" namespace "Gudhi":
cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi":
cdef cppclass Periodic_cubical_complex_persistence_interface "Gudhi::Persistent_cohomology_interface<Gudhi::Cubical_complex::Cubical_complex_interface<Gudhi::cubical_complex::Bitmap_cubical_complex_periodic_boundary_conditions_base<double>>>":
Periodic_cubical_complex_persistence_interface(Periodic_cubical_complex_base_interface * st, bool persistence_dim_max) nogil
- void compute_persistence(int homology_coeff_field, double min_persistence) nogil
+ void compute_persistence(int homology_coeff_field, double min_persistence) nogil except +
vector[pair[int, pair[double, double]]] get_persistence() nogil
vector[vector[int]] cofaces_of_cubical_persistence_pairs() nogil
vector[int] betti_numbers() nogil
@@ -148,7 +148,7 @@ cdef class PeriodicCubicalComplex:
:func:`persistence` returns.
:param homology_coeff_field: The homology coefficient field. Must be a
- prime number
+ prime number. Default value is 11. Max is 46337.
:type homology_coeff_field: int.
:param min_persistence: The minimum persistence value to take into
account (strictly greater than min_persistence). Default value is
@@ -170,7 +170,7 @@ cdef class PeriodicCubicalComplex:
"""This function computes and returns the persistence of the complex.
:param homology_coeff_field: The homology coefficient field. Must be a
- prime number
+ prime number. Default value is 11. Max is 46337.
:type homology_coeff_field: int.
:param min_persistence: The minimum persistence value to take into
account (strictly greater than min_persistence). Default value is
@@ -280,4 +280,8 @@ cdef class PeriodicCubicalComplex:
launched first.
"""
assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()"
- return np.array(self.pcohptr.intervals_in_dimension(dimension))
+ piid = np.array(self.pcohptr.intervals_in_dimension(dimension))
+ # Workaround https://github.com/GUDHI/gudhi-devel/issues/507
+ if len(piid) == 0:
+ return np.empty(shape = [0, 2])
+ return piid
diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py
index 994be3b6..de5844f9 100644
--- a/src/python/gudhi/point_cloud/knn.py
+++ b/src/python/gudhi/point_cloud/knn.py
@@ -8,6 +8,7 @@
# - YYYY/MM Author: Description of the modification
import numpy
+import warnings
# TODO: https://github.com/facebookresearch/faiss
@@ -111,7 +112,7 @@ class KNearestNeighbors:
nargs = {
k: v for k, v in self.params.items() if k in {"p", "n_jobs", "metric_params", "algorithm", "leaf_size"}
}
- self.nn = NearestNeighbors(self.k, metric=self.metric, **nargs)
+ self.nn = NearestNeighbors(n_neighbors=self.k, metric=self.metric, **nargs)
self.nn.fit(X)
if self.params["implementation"] == "hnsw":
@@ -257,6 +258,9 @@ class KNearestNeighbors:
if ef is not None:
self.graph.set_ef(ef)
neighbors, distances = self.graph.knn_query(X, k, num_threads=self.params["num_threads"])
+ with warnings.catch_warnings():
+ if not(numpy.all(numpy.isfinite(distances))):
+ warnings.warn("Overflow/infinite value encountered while computing 'distances'", RuntimeWarning)
# The k nearest neighbors are always sorted. I couldn't find it in the doc, but the code calls searchKnn,
# which returns a priority_queue, and then fills the return array backwards with top/pop on the queue.
if self.return_index:
@@ -290,6 +294,9 @@ class KNearestNeighbors:
if self.return_index:
if self.return_distance:
distances, neighbors = mat.Kmin_argKmin(k, dim=1)
+ with warnings.catch_warnings():
+ if not(torch.isfinite(distances).all()):
+ warnings.warn("Overflow/infinite value encountered while computing 'distances'", RuntimeWarning)
if p != numpy.inf:
distances = distances ** (1.0 / p)
return neighbors, distances
@@ -298,6 +305,9 @@ class KNearestNeighbors:
return neighbors
if self.return_distance:
distances = mat.Kmin(k, dim=1)
+ with warnings.catch_warnings():
+ if not(torch.isfinite(distances).all()):
+ warnings.warn("Overflow/infinite value encountered while computing 'distances'", RuntimeWarning)
if p != numpy.inf:
distances = distances ** (1.0 / p)
return distances
diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 5ca127f6..f8078d03 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -1,14 +1,17 @@
# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
-# Author(s): Mathieu Carrière, Martin Royer
+# Author(s): Mathieu Carrière, Martin Royer, Gard Spreemann
#
# Copyright (C) 2018-2020 Inria
#
# Modification(s):
# - 2020/06 Martin: ATOL integration
+# - 2020/12 Gard: A more flexible Betti curve class capable of computing exact curves.
+# - 2021/11 Vincent Rouvreau: factorize _automatic_sample_range
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.exceptions import NotFittedError
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler
from sklearn.neighbors import DistanceMetric
from sklearn.metrics import pairwise
@@ -45,10 +48,14 @@ class PersistenceImage(BaseEstimator, TransformerMixin):
y (n x 1 array): persistence diagram labels (unused).
"""
if np.isnan(np.array(self.im_range)).any():
- new_X = BirthPersistenceTransform().fit_transform(X)
- pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(new_X,y)
- [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
- self.im_range = np.where(np.isnan(np.array(self.im_range)), np.array([mx, Mx, my, My]), np.array(self.im_range))
+ try:
+ new_X = BirthPersistenceTransform().fit_transform(X)
+ pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(new_X,y)
+ [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
+ self.im_range = np.where(np.isnan(np.array(self.im_range)), np.array([mx, Mx, my, My]), np.array(self.im_range))
+ except ValueError:
+ # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507
+ pass
return self
def transform(self, X):
@@ -94,6 +101,28 @@ class PersistenceImage(BaseEstimator, TransformerMixin):
"""
return self.fit_transform([diag])[0,:]
+def _automatic_sample_range(sample_range, X, y):
+ """
+ Compute and returns sample range from the persistence diagrams if one of the sample_range values is numpy.nan.
+
+ Parameters:
+ sample_range (a numpy array of 2 float): minimum and maximum of all piecewise-linear function domains, of
+ the form [x_min, x_max].
+ X (list of n x 2 numpy arrays): input persistence diagrams.
+ y (n x 1 array): persistence diagram labels (unused).
+ """
+ nan_in_range = np.isnan(sample_range)
+ if nan_in_range.any():
+ try:
+ pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
+ [mx,my] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]]
+ [Mx,My] = [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
+ return np.where(nan_in_range, np.array([mx, My]), sample_range)
+ except ValueError:
+ # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507
+ pass
+ return sample_range
+
class Landscape(BaseEstimator, TransformerMixin):
"""
This is a class for computing persistence landscapes from a list of persistence diagrams. A persistence landscape is a collection of 1D piecewise-linear functions computed from the rank function associated to the persistence diagram. These piecewise-linear functions are then sampled evenly on a given range and the corresponding vectors of samples are concatenated and returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details.
@@ -119,10 +148,7 @@ class Landscape(BaseEstimator, TransformerMixin):
X (list of n x 2 numpy arrays): input persistence diagrams.
y (n x 1 array): persistence diagram labels (unused).
"""
- if self.nan_in_range.any():
- pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
- [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
- self.sample_range = np.where(self.nan_in_range, np.array([mx, My]), np.array(self.sample_range))
+ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
return self
def transform(self, X):
@@ -218,10 +244,7 @@ class Silhouette(BaseEstimator, TransformerMixin):
X (list of n x 2 numpy arrays): input persistence diagrams.
y (n x 1 array): persistence diagram labels (unused).
"""
- if np.isnan(np.array(self.sample_range)).any():
- pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
- [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
- self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range))
+ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
return self
def transform(self, X):
@@ -285,77 +308,162 @@ class Silhouette(BaseEstimator, TransformerMixin):
"""
return self.fit_transform([diag])[0,:]
+
class BettiCurve(BaseEstimator, TransformerMixin):
"""
- This is a class for computing Betti curves from a list of persistence diagrams. A Betti curve is a 1D piecewise-constant function obtained from the rank function. It is sampled evenly on a given range and the vector of samples is returned. See https://www.researchgate.net/publication/316604237_Time_Series_Classification_via_Topological_Data_Analysis for more details.
+ Compute Betti curves from persistence diagrams. There are several modes of operation: with a given resolution (with or without a sample_range), with a predefined grid, and with none of the previous. With a predefined grid, the class computes the Betti numbers at those grid points. Without a predefined grid, if the resolution is set to None, it can be fit to a list of persistence diagrams and produce a grid that consists of (at least) the filtration values at which at least one of those persistence diagrams changes Betti numbers, and then compute the Betti numbers at those grid points. In the latter mode, the exact Betti curve is computed for the entire real line. Otherwise, if the resolution is given, the Betti curve is obtained by sampling evenly using either the given sample_range or based on the persistence diagrams.
"""
- def __init__(self, resolution=100, sample_range=[np.nan, np.nan]):
+
+ def __init__(self, resolution=100, sample_range=[np.nan, np.nan], predefined_grid=None):
"""
Constructor for the BettiCurve class.
Parameters:
resolution (int): number of sample for the piecewise-constant function (default 100).
sample_range ([double, double]): minimum and maximum of the piecewise-constant function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
+ predefined_grid (1d array or None, default=None): Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are ok. If None (default), and resolution is given, the grid will be uniform from x_min to x_max in 'resolution' steps, otherwise a grid will be computed that captures all changes in Betti numbers in the provided data.
+
+ Attributes:
+ grid_ (1d array): The grid on which the Betti numbers are computed. If predefined_grid was specified, `grid_` will always be that grid, independently of data. If not, the grid is fitted to capture all filtration values at which the Betti numbers change.
+
+ Examples
+ --------
+ If pd is a persistence diagram and xs is a nonempty grid of finite values such that xs[0] >= pd.min(), then the results of:
+
+ >>> bc = BettiCurve(predefined_grid=xs) # doctest: +SKIP
+ >>> result = bc(pd) # doctest: +SKIP
+
+ and
+
+ >>> from scipy.interpolate import interp1d # doctest: +SKIP
+ >>> bc = BettiCurve(resolution=None, predefined_grid=None) # doctest: +SKIP
+ >>> bettis = bc.fit_transform([pd]) # doctest: +SKIP
+ >>> interp = interp1d(bc.grid_, bettis[0, :], kind="previous", fill_value="extrapolate") # doctest: +SKIP
+ >>> result = np.array(interp(xs), dtype=int) # doctest: +SKIP
+
+ are the same.
"""
- self.resolution, self.sample_range = resolution, sample_range
- def fit(self, X, y=None):
+ if (predefined_grid is not None) and (not isinstance(predefined_grid, np.ndarray)):
+ raise ValueError("Expected predefined_grid as array or None.")
+
+ self.predefined_grid = predefined_grid
+ self.resolution = resolution
+ self.sample_range = sample_range
+
+ def is_fitted(self):
+ return hasattr(self, "grid_")
+
+ def fit(self, X, y = None):
"""
- Fit the BettiCurve class on a list of persistence diagrams: if any of the values in **sample_range** is numpy.nan, replace it with the corresponding value computed on the given list of persistence diagrams.
+ Fit the BettiCurve class on a list of persistence diagrams: if any of the values in **sample_range** is numpy.nan, replace it with the corresponding value computed on the given list of persistence diagrams. When no predefined grid is provided and resolution set to None, compute a filtration grid that captures all changes in Betti numbers for all the given persistence diagrams.
Parameters:
- X (list of n x 2 numpy arrays): input persistence diagrams.
- y (n x 1 array): persistence diagram labels (unused).
+ X (list of 2d arrays): Persistence diagrams.
+ y (None): Ignored.
"""
- if np.isnan(np.array(self.sample_range)).any():
- pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
- [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
- self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range))
+
+ if self.predefined_grid is None:
+ if self.resolution is None: # Flexible/exact version
+ events = np.unique(np.concatenate([pd.flatten() for pd in X] + [[-np.inf]], axis=0))
+ self.grid_ = np.array(events)
+ else:
+ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
+ self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution)
+ else:
+ self.grid_ = self.predefined_grid # Get the predefined grid from user
+
return self
def transform(self, X):
"""
- Compute the Betti curve for each persistence diagram individually and concatenate the results.
+ Compute Betti curves.
Parameters:
- X (list of n x 2 numpy arrays): input persistence diagrams.
-
+ X (list of 2d arrays): Persistence diagrams.
+
Returns:
- numpy array with shape (number of diagrams) x (**resolution**): output Betti curves.
+ `len(X).len(self.grid_)` array of ints: Betti numbers of the given persistence diagrams at the grid points given in `self.grid_`
"""
- num_diag, Xfit = len(X), []
- x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution)
- step_x = x_values[1] - x_values[0]
- for i in range(num_diag):
+ if not self.is_fitted():
+ raise NotFittedError("Not fitted.")
- diagram, num_pts_in_diag = X[i], X[i].shape[0]
+ if not X:
+ X = [np.zeros((0, 2))]
+
+ N = len(X)
- bc = np.zeros(self.resolution)
- for j in range(num_pts_in_diag):
- [px,py] = diagram[j,:2]
- min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
- max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
- for k in range(min_idx, max_idx):
- bc[k] += 1
+ events = np.concatenate([pd.flatten(order="F") for pd in X], axis=0)
+ sorting = np.argsort(events)
+ offsets = np.zeros(1 + N, dtype=int)
+ for i in range(0, N):
+ offsets[i+1] = offsets[i] + 2*X[i].shape[0]
+ starts = offsets[0:N]
+ ends = offsets[1:N + 1] - 1
- Xfit.append(np.reshape(bc,[1,-1]))
+ bettis = [[0] for i in range(0, N)]
- Xfit = np.concatenate(Xfit, 0)
+ i = 0
+ for x in self.grid_:
+ while i < len(sorting) and events[sorting[i]] <= x:
+ j = np.searchsorted(ends, sorting[i])
+ delta = 1 if sorting[i] - starts[j] < len(X[j]) else -1
+ bettis[j][-1] += delta
+ i += 1
+ for k in range(0, N):
+ bettis[k].append(bettis[k][-1])
- return Xfit
+ return np.array(bettis, dtype=int)[:, 0:-1]
- def __call__(self, diag):
+ def fit_transform(self, X):
+ """
+ The result is the same as fit(X) followed by transform(X), but potentially faster.
"""
- Apply BettiCurve on a single persistence diagram and outputs the result.
- Parameters:
- diag (n x 2 numpy array): input persistence diagram.
+ if self.predefined_grid is None and self.resolution is None:
+ if not X:
+ X = [np.zeros((0, 2))]
- Returns:
- numpy array with shape (**resolution**): output Betti curve.
+ N = len(X)
+
+ events = np.concatenate([pd.flatten(order="F") for pd in X], axis=0)
+ sorting = np.argsort(events)
+ offsets = np.zeros(1 + N, dtype=int)
+ for i in range(0, N):
+ offsets[i+1] = offsets[i] + 2*X[i].shape[0]
+ starts = offsets[0:N]
+ ends = offsets[1:N + 1] - 1
+
+ xs = [-np.inf]
+ bettis = [[0] for i in range(0, N)]
+
+ for i in sorting:
+ j = np.searchsorted(ends, i)
+ delta = 1 if i - starts[j] < len(X[j]) else -1
+ if events[i] == xs[-1]:
+ bettis[j][-1] += delta
+ else:
+ xs.append(events[i])
+ for k in range(0, j):
+ bettis[k].append(bettis[k][-1])
+ bettis[j].append(bettis[j][-1] + delta)
+ for k in range(j+1, N):
+ bettis[k].append(bettis[k][-1])
+
+ self.grid_ = np.array(xs)
+ return np.array(bettis, dtype=int)
+
+ else:
+ return self.fit(X).transform(X)
+
+ def __call__(self, diag):
"""
- return self.fit_transform([diag])[0,:]
+ Shorthand for transform on a single persistence diagram.
+ """
+ return self.fit_transform([diag])[0, :]
+
+
class Entropy(BaseEstimator, TransformerMixin):
"""
@@ -381,10 +489,7 @@ class Entropy(BaseEstimator, TransformerMixin):
X (list of n x 2 numpy arrays): input persistence diagrams.
y (n x 1 array): persistence diagram labels (unused).
"""
- if np.isnan(np.array(self.sample_range)).any():
- pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
- [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
- self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range))
+ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
return self
def transform(self, X):
@@ -403,9 +508,13 @@ class Entropy(BaseEstimator, TransformerMixin):
new_X = BirthPersistenceTransform().fit_transform(X)
for i in range(num_diag):
-
orig_diagram, diagram, num_pts_in_diag = X[i], new_X[i], X[i].shape[0]
- new_diagram = DiagramScaler(use=True, scalers=[([1], MaxAbsScaler())]).fit_transform([diagram])[0]
+ try:
+ new_diagram = DiagramScaler(use=True, scalers=[([1], MaxAbsScaler())]).fit_transform([diagram])[0]
+ except ValueError:
+ # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507
+ assert len(diagram) == 0
+ new_diagram = np.empty(shape = [0, 2])
if self.mode == "scalar":
ent = - np.sum( np.multiply(new_diagram[:,1], np.log(new_diagram[:,1])) )
@@ -419,12 +528,11 @@ class Entropy(BaseEstimator, TransformerMixin):
max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
for k in range(min_idx, max_idx):
ent[k] += (-1) * new_diagram[j,1] * np.log(new_diagram[j,1])
- if self.normalized:
- ent = ent / np.linalg.norm(ent, ord=1)
- Xfit.append(np.reshape(ent,[1,-1]))
-
- Xfit = np.concatenate(Xfit, 0)
+ if self.normalized:
+ ent = ent / np.linalg.norm(ent, ord=1)
+ Xfit.append(np.reshape(ent,[1,-1]))
+ Xfit = np.concatenate(Xfit, axis=0)
return Xfit
def __call__(self, diag):
@@ -485,7 +593,13 @@ class TopologicalVector(BaseEstimator, TransformerMixin):
diagram, num_pts_in_diag = X[i], X[i].shape[0]
pers = 0.5 * (diagram[:,1]-diagram[:,0])
min_pers = np.minimum(pers,np.transpose(pers))
- distances = DistanceMetric.get_metric("chebyshev").pairwise(diagram)
+ # Works fine with sklearn 1.0, but an ValueError exception is thrown on past versions
+ try:
+ distances = DistanceMetric.get_metric("chebyshev").pairwise(diagram)
+ except ValueError:
+ # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507
+ assert len(diagram) == 0
+ distances = np.empty(shape = [0, 0])
vect = np.flip(np.sort(np.triu(np.minimum(distances, min_pers)), axis=None), 0)
dim = min(len(vect), thresh)
Xfit[i, :dim] = vect[:dim]
@@ -612,18 +726,19 @@ class Atol(BaseEstimator, TransformerMixin):
>>> b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]])
>>> c = np.array([[3, 2, -1], [1, 2, -1]])
>>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006))
- >>> atol_vectoriser.fit(X=[a, b, c]).centers
- array([[ 2. , 0.66666667, 3.33333333],
- [ 2.6 , 2.8 , -0.4 ]])
+ >>> atol_vectoriser.fit(X=[a, b, c]).centers # doctest: +SKIP
+ >>> # array([[ 2. , 0.66666667, 3.33333333],
+ >>> # [ 2.6 , 2.8 , -0.4 ]])
>>> atol_vectoriser(a)
- array([1.18168665, 0.42375966])
+ >>> # array([1.18168665, 0.42375966]) # doctest: +SKIP
>>> atol_vectoriser(c)
- array([0.02062512, 1.25157463])
- >>> atol_vectoriser.transform(X=[a, b, c])
- array([[1.18168665, 0.42375966],
- [0.29861028, 1.06330156],
- [0.02062512, 1.25157463]])
+ >>> # array([0.02062512, 1.25157463]) # doctest: +SKIP
+ >>> atol_vectoriser.transform(X=[a, b, c]) # doctest: +SKIP
+ >>> # array([[1.18168665, 0.42375966],
+ >>> # [0.29861028, 1.06330156],
+ >>> # [0.02062512, 1.25157463]])
"""
+ # Note the example above must be up to date with the one in tests called test_atol_doc
def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"):
"""
Constructor for the Atol measure vectorisation class.
diff --git a/src/python/gudhi/rips_complex.pyx b/src/python/gudhi/rips_complex.pyx
index 72e82c79..c3470292 100644
--- a/src/python/gudhi/rips_complex.pyx
+++ b/src/python/gudhi/rips_complex.pyx
@@ -49,13 +49,13 @@ cdef class RipsComplex:
:type max_edge_length: float
:param points: A list of points in d-Dimension.
- :type points: list of list of double
+ :type points: list of list of float
Or
:param distance_matrix: A distance matrix (full square or lower
triangular).
- :type points: list of list of double
+ :type points: list of list of float
And in both cases
@@ -89,10 +89,10 @@ cdef class RipsComplex:
def create_simplex_tree(self, max_dimension=1):
"""
- :param max_dimension: graph expansion for rips until this given maximal
+ :param max_dimension: graph expansion for Rips until this given maximal
dimension.
:type max_dimension: int
- :returns: A simplex tree created from the Delaunay Triangulation.
+ :returns: A simplex tree encoding the Vietoris–Rips filtration.
:rtype: SimplexTree
"""
stree = SimplexTree()
diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd
index ff750af9..4f229663 100644
--- a/src/python/gudhi/simplex_tree.pxd
+++ b/src/python/gudhi/simplex_tree.pxd
@@ -36,9 +36,16 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi":
Simplex_tree_skeleton_iterator operator++() nogil
bint operator!=(Simplex_tree_skeleton_iterator) nogil
+ cdef cppclass Simplex_tree_boundary_iterator "Gudhi::Simplex_tree_interface<Gudhi::Simplex_tree_options_full_featured>::Boundary_simplex_iterator":
+ Simplex_tree_boundary_iterator() nogil
+ Simplex_tree_simplex_handle& operator*() nogil
+ Simplex_tree_boundary_iterator operator++() nogil
+ bint operator!=(Simplex_tree_boundary_iterator) nogil
+
cdef cppclass Simplex_tree_interface_full_featured "Gudhi::Simplex_tree_interface<Gudhi::Simplex_tree_options_full_featured>":
- Simplex_tree() nogil
+ Simplex_tree_interface_full_featured() nogil
+ Simplex_tree_interface_full_featured(Simplex_tree_interface_full_featured&) nogil
double simplex_filtration(vector[int] simplex) nogil
void assign_simplex_filtration(vector[int] simplex, double filtration) nogil
void initialize_filtration() nogil
@@ -57,7 +64,9 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi":
bool make_filtration_non_decreasing() nogil
void compute_extended_filtration() nogil
vector[vector[pair[int, pair[double, double]]]] compute_extended_persistence_subdiagrams(vector[pair[int, pair[double, double]]] dgm, double min_persistence) nogil
- Simplex_tree_interface_full_featured* collapse_edges(int nb_collapse_iteration) nogil
+ Simplex_tree_interface_full_featured* collapse_edges(int nb_collapse_iteration) nogil except +
+ void reset_filtration(double filtration, int dimension) nogil
+ bint operator==(Simplex_tree_interface_full_featured) nogil
# Iterators over Simplex tree
pair[vector[int], double] get_simplex_and_filtration(Simplex_tree_simplex_handle f_simplex) nogil
Simplex_tree_simplices_iterator get_simplices_iterator_begin() nogil
@@ -66,6 +75,7 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi":
vector[Simplex_tree_simplex_handle].const_iterator get_filtration_iterator_end() nogil
Simplex_tree_skeleton_iterator get_skeleton_iterator_begin(int dimension) nogil
Simplex_tree_skeleton_iterator get_skeleton_iterator_end(int dimension) nogil
+ pair[Simplex_tree_boundary_iterator, Simplex_tree_boundary_iterator] get_boundary_iterators(vector[int] simplex) nogil except +
# Expansion with blockers
ctypedef bool (*blocker_func_t)(vector[int], void *user_data)
void expansion_with_blockers_callback(int dimension, blocker_func_t user_func, void *user_data)
@@ -73,7 +83,7 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi":
cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi":
cdef cppclass Simplex_tree_persistence_interface "Gudhi::Persistent_cohomology_interface<Gudhi::Simplex_tree<Gudhi::Simplex_tree_options_full_featured>>":
Simplex_tree_persistence_interface(Simplex_tree_interface_full_featured * st, bool persistence_dim_max) nogil
- void compute_persistence(int homology_coeff_field, double min_persistence) nogil
+ void compute_persistence(int homology_coeff_field, double min_persistence) nogil except +
vector[pair[int, pair[double, double]]] get_persistence() nogil
vector[int] betti_numbers() nogil
vector[int] persistent_betti_numbers(double from_value, double to_value) nogil
diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx
index 383d1949..644110c1 100644
--- a/src/python/gudhi/simplex_tree.pyx
+++ b/src/python/gudhi/simplex_tree.pyx
@@ -9,9 +9,8 @@
from cython.operator import dereference, preincrement
from libc.stdint cimport intptr_t
-import numpy
-from numpy import array as np_array
-cimport simplex_tree
+import numpy as np
+cimport gudhi.simplex_tree
__author__ = "Vincent Rouvreau"
__copyright__ = "Copyright (C) 2016 Inria"
@@ -42,13 +41,29 @@ cdef class SimplexTree:
cdef Simplex_tree_persistence_interface * pcohptr
# Fake constructor that does nothing but documenting the constructor
- def __init__(self):
+ def __init__(self, other = None):
"""SimplexTree constructor.
+
+ :param other: If `other` is `None` (default value), an empty `SimplexTree` is created.
+ If `other` is a `SimplexTree`, the `SimplexTree` is constructed from a deep copy of `other`.
+ :type other: SimplexTree (Optional)
+ :returns: An empty or a copy simplex tree.
+ :rtype: SimplexTree
+
+ :raises TypeError: In case `other` is neither `None`, nor a `SimplexTree`.
+ :note: If the `SimplexTree` is a copy, the persistence information is not copied. If you need it in the clone,
+ you have to call :func:`compute_persistence` on it even if you had already computed it in the original.
"""
# The real cython constructor
- def __cinit__(self):
- self.thisptr = <intptr_t>(new Simplex_tree_interface_full_featured())
+ def __cinit__(self, other = None):
+ if other:
+ if isinstance(other, SimplexTree):
+ self.thisptr = _get_copy_intptr(other)
+ else:
+ raise TypeError("`other` argument requires to be of type `SimplexTree`, or `None`.")
+ else:
+ self.thisptr = <intptr_t>(new Simplex_tree_interface_full_featured())
def __dealloc__(self):
cdef Simplex_tree_interface_full_featured* ptr = self.get_ptr()
@@ -67,6 +82,21 @@ cdef class SimplexTree:
"""
return self.pcohptr != NULL
+ def copy(self):
+ """
+ :returns: A simplex tree that is a deep copy of itself.
+ :rtype: SimplexTree
+
+ :note: The persistence information is not copied. If you need it in the clone, you have to call
+ :func:`compute_persistence` on it even if you had already computed it in the original.
+ """
+ stree = SimplexTree()
+ stree.thisptr = _get_copy_intptr(self)
+ return stree
+
+ def __deepcopy__(self):
+ return self.copy()
+
def filtration(self, simplex):
"""This function returns the filtration value for a given N-simplex in
this simplicial complex, or +infinity if it is not in the complex.
@@ -288,6 +318,22 @@ cdef class SimplexTree:
ct.append((v, filtered_simplex.second))
return ct
+ def get_boundaries(self, simplex):
+ """This function returns a generator with the boundaries of a given N-simplex.
+ If you do not need the filtration values, the boundary can also be obtained as
+ :code:`itertools.combinations(simplex,len(simplex)-1)`.
+
+ :param simplex: The N-simplex, represented by a list of vertex.
+ :type simplex: list of int.
+ :returns: The (simplices of the) boundary of a simplex
+ :rtype: generator with tuples(simplex, filtration)
+ """
+ cdef pair[Simplex_tree_boundary_iterator, Simplex_tree_boundary_iterator] it = self.get_ptr().get_boundary_iterators(simplex)
+
+ while it.first != it.second:
+ yield self.get_ptr().get_simplex_and_filtration(dereference(it.first))
+ preincrement(it.first)
+
def remove_maximal_simplex(self, simplex):
"""This function removes a given maximal N-simplex from the simplicial
complex.
@@ -331,7 +377,7 @@ cdef class SimplexTree:
return self.get_ptr().prune_above_filtration(filtration)
def expansion(self, max_dim):
- """Expands the Simplex_tree containing only its one skeleton
+ """Expands the simplex tree containing only its one skeleton
until dimension max_dim.
The expanded simplicial complex until dimension :math:`d`
@@ -341,7 +387,7 @@ cdef class SimplexTree:
The filtration value assigned to a simplex is the maximal filtration
value of one of its edges.
- The Simplex_tree must contain no simplex of dimension bigger than
+ The simplex tree must contain no simplex of dimension bigger than
1 when calling the method.
:param max_dim: The maximal dimension.
@@ -361,38 +407,54 @@ cdef class SimplexTree:
"""
return self.get_ptr().make_filtration_non_decreasing()
+ def reset_filtration(self, filtration, min_dim = 0):
+ """This function resets the filtration value of all the simplices of dimension at least min_dim. Resets all the
+ simplex tree when `min_dim = 0`.
+ `reset_filtration` may break the filtration property with `min_dim > 0`, and it is the user's responsibility to
+ make it a valid filtration (using a large enough `filt_value`, or calling `make_filtration_non_decreasing`
+ afterwards for instance).
+
+ :param filtration: New threshold value.
+ :type filtration: float.
+ :param min_dim: The minimal dimension. Default value is 0.
+ :type min_dim: int.
+ """
+ self.get_ptr().reset_filtration(filtration, min_dim)
+
def extend_filtration(self):
- """ Extend filtration for computing extended persistence. This function only uses the
- filtration values at the 0-dimensional simplices, and computes the extended persistence
- diagram induced by the lower-star filtration computed with these values.
+ """ Extend filtration for computing extended persistence. This function only uses the filtration values at the
+ 0-dimensional simplices, and computes the extended persistence diagram induced by the lower-star filtration
+ computed with these values.
.. note::
- Note that after calling this function, the filtration
- values are actually modified within the Simplex_tree.
- The function :func:`extended_persistence`
- retrieves the original values.
+ Note that after calling this function, the filtration values are actually modified within the simplex tree.
+ The function :func:`extended_persistence` retrieves the original values.
.. note::
- Note that this code creates an extra vertex internally, so you should make sure that
- the Simplex_tree does not contain a vertex with the largest possible value (i.e., 4294967295).
+ Note that this code creates an extra vertex internally, so you should make sure that the simplex tree does
+ not contain a vertex with the largest possible value (i.e., 4294967295).
+
+ This `notebook <https://github.com/GUDHI/TDA-tutorial/blob/master/Tuto-GUDHI-extended-persistence.ipynb>`_
+ explains how to compute an extension of persistence called extended persistence.
"""
self.get_ptr().compute_extended_filtration()
def extended_persistence(self, homology_coeff_field=11, min_persistence=0):
- """This function retrieves good values for extended persistence, and separate the diagrams
- into the Ordinary, Relative, Extended+ and Extended- subdiagrams.
+ """This function retrieves good values for extended persistence, and separate the diagrams into the Ordinary,
+ Relative, Extended+ and Extended- subdiagrams.
- :param homology_coeff_field: The homology coefficient field. Must be a
- prime number. Default value is 11.
+ :param homology_coeff_field: The homology coefficient field. Must be a prime number. Default value is 11. Max is 46337.
:type homology_coeff_field: int
- :param min_persistence: The minimum persistence value (i.e., the absolute value of the difference between the persistence diagram point coordinates) to take into
- account (strictly greater than min_persistence). Default value is
- 0.0.
- Sets min_persistence to -1.0 to see all values.
+ :param min_persistence: The minimum persistence value (i.e., the absolute value of the difference between the
+ persistence diagram point coordinates) to take into account (strictly greater than min_persistence).
+ Default value is 0.0. Sets min_persistence to -1.0 to see all values.
:type min_persistence: float
- :returns: A list of four persistence diagrams in the format described in :func:`persistence`. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. See https://link.springer.com/article/10.1007/s10208-008-9027-z and/or section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes.
+ :returns: A list of four persistence diagrams in the format described in :func:`persistence`. The first one is
+ Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-.
+ See https://link.springer.com/article/10.1007/s10208-008-9027-z and/or section 2.2 in
+ https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes.
.. note::
@@ -403,6 +465,9 @@ cdef class SimplexTree:
The coordinates of the persistence diagram points might be a little different than the
original filtration values due to the internal transformation (scaling to [-2,-1]) that is
performed on these values during the computation of extended persistence.
+
+ This `notebook <https://github.com/GUDHI/TDA-tutorial/blob/master/Tuto-GUDHI-extended-persistence.ipynb>`_
+ explains how to compute an extension of persistence called extended persistence.
"""
cdef vector[pair[int, pair[double, double]]] persistence_result
if self.pcohptr != NULL:
@@ -436,7 +501,7 @@ cdef class SimplexTree:
"""This function computes and returns the persistence of the simplicial complex.
:param homology_coeff_field: The homology coefficient field. Must be a
- prime number. Default value is 11.
+ prime number. Default value is 11. Max is 46337.
:type homology_coeff_field: int
:param min_persistence: The minimum persistence value to take into
account (strictly greater than min_persistence). Default value is
@@ -459,7 +524,7 @@ cdef class SimplexTree:
when you do not want the list :func:`persistence` returns.
:param homology_coeff_field: The homology coefficient field. Must be a
- prime number. Default value is 11.
+ prime number. Default value is 11. Max is 46337.
:type homology_coeff_field: int
:param min_persistence: The minimum persistence value to take into
account (strictly greater than min_persistence). Default value is
@@ -529,7 +594,11 @@ cdef class SimplexTree:
function to be launched first.
"""
assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()"
- return np_array(self.pcohptr.intervals_in_dimension(dimension))
+ piid = np.array(self.pcohptr.intervals_in_dimension(dimension))
+ # Workaround https://github.com/GUDHI/gudhi-devel/issues/507
+ if len(piid) == 0:
+ return np.empty(shape = [0, 2])
+ return piid
def persistence_pairs(self):
"""This function returns a list of persistence birth and death simplices pairs.
@@ -570,8 +639,8 @@ cdef class SimplexTree:
"""
assert self.pcohptr != NULL, "lower_star_persistence_generators() requires that persistence() be called first."
gen = self.pcohptr.lower_star_generators()
- normal = [np_array(d).reshape(-1,2) for d in gen.first]
- infinite = [np_array(d) for d in gen.second]
+ normal = [np.array(d).reshape(-1,2) for d in gen.first]
+ infinite = [np.array(d) for d in gen.second]
return (normal, infinite)
def flag_persistence_generators(self):
@@ -589,19 +658,19 @@ cdef class SimplexTree:
assert self.pcohptr != NULL, "flag_persistence_generators() requires that persistence() be called first."
gen = self.pcohptr.flag_generators()
if len(gen.first) == 0:
- normal0 = numpy.empty((0,3))
+ normal0 = np.empty((0,3))
normals = []
else:
l = iter(gen.first)
- normal0 = np_array(next(l)).reshape(-1,3)
- normals = [np_array(d).reshape(-1,4) for d in l]
+ normal0 = np.array(next(l)).reshape(-1,3)
+ normals = [np.array(d).reshape(-1,4) for d in l]
if len(gen.second) == 0:
- infinite0 = numpy.empty(0)
+ infinite0 = np.empty(0)
infinites = []
else:
l = iter(gen.second)
- infinite0 = np_array(next(l))
- infinites = [np_array(d).reshape(-1,2) for d in l]
+ infinite0 = np.array(next(l))
+ infinites = [np.array(d).reshape(-1,2) for d in l]
return (normal0, normals, infinite0, infinites)
def collapse_edges(self, nb_iterations = 1):
@@ -614,6 +683,9 @@ cdef class SimplexTree:
:param nb_iterations: The number of edge collapse iterations to perform. Default is 1.
:type nb_iterations: int
+
+ :note: collapse_edges method requires `Eigen <installation.html#eigen>`_ >= 3.1.0 and an exception is thrown
+ if this method is not available.
"""
# Backup old pointer
cdef Simplex_tree_interface_full_featured* ptr = self.get_ptr()
@@ -623,3 +695,13 @@ cdef class SimplexTree:
self.thisptr = <intptr_t>(ptr.collapse_edges(nb_iter))
# Delete old pointer
del ptr
+
+ def __eq__(self, other:SimplexTree):
+ """Test for structural equality
+ :returns: True if the 2 simplex trees are equal, False otherwise.
+ :rtype: bool
+ """
+ return dereference(self.get_ptr()) == dereference(other.get_ptr())
+
+cdef intptr_t _get_copy_intptr(SimplexTree stree) nogil:
+ return <intptr_t>(new Simplex_tree_interface_full_featured(dereference(stree.get_ptr())))
diff --git a/src/python/gudhi/subsampling.pyx b/src/python/gudhi/subsampling.pyx
index f77c6f75..46f32335 100644
--- a/src/python/gudhi/subsampling.pyx
+++ b/src/python/gudhi/subsampling.pyx
@@ -33,7 +33,7 @@ def choose_n_farthest_points(points=None, off_file='', nb_points=0, starting_poi
The iteration starts with the landmark `starting point`.
:param points: The input point set.
- :type points: Iterable[Iterable[float]].
+ :type points: Iterable[Iterable[float]]
Or
@@ -42,14 +42,15 @@ def choose_n_farthest_points(points=None, off_file='', nb_points=0, starting_poi
And in both cases
- :param nb_points: Number of points of the subsample.
- :type nb_points: unsigned.
+ :param nb_points: Number of points of the subsample (the subsample may be \
+ smaller if there are fewer than nb_points distinct input points)
+ :type nb_points: int
:param starting_point: The iteration starts with the landmark `starting \
- point`,which is the index of the point to start with. If not set, this \
+ point`, which is the index of the point to start with. If not set, this \
index is chosen randomly.
- :type starting_point: unsigned.
+ :type starting_point: int
:returns: The subsample point set.
- :rtype: List[List[float]].
+ :rtype: List[List[float]]
"""
if off_file:
if os.path.isfile(off_file):
@@ -76,7 +77,7 @@ def pick_n_random_points(points=None, off_file='', nb_points=0):
"""Subsample a point set by picking random vertices.
:param points: The input point set.
- :type points: Iterable[Iterable[float]].
+ :type points: Iterable[Iterable[float]]
Or
@@ -86,7 +87,7 @@ def pick_n_random_points(points=None, off_file='', nb_points=0):
And in both cases
:param nb_points: Number of points of the subsample.
- :type nb_points: unsigned.
+ :type nb_points: int
:returns: The subsample point set.
:rtype: List[List[float]]
"""
@@ -104,10 +105,10 @@ def pick_n_random_points(points=None, off_file='', nb_points=0):
def sparsify_point_set(points=None, off_file='', min_squared_dist=0.0):
"""Outputs a subset of the input points so that the squared distance
- between any two points is greater than or equal to min_squared_dist.
+ between any two points is greater than min_squared_dist.
:param points: The input point set.
- :type points: Iterable[Iterable[float]].
+ :type points: Iterable[Iterable[float]]
Or
@@ -118,7 +119,7 @@ def sparsify_point_set(points=None, off_file='', min_squared_dist=0.0):
:param min_squared_dist: Minimum squared distance separating the output \
points.
- :type min_squared_dist: float.
+ :type min_squared_dist: float
:returns: The subsample point set.
:rtype: List[List[float]]
"""
diff --git a/src/python/gudhi/wasserstein/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py
index a9d1cdff..dc18806e 100644
--- a/src/python/gudhi/wasserstein/wasserstein.py
+++ b/src/python/gudhi/wasserstein/wasserstein.py
@@ -9,6 +9,7 @@
import numpy as np
import scipy.spatial.distance as sc
+import warnings
try:
import ot
@@ -70,6 +71,7 @@ def _perstot_autodiff(X, order, internal_p):
'''
return _dist_to_diag(X, internal_p).norms.lp(order)
+
def _perstot(X, order, internal_p, enable_autodiff):
'''
:param X: (n x 2) numpy.array (points of a given diagram).
@@ -79,6 +81,9 @@ def _perstot(X, order, internal_p, enable_autodiff):
transparent to automatic differentiation.
:type enable_autodiff: bool
:returns: float, the total persistence of the diagram (that is, its distance to the empty diagram).
+
+ .. note::
+ Can be +inf if the diagram has an essential part (points with infinite coordinates).
'''
if enable_autodiff:
import eagerpy as ep
@@ -88,32 +93,163 @@ def _perstot(X, order, internal_p, enable_autodiff):
return np.linalg.norm(_dist_to_diag(X, internal_p), ord=order)
-def wasserstein_distance(X, Y, matching=False, order=1., internal_p=np.inf, enable_autodiff=False):
+def _get_essential_parts(a):
'''
- :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points
- (i.e. with infinite coordinate).
- :param Y: (m x 2) numpy.array encoding the second diagram.
- :param matching: if True, computes and returns the optimal matching between X and Y, encoded as
- a (n x 2) np.array [...[i,j]...], meaning the i-th point in X is matched to
- the j-th point in Y, with the convention (-1) represents the diagonal.
- :param order: exponent for Wasserstein; Default value is 1.
- :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2);
- Default value is `np.inf`.
- :param enable_autodiff: If X and Y are torch.tensor or tensorflow.Tensor, make the computation
+ :param a: (n x 2) numpy.array (point of a diagram)
+ :returns: five lists of indices (between 0 and len(a)) accounting for the five types of points with infinite
+ coordinates that can occur in a diagram, namely:
+ type0 : (-inf, finite)
+ type1 : (finite, +inf)
+ type2 : (-inf, +inf)
+ type3 : (-inf, -inf)
+ type4 : (+inf, +inf)
+ .. note::
+ For instance, a[_get_essential_parts(a)[0]] returns the points in a of coordinates (-inf, x) for some finite x.
+ Note also that points with (+inf, -inf) are not handled (points (x,y) in dgm satisfy by assumption (y >= x)).
+
+ Finally, we consider that points with coordinates (-inf,-inf) and (+inf, +inf) belong to the diagonal.
+ '''
+ if len(a):
+ first_coord_finite = np.isfinite(a[:,0])
+ second_coord_finite = np.isfinite(a[:,1])
+ first_coord_infinite_positive = (a[:,0] == np.inf)
+ second_coord_infinite_positive = (a[:,1] == np.inf)
+ first_coord_infinite_negative = (a[:,0] == -np.inf)
+ second_coord_infinite_negative = (a[:,1] == -np.inf)
+
+ ess_first_type = np.where(second_coord_finite & first_coord_infinite_negative)[0] # coord (-inf, x)
+ ess_second_type = np.where(first_coord_finite & second_coord_infinite_positive)[0] # coord (x, +inf)
+ ess_third_type = np.where(first_coord_infinite_negative & second_coord_infinite_positive)[0] # coord (-inf, +inf)
+
+ ess_fourth_type = np.where(first_coord_infinite_negative & second_coord_infinite_negative)[0] # coord (-inf, -inf)
+ ess_fifth_type = np.where(first_coord_infinite_positive & second_coord_infinite_positive)[0] # coord (+inf, +inf)
+ return ess_first_type, ess_second_type, ess_third_type, ess_fourth_type, ess_fifth_type
+ else:
+ return [], [], [], [], []
+
+
+def _cost_and_match_essential_parts(X, Y, idX, idY, order, axis):
+ '''
+ :param X: (n x 2) numpy.array (dgm points)
+ :param Y: (n x 2) numpy.array (dgm points)
+ :param idX: indices to consider for this one dimensional OT problem (in X)
+ :param idY: indices to consider for this one dimensional OT problem (in Y)
+ :param order: exponent for Wasserstein distance computation
+ :param axis: must be 0 or 1, correspond to the coordinate which is finite.
+ :returns: cost (float) and match for points with *one* infinite coordinate.
+
+ .. note::
+ Assume idX, idY come when calling _handle_essential_parts, thus have same length.
+ '''
+ u = X[idX, axis]
+ v = Y[idY, axis]
+
+ cost = np.sum(np.abs(np.sort(u) - np.sort(v))**(order)) # OT cost in 1D
+
+ sortidX = idX[np.argsort(u)]
+ sortidY = idY[np.argsort(v)]
+ # We return [i,j] sorted per value
+ match = list(zip(sortidX, sortidY))
+
+ return cost, match
+
+
+def _handle_essential_parts(X, Y, order):
+ '''
+ :param X: (n x 2) numpy array, first diagram.
+ :param Y: (n x 2) numpy array, second diagram.
+ :order: Wasserstein order for cost computation.
+ :returns: cost and matching due to essential parts. If cost is +inf, matching will be set to None.
+ '''
+ ess_parts_X = _get_essential_parts(X)
+ ess_parts_Y = _get_essential_parts(Y)
+
+ # Treats the case of infinite cost (cardinalities of essential parts differ).
+ for u, v in list(zip(ess_parts_X, ess_parts_Y))[:3]: # ignore types 4 and 5 as they belong to the diagonal
+ if len(u) != len(v):
+ return np.inf, None
+
+ # Now we know each essential part has the same number of points in both diagrams.
+ # Handle type 0 and type 1 essential parts (those with one finite coordinates)
+ c1, m1 = _cost_and_match_essential_parts(X, Y, ess_parts_X[0], ess_parts_Y[0], axis=1, order=order)
+ c2, m2 = _cost_and_match_essential_parts(X, Y, ess_parts_X[1], ess_parts_Y[1], axis=0, order=order)
+
+ c = c1 + c2
+ m = m1 + m2
+
+ # Handle type3 (coordinates (-inf,+inf), so we just align points)
+ m += list(zip(ess_parts_X[2], ess_parts_Y[2]))
+
+ # Handle type 4 and 5, considered as belonging to the diagonal so matched to (-1) with cost 0.
+ for z in ess_parts_X[3:]:
+ m += [(u, -1) for u in z] # points in X are matched to -1
+ for z in ess_parts_Y[3:]:
+ m += [(-1, v) for v in z] # -1 is match to points in Y
+
+ return c, np.array(m)
+
+
+def _finite_part(X):
+ '''
+ :param X: (n x 2) numpy array encoding a persistence diagram.
+ :returns: The finite part of a diagram `X` (points with finite coordinates).
+ '''
+ return X[np.where(np.isfinite(X[:,0]) & np.isfinite(X[:,1]))]
+
+
+def _warn_infty(matching):
+ '''
+ Handle essential parts with different cardinalities. Warn the user about cost being infinite and (if
+ `matching=True`) about the returned matching being `None`.
+ '''
+ if matching:
+ warnings.warn('Cardinality of essential parts differs. Distance (cost) is +inf, and the returned matching is None.')
+ return np.inf, None
+ else:
+ warnings.warn('Cardinality of essential parts differs. Distance (cost) is +inf.')
+ return np.inf
+
+
+def wasserstein_distance(X, Y, matching=False, order=1., internal_p=np.inf, enable_autodiff=False,
+ keep_essential_parts=True):
+ '''
+ Compute the Wasserstein distance between persistence diagram using Python Optimal Transport backend.
+ Diagrams can contain points with infinity coordinates (essential parts).
+ Points with (-inf,-inf) and (+inf,+inf) coordinates are considered as belonging to the diagonal.
+ If the distance between two diagrams is +inf (which happens if the cardinalities of essential
+ parts differ) and optimal matching is required, it will be set to ``None``.
+
+ :param X: The first diagram.
+ :type X: n x 2 numpy.array
+ :param Y: The second diagram.
+ :type Y: m x 2 numpy.array
+ :param matching: if ``True``, computes and returns the optimal matching between X and Y, encoded as
+ a (n x 2) np.array [...[i,j]...], meaning the i-th point in X is matched to
+ the j-th point in Y, with the convention that (-1) represents the diagonal.
+ :param order: Wasserstein exponent q (1 <= q < infinity).
+ :type order: float
+ :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2).
+ :type internal_p: float
+ :param enable_autodiff: If X and Y are ``torch.tensor`` or ``tensorflow.Tensor``, make the computation
transparent to automatic differentiation. This requires the package EagerPy and is currently incompatible
- with `matching=True`.
+ with ``matching=True`` and with ``keep_essential_parts=True``.
- .. note:: This considers the function defined on the coordinates of the off-diagonal points of X and Y
+ .. note:: This considers the function defined on the coordinates of the off-diagonal finite points of X and Y
and lets the various frameworks compute its gradient. It never pulls new points from the diagonal.
:type enable_autodiff: bool
- :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with
+ :param keep_essential_parts: If ``False``, only considers the finite points in the diagrams.
+ Otherwise, include essential parts in cost and matching computation.
+ :type keep_essential_parts: bool
+ :returns: The Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with
respect to the internal_p-norm as ground metric.
If matching is set to True, also returns the optimal matching between X and Y.
+ If cost is +inf, any matching is optimal and thus it returns `None` instead.
'''
+
+ # First step: handle empty diagrams
n = len(X)
m = len(Y)
- # handle empty diagrams
if n == 0:
if m == 0:
if not matching:
@@ -122,16 +258,45 @@ def wasserstein_distance(X, Y, matching=False, order=1., internal_p=np.inf, enab
else:
return 0., np.array([])
else:
- if not matching:
- return _perstot(Y, order, internal_p, enable_autodiff)
+ cost = _perstot(Y, order, internal_p, enable_autodiff)
+ if cost == np.inf:
+ return _warn_infty(matching)
else:
- return _perstot(Y, order, internal_p, enable_autodiff), np.array([[-1, j] for j in range(m)])
+ if not matching:
+ return cost
+ else:
+ return cost, np.array([[-1, j] for j in range(m)])
elif m == 0:
- if not matching:
- return _perstot(X, order, internal_p, enable_autodiff)
+ cost = _perstot(X, order, internal_p, enable_autodiff)
+ if cost == np.inf:
+ return _warn_infty(matching)
else:
- return _perstot(X, order, internal_p, enable_autodiff), np.array([[i, -1] for i in range(n)])
+ if not matching:
+ return cost
+ else:
+ return cost, np.array([[i, -1] for i in range(n)])
+
+ # Check essential part and enable autodiff together
+ if enable_autodiff and keep_essential_parts:
+ warnings.warn('''enable_autodiff=True and keep_essential_parts=True are incompatible together.
+ keep_essential_parts is set to False: only points with finite coordinates are considered
+ in the following.
+ ''')
+ keep_essential_parts = False
+
+ # Second step: handle essential parts if needed.
+ if keep_essential_parts:
+ essential_cost, essential_matching = _handle_essential_parts(X, Y, order=order)
+ if (essential_cost == np.inf):
+ return _warn_infty(matching) # Tells the user that cost is infty and matching (if True) is None.
+ # avoid computing transport cost between the finite parts if essential parts
+ # cardinalities do not match (saves time)
+ else:
+ essential_cost = 0
+ essential_matching = None
+
+ # Now the standard pipeline for finite parts
if enable_autodiff:
import eagerpy as ep
@@ -139,6 +304,12 @@ def wasserstein_distance(X, Y, matching=False, order=1., internal_p=np.inf, enab
Y_orig = ep.astensor(Y)
X = X_orig.numpy()
Y = Y_orig.numpy()
+
+ # Extract finite points of the diagrams.
+ X, Y = _finite_part(X), _finite_part(Y)
+ n = len(X)
+ m = len(Y)
+
M = _build_dist_matrix(X, Y, order=order, internal_p=internal_p)
a = np.ones(n+1) # weight vector of the input diagram. Uniform here.
a[-1] = m
@@ -154,7 +325,10 @@ def wasserstein_distance(X, Y, matching=False, order=1., internal_p=np.inf, enab
# Now we turn to -1 points encoding the diagonal
match[:,0][match[:,0] >= n] = -1
match[:,1][match[:,1] >= m] = -1
- return ot_cost ** (1./order) , match
+ # Finally incorporate the essential part matching
+ if essential_matching is not None:
+ match = np.concatenate([match, essential_matching]) if essential_matching.size else match
+ return (ot_cost + essential_cost) ** (1./order) , match
if enable_autodiff:
P = ot.emd(a=a, b=b, M=M, numItermax=2000000)
@@ -173,9 +347,9 @@ def wasserstein_distance(X, Y, matching=False, order=1., internal_p=np.inf, enab
return ep.concatenate(dists).norms.lp(order).raw
# We can also concatenate the 3 vectors to compute just one norm.
- # Comptuation of the otcost using the ot.emd2 library.
+ # Comptuation of the ot cost using the ot.emd2 library.
# Note: it is the Wasserstein distance to the power q.
# The default numItermax=100000 is not sufficient for some examples with 5000 points, what is a good value?
ot_cost = ot.emd2(a, b, M, numItermax=2000000)
- return ot_cost ** (1./order)
+ return (ot_cost + essential_cost) ** (1./order)
diff --git a/src/python/gudhi/weighted_rips_complex.py b/src/python/gudhi/weighted_rips_complex.py
index 0541572b..16f63c3d 100644
--- a/src/python/gudhi/weighted_rips_complex.py
+++ b/src/python/gudhi/weighted_rips_complex.py
@@ -12,9 +12,11 @@ from gudhi import SimplexTree
class WeightedRipsComplex:
"""
Class to generate a weighted Rips complex from a distance matrix and weights on vertices,
- in the way described in :cite:`dtmfiltrations`.
+ in the way described in :cite:`dtmfiltrations` with `p=1`. The filtration value of vertex `i` is `2*weights[i]`,
+ and the filtration value of edge `ij` is `distance_matrix[i][j]+weights[i]+weights[j]`,
+ or the maximum of the filtrations of its extremities, whichever is largest.
Remark that all the filtration values are doubled compared to the definition in the paper
- for the consistency with RipsComplex.
+ for consistency with RipsComplex.
"""
def __init__(self,
distance_matrix,