summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.github/test-requirements.txt1
-rw-r--r--.github/workflows/pip-build-windows.yml9
-rw-r--r--.github/workflows/pip-packaging-windows.yml8
-rw-r--r--src/Bottleneck_distance/include/gudhi/Bottleneck.h8
-rw-r--r--src/Bottleneck_distance/include/gudhi/Persistence_graph.h18
-rw-r--r--src/Bottleneck_distance/test/bottleneck_unit_test.cpp5
-rw-r--r--src/Spatial_searching/include/gudhi/Kd_tree_search.h98
-rw-r--r--src/Subsampling/include/gudhi/pick_n_random_points.h14
-rw-r--r--src/Subsampling/include/gudhi/sparsify_point_set.h33
-rw-r--r--src/cmake/modules/GUDHI_third_party_libraries.cmake4
-rw-r--r--src/python/doc/bottleneck_distance_user.rst4
-rw-r--r--src/python/doc/cubical_complex_user.rst22
-rw-r--r--src/python/doc/representations.rst64
-rw-r--r--src/python/doc/wasserstein_distance_user.rst7
-rw-r--r--src/python/gudhi/representations/vector_methods.py17
-rw-r--r--src/python/gudhi/simplex_tree.pyx41
-rw-r--r--src/python/gudhi/subsampling.pyx2
-rw-r--r--src/python/include/Alpha_complex_factory.h9
-rwxr-xr-xsrc/python/test/test_bottleneck_distance.py12
-rwxr-xr-xsrc/python/test/test_representations.py20
-rwxr-xr-xsrc/python/test/test_subsampling.py16
21 files changed, 276 insertions, 136 deletions
diff --git a/.github/test-requirements.txt b/.github/test-requirements.txt
index 7d69c0b9..688a2a11 100644
--- a/.github/test-requirements.txt
+++ b/.github/test-requirements.txt
@@ -8,6 +8,7 @@ scipy
scikit-learn
POT
tensorflow
+tensorflow-addons
torch<1.5
pykeops
hnswlib
diff --git a/.github/workflows/pip-build-windows.yml b/.github/workflows/pip-build-windows.yml
index 11f1ace9..995d52dd 100644
--- a/.github/workflows/pip-build-windows.yml
+++ b/.github/workflows/pip-build-windows.yml
@@ -18,18 +18,13 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
architecture: x64
- - name: Patch
- run: |
- (new-object System.Net.WebClient).DownloadFile('https://github.com/microsoft/vcpkg/files/4978792/vcpkg_fixup_pkgconfig.cmake.txt','c:\vcpkg\scripts\cmake\vcpkg_fixup_pkgconfig.cmake')
- (new-object System.Net.WebClient).DownloadFile('https://github.com/microsoft/vcpkg/files/4978796/vcpkg_acquire_msys.cmake.txt','c:\vcpkg\scripts\cmake\vcpkg_acquire_msys.cmake')
- shell: powershell
- name: Install dependencies
run: |
vcpkg update
vcpkg upgrade --no-dry-run
- vcpkg install boost-graph boost-serialization boost-date-time boost-system boost-filesystem boost-units boost-thread boost-program-options eigen3 mpfr mpir cgal --triplet x64-windows
+ type c:/vcpkg/ports/cgal/portfile.cmake
+ vcpkg install eigen3 cgal --triplet x64-windows
python -m pip install --user -r .github/build-requirements.txt
- python -m pip install --user twine
python -m pip list
- name: Build python wheel
run: |
diff --git a/.github/workflows/pip-packaging-windows.yml b/.github/workflows/pip-packaging-windows.yml
index 2e45ad71..8f4ab6e7 100644
--- a/.github/workflows/pip-packaging-windows.yml
+++ b/.github/workflows/pip-packaging-windows.yml
@@ -20,16 +20,12 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
architecture: x64
- - name: Patch
- run: |
- (new-object System.Net.WebClient).DownloadFile('https://github.com/microsoft/vcpkg/files/4978792/vcpkg_fixup_pkgconfig.cmake.txt','c:\vcpkg\scripts\cmake\vcpkg_fixup_pkgconfig.cmake')
- (new-object System.Net.WebClient).DownloadFile('https://github.com/microsoft/vcpkg/files/4978796/vcpkg_acquire_msys.cmake.txt','c:\vcpkg\scripts\cmake\vcpkg_acquire_msys.cmake')
- shell: powershell
- name: Install dependencies
run: |
vcpkg update
vcpkg upgrade --no-dry-run
- vcpkg install boost-graph boost-serialization boost-date-time boost-system boost-filesystem boost-units boost-thread boost-program-options eigen3 mpfr mpir cgal --triplet x64-windows
+ type c:/vcpkg/ports/cgal/portfile.cmake
+ vcpkg install eigen3 cgal --triplet x64-windows
python -m pip install --user -r .github/build-requirements.txt
python -m pip install --user twine
python -m pip list
diff --git a/src/Bottleneck_distance/include/gudhi/Bottleneck.h b/src/Bottleneck_distance/include/gudhi/Bottleneck.h
index e466828a..c916898d 100644
--- a/src/Bottleneck_distance/include/gudhi/Bottleneck.h
+++ b/src/Bottleneck_distance/include/gudhi/Bottleneck.h
@@ -35,8 +35,12 @@ namespace persistence_diagram {
inline double bottleneck_distance_approx(Persistence_graph& g, double e) {
double b_lower_bound = 0.;
- double b_upper_bound = g.diameter_bound();
- const double alpha = std::pow(g.size(), 1. / 5.);
+ double b_upper_bound = g.max_dist_to_diagonal();
+ int siz = g.size();
+ if (siz <= 1)
+ // The value of alpha would be wrong in this case
+ return b_upper_bound;
+ const double alpha = std::pow(siz, 1. / 5.);
Graph_matching m(g);
Graph_matching biggest_unperfect(g);
while (b_upper_bound - b_lower_bound > 2 * e) {
diff --git a/src/Bottleneck_distance/include/gudhi/Persistence_graph.h b/src/Bottleneck_distance/include/gudhi/Persistence_graph.h
index e1e3522e..33f03b9c 100644
--- a/src/Bottleneck_distance/include/gudhi/Persistence_graph.h
+++ b/src/Bottleneck_distance/include/gudhi/Persistence_graph.h
@@ -45,14 +45,14 @@ class Persistence_graph {
int corresponding_point_in_v(int u_point_index) const;
/** \internal \brief Given a point from U and a point from V, returns the distance between those points. */
double distance(int u_point_index, int v_point_index) const;
- /** \internal \brief Returns size = |U| = |V|. */
+ /** \internal \brief Returns size = |U| + |V|. */
int size() const;
/** \internal \brief Is there as many infinite points (alive components) in both diagrams ? */
double bottleneck_alive() const;
/** \internal \brief Returns the O(n^2) sorted distances between the points. */
std::vector<double> sorted_distances() const;
- /** \internal \brief Returns an upper bound for the diameter of the convex hull of all non infinite points */
- double diameter_bound() const;
+ /** \internal \brief Returns an upper bound for the bottleneck distance of the finite points. */
+ double max_dist_to_diagonal() const;
/** \internal \brief Returns the corresponding internal point */
Internal_point get_u_point(int u_point_index) const;
/** \internal \brief Returns the corresponding internal point */
@@ -160,13 +160,13 @@ inline Internal_point Persistence_graph::get_v_point(int v_point_index) const {
return Internal_point(m, m, v_point_index);
}
-inline double Persistence_graph::diameter_bound() const {
+inline double Persistence_graph::max_dist_to_diagonal() const {
double max = 0.;
- for (auto it = u.cbegin(); it != u.cend(); it++)
- max = (std::max)(max, it->y());
- for (auto it = v.cbegin(); it != v.cend(); it++)
- max = (std::max)(max, it->y());
- return max;
+ for (auto& p : u)
+ max = (std::max)(max, p.y() - p.x());
+ for (auto& p : v)
+ max = (std::max)(max, p.y() - p.x());
+ return max / 2;
}
} // namespace persistence_diagram
diff --git a/src/Bottleneck_distance/test/bottleneck_unit_test.cpp b/src/Bottleneck_distance/test/bottleneck_unit_test.cpp
index 2c520045..44141baa 100644
--- a/src/Bottleneck_distance/test/bottleneck_unit_test.cpp
+++ b/src/Bottleneck_distance/test/bottleneck_unit_test.cpp
@@ -153,4 +153,9 @@ BOOST_AUTO_TEST_CASE(global) {
BOOST_CHECK(bottleneck_distance(v1, v2, 0.) <= upper_bound / 100.);
BOOST_CHECK(bottleneck_distance(v1, v2, upper_bound / 10000.) <= upper_bound / 100. + upper_bound / 10000.);
BOOST_CHECK(std::abs(bottleneck_distance(v1, v2, 0.) - bottleneck_distance(v1, v2, upper_bound / 10000.)) <= upper_bound / 10000.);
+
+ std::vector< std::pair<double, double> > empty;
+ std::vector< std::pair<double, double> > one = {{8, 10}};
+ BOOST_CHECK(bottleneck_distance(empty, empty) == 0);
+ BOOST_CHECK(bottleneck_distance(empty, one) == 1);
}
diff --git a/src/Spatial_searching/include/gudhi/Kd_tree_search.h b/src/Spatial_searching/include/gudhi/Kd_tree_search.h
index 87969dd9..a50a8537 100644
--- a/src/Spatial_searching/include/gudhi/Kd_tree_search.h
+++ b/src/Spatial_searching/include/gudhi/Kd_tree_search.h
@@ -12,11 +12,12 @@
#ifndef KD_TREE_SEARCH_H_
#define KD_TREE_SEARCH_H_
+#include <gudhi/Debug_utils.h>
+
#include <CGAL/Orthogonal_k_neighbor_search.h>
#include <CGAL/Orthogonal_incremental_neighbor_search.h>
#include <CGAL/Search_traits.h>
#include <CGAL/Search_traits_adapter.h>
-#include <CGAL/Fuzzy_sphere.h>
#include <CGAL/property_map.h>
#include <CGAL/version.h> // for CGAL_VERSION_NR
@@ -40,7 +41,6 @@
namespace Gudhi {
namespace spatial_searching {
-
/**
* \class Kd_tree_search Kd_tree_search.h gudhi/Kd_tree_search.h
* \brief Spatial tree data structure to perform (approximate) nearest and furthest neighbor search.
@@ -83,7 +83,8 @@ class Kd_tree_search {
typedef CGAL::Search_traits<
FT, Point,
typename Traits::Cartesian_const_iterator_d,
- typename Traits::Construct_cartesian_const_iterator_d> Traits_base;
+ typename Traits::Construct_cartesian_const_iterator_d,
+ typename Traits::Dimension> Traits_base;
typedef CGAL::Search_traits_adapter<
std::ptrdiff_t,
@@ -110,7 +111,76 @@ class Kd_tree_search {
/// of a point P and `second` is the squared distance between P and the query point.
typedef Incremental_neighbor_search INS_range;
- typedef CGAL::Fuzzy_sphere<STraits> Fuzzy_sphere;
+ // Because CGAL::Fuzzy_sphere takes the radius and not its square
+ struct Sphere_for_kdtree_search
+ {
+ typedef typename Traits::Point_d Point_d;
+ typedef typename Traits::FT FT;
+ typedef typename Traits::Dimension D;
+ typedef D Dimension;
+
+ private:
+ STraits traits;
+ Point_d c;
+ FT sqradmin, sqradmax;
+ bool use_max;
+
+ public:
+ // `prefer_max` means that we prefer outputting more points at squared distance between r2min and r2max,
+ // while `!prefer_max` means we prefer fewer.
+ Sphere_for_kdtree_search(Point_d const& c_, FT const& r2min, FT const& r2max, bool prefer_max=true, STraits const& traits_ = {})
+ : traits(traits_), c(c_), sqradmin(r2min), sqradmax(r2max), use_max(prefer_max)
+ { GUDHI_CHECK(r2min >= 0 && r2max >= r2min, "0 <= r2min <= r2max"); }
+
+ bool contains(std::ptrdiff_t i) const {
+ const Point_d& p = get(traits.point_property_map(), i);
+ auto ccci = traits.construct_cartesian_const_iterator_d_object();
+ return contains_point_given_as_coordinates(ccci(p), ccci(p, 0));
+ }
+
+ template <typename Coord_iterator>
+ bool contains_point_given_as_coordinates(Coord_iterator pi, Coord_iterator CGAL_UNUSED) const {
+ FT distance = 0;
+ auto ccci = traits.construct_cartesian_const_iterator_d_object();
+ auto ci = ccci(c);
+ auto ce = ccci(c, 0);
+ FT const& limit = use_max ? sqradmax : sqradmin;
+ while (ci != ce) {
+ distance += CGAL::square(*pi++ - *ci++);
+ // I think Clément advised to check the distance at every step instead of
+ // just at the end, especially when the dimension becomes large. Distance
+ // isn't part of the concept anyway.
+ if (distance > limit) return false;
+ }
+ return true;
+ }
+
+ bool inner_range_intersects(CGAL::Kd_tree_rectangle<FT, D> const& rect) const {
+ auto ccci = traits.construct_cartesian_const_iterator_d_object();
+ FT distance = 0;
+ auto ci = ccci(c);
+ auto ce = ccci(c, 0);
+ for (int i = 0; ci != ce; ++i, ++ci) {
+ distance += CGAL::square(CGAL::max<FT>(CGAL::max<FT>(*ci - rect.max_coord(i), rect.min_coord(i) - *ci), 0 ));
+ if (distance > sqradmin) return false;
+ }
+ return true;
+ }
+
+
+ bool outer_range_contains(CGAL::Kd_tree_rectangle<FT, D> const& rect) const {
+ auto ccci = traits.construct_cartesian_const_iterator_d_object();
+ FT distance = 0;
+ auto ci = ccci(c);
+ auto ce = ccci(c, 0);
+ for (int i = 0; ci != ce; ++i, ++ci) {
+ distance += CGAL::square(CGAL::max<FT>(*ci - rect.min_coord(i), rect.max_coord(i) - *ci));
+ if (distance > sqradmax) return false;
+ }
+ return true;
+ }
+ };
+
/// \brief Constructor
/// @param[in] points Const reference to the point range. This range
/// is not copied, so it should not be destroyed or modified afterwards.
@@ -266,10 +336,26 @@ class Kd_tree_search {
/// @param[in] eps Approximation factor.
template <typename OutputIterator>
void all_near_neighbors(Point const& p,
- FT radius,
+ FT const& radius,
OutputIterator it,
FT eps = FT(0)) const {
- m_tree.search(it, Fuzzy_sphere(p, radius, eps, m_tree.traits()));
+ all_near_neighbors2(p, CGAL::square(radius - eps), CGAL::square(radius + eps), it);
+ }
+
+ /// \brief Search for all the neighbors in a ball. This is similar to `all_near_neighbors` but takes directly
+ /// the square of the minimum distance below which points must be considered neighbors and square of the
+ /// maximum distance above which they cannot be.
+ /// @param[in] p The query point.
+ /// @param[in] sq_radius_min The square of the minimum search radius
+ /// @param[in] sq_radius_max The square of the maximum search radius
+ /// @param[out] it The points that lie inside the sphere of center `p` and squared radius `sq_radius`.
+ /// Note: `it` is used this way: `*it++ = each_point`.
+ template <typename OutputIterator>
+ void all_near_neighbors2(Point const& p,
+ FT const& sq_radius_min,
+ FT const& sq_radius_max,
+ OutputIterator it) const {
+ m_tree.search(it, Sphere_for_kdtree_search(p, sq_radius_min, sq_radius_max, true, m_tree.traits()));
}
int tree_depth() const {
diff --git a/src/Subsampling/include/gudhi/pick_n_random_points.h b/src/Subsampling/include/gudhi/pick_n_random_points.h
index a67b2b84..e4246c29 100644
--- a/src/Subsampling/include/gudhi/pick_n_random_points.h
+++ b/src/Subsampling/include/gudhi/pick_n_random_points.h
@@ -11,7 +11,9 @@
#ifndef PICK_N_RANDOM_POINTS_H_
#define PICK_N_RANDOM_POINTS_H_
-#include <gudhi/Clock.h>
+#ifdef GUDHI_SUBSAMPLING_PROFILING
+# include <gudhi/Clock.h>
+#endif
#include <boost/range/size.hpp>
@@ -44,6 +46,12 @@ void pick_n_random_points(Point_container const &points,
Gudhi::Clock t;
#endif
+ std::random_device rd;
+ std::mt19937 g(rd());
+
+#if __cplusplus >= 201703L
+ std::sample(std::begin(points), std::end(points), output_it, final_size, g);
+#else
std::size_t nbP = boost::size(points);
if (final_size > nbP)
final_size = nbP;
@@ -51,14 +59,12 @@ void pick_n_random_points(Point_container const &points,
std::vector<int> landmarks(nbP);
std::iota(landmarks.begin(), landmarks.end(), 0);
- std::random_device rd;
- std::mt19937 g(rd());
-
std::shuffle(landmarks.begin(), landmarks.end(), g);
landmarks.resize(final_size);
for (int l : landmarks)
*output_it++ = points[l];
+#endif
#ifdef GUDHI_SUBSAMPLING_PROFILING
t.end();
diff --git a/src/Subsampling/include/gudhi/sparsify_point_set.h b/src/Subsampling/include/gudhi/sparsify_point_set.h
index b30cec80..4571b8f3 100644
--- a/src/Subsampling/include/gudhi/sparsify_point_set.h
+++ b/src/Subsampling/include/gudhi/sparsify_point_set.h
@@ -11,6 +11,13 @@
#ifndef SPARSIFY_POINT_SET_H_
#define SPARSIFY_POINT_SET_H_
+#include <boost/version.hpp>
+#if BOOST_VERSION < 106600
+# include <boost/function_output_iterator.hpp>
+#else
+# include <boost/iterator/function_output_iterator.hpp>
+#endif
+
#include <gudhi/Kd_tree_search.h>
#ifdef GUDHI_SUBSAMPLING_PROFILING
#include <gudhi/Clock.h>
@@ -27,7 +34,7 @@ namespace subsampling {
* \ingroup subsampling
* \brief Outputs a subset of the input points so that the
* squared distance between any two points
- * is greater than or equal to `min_squared_dist`.
+ * is greater than `min_squared_dist`.
*
* \tparam Kernel must be a model of the <a target="_blank"
* href="http://doc.cgal.org/latest/Spatial_searching/classSearchTraits.html">SearchTraits</a>
@@ -63,29 +70,15 @@ sparsify_point_set(
// Parse the input points, and add them if they are not too close to
// the other points
std::size_t pt_idx = 0;
- for (typename Point_range::const_iterator it_pt = input_pts.begin();
- it_pt != input_pts.end();
- ++it_pt, ++pt_idx) {
- if (dropped_points[pt_idx])
+ for (auto const& pt : input_pts) {
+ if (dropped_points[pt_idx++])
continue;
- *output_it++ = *it_pt;
-
- auto ins_range = points_ds.incremental_nearest_neighbors(*it_pt);
+ *output_it++ = pt;
// If another point Q is closer that min_squared_dist, mark Q to be dropped
- for (auto const& neighbor : ins_range) {
- std::size_t neighbor_point_idx = neighbor.first;
- // If the neighbor is too close, we drop the neighbor
- if (neighbor.second < min_squared_dist) {
- // N.B.: If neighbor_point_idx < pt_idx,
- // dropped_points[neighbor_point_idx] is already true but adding a
- // test doesn't make things faster, so why bother?
- dropped_points[neighbor_point_idx] = true;
- } else {
- break;
- }
- }
+ auto drop = [&dropped_points] (std::ptrdiff_t neighbor_point_idx) { dropped_points[neighbor_point_idx] = true; };
+ points_ds.all_near_neighbors2(pt, min_squared_dist, min_squared_dist, boost::make_function_output_iterator(std::ref(drop)));
}
#ifdef GUDHI_SUBSAMPLING_PROFILING
diff --git a/src/cmake/modules/GUDHI_third_party_libraries.cmake b/src/cmake/modules/GUDHI_third_party_libraries.cmake
index 9dadac4f..e1566877 100644
--- a/src/cmake/modules/GUDHI_third_party_libraries.cmake
+++ b/src/cmake/modules/GUDHI_third_party_libraries.cmake
@@ -106,8 +106,8 @@ function( find_python_module PYTHON_MODULE_NAME )
OUTPUT_VARIABLE PYTHON_MODULE_VERSION
ERROR_VARIABLE PYTHON_MODULE_ERROR)
if(PYTHON_MODULE_RESULT EQUAL 0)
- # Remove carriage return
- string(STRIP ${PYTHON_MODULE_VERSION} PYTHON_MODULE_VERSION)
+ # Remove all carriage returns as it can be multiline
+ string(REGEX REPLACE "\n" " " PYTHON_MODULE_VERSION "${PYTHON_MODULE_VERSION}")
message ("++ Python module ${PYTHON_MODULE_NAME} - Version ${PYTHON_MODULE_VERSION} found")
set(${PYTHON_MODULE_NAME_UP}_VERSION ${PYTHON_MODULE_VERSION} PARENT_SCOPE)
diff --git a/src/python/doc/bottleneck_distance_user.rst b/src/python/doc/bottleneck_distance_user.rst
index 6c6e08d9..7baa76cc 100644
--- a/src/python/doc/bottleneck_distance_user.rst
+++ b/src/python/doc/bottleneck_distance_user.rst
@@ -47,7 +47,7 @@ The following example explains how the distance is computed:
:figclass: align-center
The point (0, 13) is at distance 6.5 from the diagonal and more
- specifically from the point (6.5, 6.5)
+ specifically from the point (6.5, 6.5).
Basic example
@@ -72,6 +72,6 @@ The output is:
.. testoutput::
- Bottleneck distance approximation = 0.81
+ Bottleneck distance approximation = 0.72
Bottleneck distance value = 0.75
diff --git a/src/python/doc/cubical_complex_user.rst b/src/python/doc/cubical_complex_user.rst
index 3fd4e27a..6a211347 100644
--- a/src/python/doc/cubical_complex_user.rst
+++ b/src/python/doc/cubical_complex_user.rst
@@ -47,8 +47,8 @@ be a set of two elements).
For further details and theory of cubical complexes, please consult :cite:`kaczynski2004computational` as well as the
following paper :cite:`peikert2012topological`.
-Data structure.
----------------
+Data structure
+--------------
The implementation of Cubical complex provides a representation of complexes that occupy a rectangular region in
:math:`\mathbb{R}^n`. This extra assumption allows for a memory efficient way of storing cubical complexes in a form
@@ -77,8 +77,8 @@ Knowing the sizes of the bitmap, by a series of modulo operation, we can determi
present in the product that gives the cube :math:`C`. In a similar way, we can compute boundary and the coboundary of
each cube. Further details can be found in the literature.
-Input Format.
--------------
+Input Format
+------------
In the current implantation, filtration is given at the maximal cubes, and it is then extended by the lower star
filtration to all cubes. There are a number of constructors that can be used to construct cubical complex by users
@@ -108,8 +108,8 @@ the program output is:
Cubical complex is of dimension 2 - 49 simplices.
-Periodic boundary conditions.
------------------------------
+Periodic boundary conditions
+----------------------------
Often one would like to impose periodic boundary conditions to the cubical complex (cf.
:doc:`periodic_cubical_complex_ref`).
@@ -154,7 +154,13 @@ the program output is:
Periodic cubical complex is of dimension 2 - 42 simplices.
-Examples.
----------
+Examples
+--------
End user programs are available in python/example/ folder.
+
+Tutorial
+--------
+
+This `notebook <https://github.com/GUDHI/TDA-tutorial/blob/master/Tuto-GUDHI-cubical-complexes.ipynb>`_
+explains how to represent sublevels sets of functions using cubical complexes. \ No newline at end of file
diff --git a/src/python/doc/representations.rst b/src/python/doc/representations.rst
index 041e3247..b0477197 100644
--- a/src/python/doc/representations.rst
+++ b/src/python/doc/representations.rst
@@ -12,11 +12,45 @@ This module, originally available at https://github.com/MathieuCarriere/sklearn-
A diagram is represented as a numpy array of shape (n,2), as can be obtained from :func:`~gudhi.SimplexTree.persistence_intervals_in_dimension` for instance. Points at infinity are represented as a numpy array of shape (n,1), storing only the birth time. The classes in this module can handle several persistence diagrams at once. In that case, the diagrams are provided as a list of numpy arrays. Note that it is not necessary for the diagrams to have the same number of points, i.e., for the corresponding arrays to have the same number of rows: all classes can handle arrays with different shapes.
-A small example is provided
+Examples
+--------
-.. only:: builder_html
+Landscapes
+^^^^^^^^^^
- * :download:`diagram_vectorizations_distances_kernels.py <../example/diagram_vectorizations_distances_kernels.py>`
+This example computes the first two Landscapes associated to a persistence diagram with four points. The landscapes are evaluated on ten samples, leading to two vectors with ten coordinates each, that are eventually concatenated in order to produce a single vector representation.
+
+.. testcode::
+
+ import numpy as np
+ from gudhi.representations import Landscape
+ # A single diagram with 4 points
+ D = np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.]])
+ diags = [D]
+ l=Landscape(num_landscapes=2,resolution=10).fit_transform(diags)
+ print(l)
+
+The output is:
+
+.. testoutput::
+
+ [[1.02851895 2.05703791 2.57129739 1.54277843 0.89995409 1.92847304
+ 2.95699199 3.08555686 2.05703791 1.02851895 0. 0.64282435
+ 0. 0. 0.51425948 0. 0. 0.
+ 0.77138922 1.02851895]]
+
+Various kernels
+^^^^^^^^^^^^^^^
+
+This small example is also provided
+:download:`diagram_vectorizations_distances_kernels.py <../example/diagram_vectorizations_distances_kernels.py>`
+
+Machine Learning and Topological Data Analysis
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This `notebook <https://github.com/GUDHI/TDA-tutorial/blob/master/Tuto-GUDHI-representations.ipynb>`_ explains how to
+efficiently combine machine learning and topological data analysis with the
+:doc:`representations module<representations>`.
Preprocessing
@@ -46,27 +80,3 @@ Metrics
:members:
:special-members:
:show-inheritance:
-
-Basic example
--------------
-
-This example computes the first two Landscapes associated to a persistence diagram with four points. The landscapes are evaluated on ten samples, leading to two vectors with ten coordinates each, that are eventually concatenated in order to produce a single vector representation.
-
-.. testcode::
-
- import numpy as np
- from gudhi.representations import Landscape
- # A single diagram with 4 points
- D = np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.]])
- diags = [D]
- l=Landscape(num_landscapes=2,resolution=10).fit_transform(diags)
- print(l)
-
-The output is:
-
-.. testoutput::
-
- [[1.02851895 2.05703791 2.57129739 1.54277843 0.89995409 1.92847304
- 2.95699199 3.08555686 2.05703791 1.02851895 0. 0.64282435
- 0. 0. 0.51425948 0. 0. 0.
- 0.77138922 1.02851895]]
diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst
index 96ec7872..9ffc2759 100644
--- a/src/python/doc/wasserstein_distance_user.rst
+++ b/src/python/doc/wasserstein_distance_user.rst
@@ -175,3 +175,10 @@ The output is:
[[0.27916667 0.55416667]
[0.7375 0.7625 ]
[0.2375 0.2625 ]]
+
+Tutorial
+********
+
+This
+`notebook <https://github.com/GUDHI/TDA-tutorial/blob/master/Tuto-GUDHI-Barycenters-of-persistence-diagrams.ipynb>`_
+presents the concept of barycenter, or Fréchet mean, of a family of persistence diagrams. \ No newline at end of file
diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 5ca127f6..cdcb1fde 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -323,22 +323,15 @@ class BettiCurve(BaseEstimator, TransformerMixin):
Returns:
numpy array with shape (number of diagrams) x (**resolution**): output Betti curves.
"""
- num_diag, Xfit = len(X), []
+ Xfit = []
x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution)
step_x = x_values[1] - x_values[0]
- for i in range(num_diag):
-
- diagram, num_pts_in_diag = X[i], X[i].shape[0]
-
+ for diagram in X:
+ diagram_int = np.clip(np.ceil((diagram[:,:2] - self.sample_range[0]) / step_x), 0, self.resolution).astype(int)
bc = np.zeros(self.resolution)
- for j in range(num_pts_in_diag):
- [px,py] = diagram[j,:2]
- min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
- max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
- for k in range(min_idx, max_idx):
- bc[k] += 1
-
+ for interval in diagram_int:
+ bc[interval[0]:interval[1]] += 1
Xfit.append(np.reshape(bc,[1,-1]))
Xfit = np.concatenate(Xfit, 0)
diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx
index 813dc5c2..cdd2e87b 100644
--- a/src/python/gudhi/simplex_tree.pyx
+++ b/src/python/gudhi/simplex_tree.pyx
@@ -389,37 +389,39 @@ cdef class SimplexTree:
self.get_ptr().reset_filtration(filtration, min_dim)
def extend_filtration(self):
- """ Extend filtration for computing extended persistence. This function only uses the
- filtration values at the 0-dimensional simplices, and computes the extended persistence
- diagram induced by the lower-star filtration computed with these values.
+ """ Extend filtration for computing extended persistence. This function only uses the filtration values at the
+ 0-dimensional simplices, and computes the extended persistence diagram induced by the lower-star filtration
+ computed with these values.
.. note::
- Note that after calling this function, the filtration
- values are actually modified within the simplex tree.
- The function :func:`extended_persistence`
- retrieves the original values.
+ Note that after calling this function, the filtration values are actually modified within the simplex tree.
+ The function :func:`extended_persistence` retrieves the original values.
.. note::
- Note that this code creates an extra vertex internally, so you should make sure that
- the simplex tree does not contain a vertex with the largest possible value (i.e., 4294967295).
+ Note that this code creates an extra vertex internally, so you should make sure that the simplex tree does
+ not contain a vertex with the largest possible value (i.e., 4294967295).
+
+ This `notebook <https://github.com/GUDHI/TDA-tutorial/blob/master/Tuto-GUDHI-extended-persistence.ipynb>`_
+ explains how to compute an extension of persistence called extended persistence.
"""
self.get_ptr().compute_extended_filtration()
def extended_persistence(self, homology_coeff_field=11, min_persistence=0):
- """This function retrieves good values for extended persistence, and separate the diagrams
- into the Ordinary, Relative, Extended+ and Extended- subdiagrams.
+ """This function retrieves good values for extended persistence, and separate the diagrams into the Ordinary,
+ Relative, Extended+ and Extended- subdiagrams.
- :param homology_coeff_field: The homology coefficient field. Must be a
- prime number. Default value is 11.
+ :param homology_coeff_field: The homology coefficient field. Must be a prime number. Default value is 11.
:type homology_coeff_field: int
- :param min_persistence: The minimum persistence value (i.e., the absolute value of the difference between the persistence diagram point coordinates) to take into
- account (strictly greater than min_persistence). Default value is
- 0.0.
- Sets min_persistence to -1.0 to see all values.
+ :param min_persistence: The minimum persistence value (i.e., the absolute value of the difference between the
+ persistence diagram point coordinates) to take into account (strictly greater than min_persistence).
+ Default value is 0.0. Sets min_persistence to -1.0 to see all values.
:type min_persistence: float
- :returns: A list of four persistence diagrams in the format described in :func:`persistence`. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. See https://link.springer.com/article/10.1007/s10208-008-9027-z and/or section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes.
+ :returns: A list of four persistence diagrams in the format described in :func:`persistence`. The first one is
+ Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-.
+ See https://link.springer.com/article/10.1007/s10208-008-9027-z and/or section 2.2 in
+ https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes.
.. note::
@@ -430,6 +432,9 @@ cdef class SimplexTree:
The coordinates of the persistence diagram points might be a little different than the
original filtration values due to the internal transformation (scaling to [-2,-1]) that is
performed on these values during the computation of extended persistence.
+
+ This `notebook <https://github.com/GUDHI/TDA-tutorial/blob/master/Tuto-GUDHI-extended-persistence.ipynb>`_
+ explains how to compute an extension of persistence called extended persistence.
"""
cdef vector[pair[int, pair[double, double]]] persistence_result
if self.pcohptr != NULL:
diff --git a/src/python/gudhi/subsampling.pyx b/src/python/gudhi/subsampling.pyx
index b11d07e5..46f32335 100644
--- a/src/python/gudhi/subsampling.pyx
+++ b/src/python/gudhi/subsampling.pyx
@@ -105,7 +105,7 @@ def pick_n_random_points(points=None, off_file='', nb_points=0):
def sparsify_point_set(points=None, off_file='', min_squared_dist=0.0):
"""Outputs a subset of the input points so that the squared distance
- between any two points is greater than or equal to min_squared_dist.
+ between any two points is greater than min_squared_dist.
:param points: The input point set.
:type points: Iterable[Iterable[float]]
diff --git a/src/python/include/Alpha_complex_factory.h b/src/python/include/Alpha_complex_factory.h
index d699ad9b..3405fdd6 100644
--- a/src/python/include/Alpha_complex_factory.h
+++ b/src/python/include/Alpha_complex_factory.h
@@ -48,11 +48,14 @@ static CgalPointType pt_cython_to_cgal(std::vector<double> const& vec) {
class Abstract_alpha_complex {
public:
virtual std::vector<double> get_point(int vh) = 0;
+
virtual bool create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square,
bool default_filtration_value) = 0;
+
+ virtual ~Abstract_alpha_complex() = default;
};
-class Exact_Alphacomplex_dD : public Abstract_alpha_complex {
+class Exact_Alphacomplex_dD final : public Abstract_alpha_complex {
private:
using Kernel = CGAL::Epeck_d<CGAL::Dynamic_dimension_tag>;
using Point = typename Kernel::Point_d;
@@ -78,7 +81,7 @@ class Exact_Alphacomplex_dD : public Abstract_alpha_complex {
Alpha_complex<Kernel> alpha_complex_;
};
-class Inexact_Alphacomplex_dD : public Abstract_alpha_complex {
+class Inexact_Alphacomplex_dD final : public Abstract_alpha_complex {
private:
using Kernel = CGAL::Epick_d<CGAL::Dynamic_dimension_tag>;
using Point = typename Kernel::Point_d;
@@ -104,7 +107,7 @@ class Inexact_Alphacomplex_dD : public Abstract_alpha_complex {
};
template <complexity Complexity>
-class Alphacomplex_3D : public Abstract_alpha_complex {
+class Alphacomplex_3D final : public Abstract_alpha_complex {
private:
using Point = typename Alpha_complex_3d<Complexity, false, false>::Bare_point_3;
diff --git a/src/python/test/test_bottleneck_distance.py b/src/python/test/test_bottleneck_distance.py
index 6915bea8..07fcc9cc 100755
--- a/src/python/test/test_bottleneck_distance.py
+++ b/src/python/test/test_bottleneck_distance.py
@@ -25,3 +25,15 @@ def test_basic_bottleneck():
assert gudhi.bottleneck_distance(diag1, diag2, 0.1) == pytest.approx(0.75, abs=0.1)
assert gudhi.hera.bottleneck_distance(diag1, diag2, 0) == 0.75
assert gudhi.hera.bottleneck_distance(diag1, diag2, 0.1) == pytest.approx(0.75, rel=0.1)
+
+ import numpy as np
+
+ # Translating both diagrams along the diagonal should not affect the distance,
+ # checks that negative numbers are not an issue
+ diag1 = np.array(diag1) - 100
+ diag2 = np.array(diag2) - 100
+
+ assert gudhi.bottleneck_distance(diag1, diag2) == 0.75
+ assert gudhi.bottleneck_distance(diag1, diag2, 0.1) == pytest.approx(0.75, abs=0.1)
+ assert gudhi.hera.bottleneck_distance(diag1, diag2, 0) == 0.75
+ assert gudhi.hera.bottleneck_distance(diag1, diag2, 0.1) == pytest.approx(0.75, rel=0.1)
diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py
index e5c211a0..43c914f3 100755
--- a/src/python/test/test_representations.py
+++ b/src/python/test/test_representations.py
@@ -39,11 +39,11 @@ def test_multiple():
d2 = BottleneckDistance(epsilon=0.00001).fit_transform(l1)
d3 = pairwise_persistence_diagram_distances(l1, l1b, e=0.00001, n_jobs=4)
assert d1 == pytest.approx(d2)
- assert d3 == pytest.approx(d2, abs=1e-5) # Because of 0 entries (on the diagonal)
+ assert d3 == pytest.approx(d2, abs=1e-5) # Because of 0 entries (on the diagonal)
d1 = pairwise_persistence_diagram_distances(l1, l2, metric="wasserstein", order=2, internal_p=2)
d2 = WassersteinDistance(order=2, internal_p=2, n_jobs=4).fit(l2).transform(l1)
print(d1.shape, d2.shape)
- assert d1 == pytest.approx(d2, rel=.02)
+ assert d1 == pytest.approx(d2, rel=0.02)
def test_dummy_atol():
@@ -53,8 +53,22 @@ def test_dummy_atol():
for weighting_method in ["cloud", "iidproba"]:
for contrast in ["gaussian", "laplacian", "indicator"]:
- atol_vectoriser = Atol(quantiser=KMeans(n_clusters=1, random_state=202006), weighting_method=weighting_method, contrast=contrast)
+ atol_vectoriser = Atol(
+ quantiser=KMeans(n_clusters=1, random_state=202006),
+ weighting_method=weighting_method,
+ contrast=contrast,
+ )
atol_vectoriser.fit([a, b, c])
atol_vectoriser(a)
atol_vectoriser.transform(X=[a, b, c])
+
+from gudhi.representations.vector_methods import BettiCurve
+
+
+def test_infinity():
+ a = np.array([[1.0, 8.0], [2.0, np.inf], [3.0, 4.0]])
+ c = BettiCurve(20, [0.0, 10.0])(a)
+ assert c[1] == 0
+ assert c[7] == 3
+ assert c[9] == 2
diff --git a/src/python/test/test_subsampling.py b/src/python/test/test_subsampling.py
index 31f64e32..4019852e 100755
--- a/src/python/test/test_subsampling.py
+++ b/src/python/test/test_subsampling.py
@@ -141,12 +141,16 @@ def test_simple_sparsify_points():
# assert gudhi.sparsify_point_set(points = [], min_squared_dist = 0.0) == []
# assert gudhi.sparsify_point_set(points = [], min_squared_dist = 10.0) == []
assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=0.0) == point_set
- assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=1.0) == point_set
- assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=2.0) == [
+ assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=0.999) == point_set
+ assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=1.001) == [
[0, 1],
[1, 0],
]
- assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=2.01) == [[0, 1]]
+ assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=1.999) == [
+ [0, 1],
+ [1, 0],
+ ]
+ assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=2.001) == [[0, 1]]
assert (
len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=0.0))
@@ -157,11 +161,11 @@ def test_simple_sparsify_points():
== 5
)
assert (
- len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=40.0))
+ len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=40.1))
== 4
)
assert (
- len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=90.0))
+ len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=89.9))
== 3
)
assert (
@@ -169,7 +173,7 @@ def test_simple_sparsify_points():
== 2
)
assert (
- len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=325.0))
+ len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=324.9))
== 2
)
assert (