summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVincent Rouvreau <vincent.rouvreau@inria.fr>2022-08-18 16:36:25 +0200
committerVincent Rouvreau <vincent.rouvreau@inria.fr>2022-08-18 16:36:25 +0200
commitb1280505d965aa752f882fca61d6abd91e95a9d7 (patch)
tree2cc0660a396fbb67c367fa4e88eb6d650e3afa7b
parentced97241ae95483e253469ee94d6b8968066a7c7 (diff)
parentfc8290b3ff3236ce88a5e6761dee3242ee1376a0 (diff)
Merge branch 'master' into ninja_for_osx_and_windows
-rw-r--r--.github/next_release.md21
-rw-r--r--.github/workflows/pip-packaging-windows.yml2
-rw-r--r--CMakeGUDHIVersion.txt2
-rw-r--r--src/Cech_complex/benchmark/cech_complex_benchmark.cpp4
-rw-r--r--src/Cech_complex/include/gudhi/Cech_complex.h19
-rw-r--r--src/Cech_complex/include/gudhi/Cech_complex_blocker.h42
-rw-r--r--src/Cech_complex/include/gudhi/Sphere_circumradius.h17
-rw-r--r--src/Contraction/include/gudhi/Skeleton_blocker_contractor.h21
-rw-r--r--src/Doxyfile.in7
-rw-r--r--src/python/CMakeLists.txt11
-rw-r--r--src/python/doc/cubical_complex_sklearn_itf_ref.rst102
-rw-r--r--src/python/doc/cubical_complex_sum.inc33
-rw-r--r--src/python/doc/cubical_complex_user.rst11
-rw-r--r--src/python/doc/differentiation_sum.inc4
-rw-r--r--src/python/doc/img/sklearn.pngbin0 -> 9368 bytes
-rw-r--r--src/python/doc/img/tensorflow.pngbin0 -> 3846 bytes
-rw-r--r--src/python/doc/installation.rst2
-rw-r--r--src/python/doc/persistent_cohomology_user.rst29
-rw-r--r--src/python/doc/rips_complex_sum.inc6
-rw-r--r--src/python/doc/rips_complex_user.rst8
-rw-r--r--src/python/doc/simplex_tree_sum.inc6
-rw-r--r--src/python/gudhi/representations/preprocessing.py57
-rw-r--r--src/python/gudhi/representations/vector_methods.py18
-rw-r--r--src/python/gudhi/simplex_tree.pyx6
-rw-r--r--src/python/gudhi/sklearn/__init__.py0
-rw-r--r--src/python/gudhi/sklearn/cubical_persistence.py110
-rwxr-xr-xsrc/python/test/test_dtm.py16
-rwxr-xr-xsrc/python/test/test_representations.py21
-rw-r--r--src/python/test/test_representations_preprocessing.py39
-rw-r--r--src/python/test/test_sklearn_cubical_persistence.py59
30 files changed, 538 insertions, 135 deletions
diff --git a/.github/next_release.md b/.github/next_release.md
index f8085513..64bda353 100644
--- a/.github/next_release.md
+++ b/.github/next_release.md
@@ -1,30 +1,19 @@
-We are pleased to announce the release 3.X.X of the GUDHI library.
+We are pleased to announce the release 3.7.0 of the GUDHI library.
As a major new feature, the GUDHI library now offers ...
We are now using GitHub to develop the GUDHI library, do not hesitate to [fork the GUDHI project on GitHub](https://github.com/GUDHI/gudhi-devel). From a user point of view, we recommend to download GUDHI user version (gudhi.3.X.X.tar.gz).
-Below is a list of changes made since GUDHI 3.5.0:
+Below is a list of changes made since GUDHI 3.6.0:
-- [Alpha complex](https://gudhi.inria.fr/python/latest/alpha_complex_user.html)
- - the python weighted version for alpha complex is now available in any dimension D.
- - `alpha_complex = gudhi.AlphaComplex(off_file='/data/points/tore3D_300.off')` is deprecated, please use [read_points_from_off_file](https://gudhi.inria.fr/python/latest/point_cloud.html#gudhi.read_points_from_off_file) instead.
-
-- [Representations](https://gudhi.inria.fr/python/latest/representations.html#gudhi.representations.vector_methods.BettiCurve)
- - A more flexible Betti curve class capable of computing exact curves
-
-- [Simplex tree](https://gudhi.inria.fr/python/latest/simplex_tree_ref.html)
- - `__deepcopy__`, `copy` and copy constructors
-
-- Installation
- - Boost &ge; 1.66.0 is now required (was &ge; 1.56.0).
- - Python >= 3.5 and cython >= 0.27 are now required.
+- [Module](link)
+ - ...
- [Module](link)
- ...
- Miscellaneous
- - The [list of bugs that were solved since GUDHI-3.5.0](https://github.com/GUDHI/gudhi-devel/issues?q=label%3A3.6.0+is%3Aclosed) is available on GitHub.
+ - The [list of bugs that were solved since GUDHI-3.6.0](https://github.com/GUDHI/gudhi-devel/issues?q=label%3A3.7.0+is%3Aclosed) is available on GitHub.
All modules are distributed under the terms of the MIT license.
However, there are still GPL dependencies for many modules. We invite you to check our [license dedicated web page](https://gudhi.inria.fr/licensing/) for further details.
diff --git a/.github/workflows/pip-packaging-windows.yml b/.github/workflows/pip-packaging-windows.yml
index 142a114c..48a98036 100644
--- a/.github/workflows/pip-packaging-windows.yml
+++ b/.github/workflows/pip-packaging-windows.yml
@@ -10,7 +10,7 @@ jobs:
strategy:
max-parallel: 4
matrix:
- python-version: ['3.6', '3.7', '3.8', '3.9', '3.10']
+ python-version: ['3.7', '3.8', '3.9', '3.10']
name: Build wheels for Python ${{ matrix.python-version }}
steps:
- uses: actions/checkout@v1
diff --git a/CMakeGUDHIVersion.txt b/CMakeGUDHIVersion.txt
index b7f93799..1dab47ab 100644
--- a/CMakeGUDHIVersion.txt
+++ b/CMakeGUDHIVersion.txt
@@ -1,6 +1,6 @@
# Must be conform to pep440 - https://www.python.org/dev/peps/pep-0440/#pre-releases
set (GUDHI_MAJOR_VERSION 3)
-set (GUDHI_MINOR_VERSION 6)
+set (GUDHI_MINOR_VERSION 7)
# GUDHI_PATCH_VERSION can be 'ZaN' for Alpha release, 'ZbN' for Beta release, 'ZrcN' for release candidate or 'Z' for a final release.
set (GUDHI_PATCH_VERSION 0a0)
set(GUDHI_VERSION ${GUDHI_MAJOR_VERSION}.${GUDHI_MINOR_VERSION}.${GUDHI_PATCH_VERSION})
diff --git a/src/Cech_complex/benchmark/cech_complex_benchmark.cpp b/src/Cech_complex/benchmark/cech_complex_benchmark.cpp
index a9dc5d0d..a0e727be 100644
--- a/src/Cech_complex/benchmark/cech_complex_benchmark.cpp
+++ b/src/Cech_complex/benchmark/cech_complex_benchmark.cpp
@@ -40,9 +40,9 @@ Simplex_tree benchmark_cech(const std::string& off_file_points, const Filtration
Points_off_reader_cgal off_reader_cgal(off_file_points);
Gudhi::Clock cech_clock("Cech computation");
- Cech_complex cech_complex_from_points(off_reader_cgal.get_point_cloud(), radius);
+ Cech_complex cech_complex_from_points(off_reader_cgal.get_point_cloud(), radius, exact);
Simplex_tree cech_stree;
- cech_complex_from_points.create_complex(cech_stree, dim_max, exact);
+ cech_complex_from_points.create_complex(cech_stree, dim_max);
// ------------------------------------------
// Display information about the Cech complex
diff --git a/src/Cech_complex/include/gudhi/Cech_complex.h b/src/Cech_complex/include/gudhi/Cech_complex.h
index 08b7a72f..625f7c9c 100644
--- a/src/Cech_complex/include/gudhi/Cech_complex.h
+++ b/src/Cech_complex/include/gudhi/Cech_complex.h
@@ -62,15 +62,18 @@ class Cech_complex {
*
* @param[in] points Range of points where each point is defined as `kernel::Point_d`.
* @param[in] max_radius Maximal radius value.
+ * @param[in] exact Exact filtration values computation. Not exact if `Kernel` is not <a target="_blank"
+ * href="https://doc.cgal.org/latest/Kernel_d/structCGAL_1_1Epeck__d.html">CGAL::Epeck_d</a>.
+ * Default is false.
*
*/
template<typename InputPointRange >
- Cech_complex(const InputPointRange & points, Filtration_value max_radius) : max_radius_(max_radius) {
+ Cech_complex(const InputPointRange & points, Filtration_value max_radius, const bool exact = false) : max_radius_(max_radius), exact_(exact) {
point_cloud_.assign(std::begin(points), std::end(points));
cech_skeleton_graph_ = Gudhi::compute_proximity_graph<SimplicialComplexForCechComplex>(
- point_cloud_, max_radius_, Sphere_circumradius<Kernel, Filtration_value>());
+ point_cloud_, max_radius_, Sphere_circumradius<Kernel, Filtration_value>(exact));
}
/** \brief Initializes the simplicial complex from the proximity graph and expands it until a given maximal
@@ -78,19 +81,17 @@ class Cech_complex {
*
* @param[in] complex SimplicialComplexForCech to be created.
* @param[in] dim_max graph expansion until this given maximal dimension.
- * @param[in] exact Exact filtration values computation. Not exact if `Kernel` is not <a target="_blank"
- * href="https://doc.cgal.org/latest/Kernel_d/structCGAL_1_1Epeck__d.html">CGAL::Epeck_d</a>.
* @exception std::invalid_argument In debug mode, if `complex.num_vertices()` does not return 0.
*
*/
- void create_complex(SimplicialComplexForCechComplex& complex, int dim_max, const bool exact = false) {
+ void create_complex(SimplicialComplexForCechComplex& complex, int dim_max) {
GUDHI_CHECK(complex.num_vertices() == 0,
std::invalid_argument("Cech_complex::create_complex - simplicial complex is not empty"));
// insert the proximity graph in the simplicial complex
complex.insert_graph(cech_skeleton_graph_);
// expand the graph until dimension dim_max
- complex.expansion_with_blockers(dim_max, cech_blocker(&complex, this, exact));
+ complex.expansion_with_blockers(dim_max, cech_blocker(&complex, this));
}
/** @return max_radius value given at construction. */
@@ -106,11 +107,17 @@ class Cech_complex {
*/
std::vector<Sphere> & get_cache() { return cache_; }
+ /** \brief Check exact option
+ * @return Exact option.
+ */
+ const bool is_exact() { return exact_; }
+
private:
Proximity_graph cech_skeleton_graph_;
Filtration_value max_radius_;
Point_cloud point_cloud_;
std::vector<Sphere> cache_;
+ const bool exact_;
};
} // namespace cech_complex
diff --git a/src/Cech_complex/include/gudhi/Cech_complex_blocker.h b/src/Cech_complex/include/gudhi/Cech_complex_blocker.h
index 1bb205b3..e78e37b7 100644
--- a/src/Cech_complex/include/gudhi/Cech_complex_blocker.h
+++ b/src/Cech_complex/include/gudhi/Cech_complex_blocker.h
@@ -54,6 +54,7 @@ class Cech_blocker {
using Simplex_handle = typename SimplicialComplexForCech::Simplex_handle;
using Filtration_value = typename SimplicialComplexForCech::Filtration_value;
+ using Simplex_key = typename SimplicialComplexForCech::Simplex_key;
template<class PointIterator>
Sphere get_sphere(PointIterator begin, PointIterator end) const {
@@ -70,57 +71,54 @@ class Cech_blocker {
* \return true if the simplex radius is greater than the Cech_complex max_radius*/
bool operator()(Simplex_handle sh) {
using Point_cloud = std::vector<Point_d>;
- CGAL::NT_converter<FT, Filtration_value> cast_to_fv;
Filtration_value radius = 0;
bool is_min_enclos_ball = false;
+ Point_cloud points;
+ points.reserve(sc_ptr_->dimension(sh)+1);
// for each face of simplex sh, test outsider point is indeed inside enclosing ball, if yes, take it and exit loop, otherwise, new sphere is circumsphere of all vertices
for (auto face_opposite_vertex : sc_ptr_->boundary_opposite_vertex_simplex_range(sh)) {
- Sphere sph;
auto k = sc_ptr_->key(face_opposite_vertex.first);
+ Simplex_key sph_key;
if(k != sc_ptr_->null_key()) {
- sph = cc_ptr_->get_cache().at(k);
+ sph_key = k;
}
else {
- Point_cloud face_points;
for (auto vertex : sc_ptr_->simplex_vertex_range(face_opposite_vertex.first)) {
- face_points.push_back(cc_ptr_->get_point(vertex));
+ points.push_back(cc_ptr_->get_point(vertex));
#ifdef DEBUG_TRACES
std::clog << "#(" << vertex << ")#";
#endif // DEBUG_TRACES
}
- sph = get_sphere(face_points.cbegin(), face_points.cend());
// Put edge sphere in cache
- sc_ptr_->assign_key(face_opposite_vertex.first, cc_ptr_->get_cache().size());
- cc_ptr_->get_cache().push_back(sph);
- // Clear face_points
- face_points.clear();
+ sph_key = cc_ptr_->get_cache().size();
+ sc_ptr_->assign_key(face_opposite_vertex.first, sph_key);
+ cc_ptr_->get_cache().push_back(get_sphere(points.cbegin(), points.cend()));
+ // Clear face points
+ points.clear();
}
// Check if the minimal enclosing ball of current face contains the extra point/opposite vertex
+ Sphere const& sph = cc_ptr_->get_cache()[sph_key];
if (kernel_.squared_distance_d_object()(sph.first, cc_ptr_->get_point(face_opposite_vertex.second)) <= sph.second) {
+ is_min_enclos_ball = true;
+ sc_ptr_->assign_key(sh, sph_key);
+ radius = sc_ptr_->filtration(face_opposite_vertex.first);
#ifdef DEBUG_TRACES
std::clog << "center: " << sph.first << ", radius: " << radius << std::endl;
#endif // DEBUG_TRACES
- is_min_enclos_ball = true;
-#if CGAL_VERSION_NR >= 1050000000
- if(exact_) CGAL::exact(sph.second);
-#endif
- radius = std::sqrt(cast_to_fv(sph.second));
- sc_ptr_->assign_key(sh, cc_ptr_->get_cache().size());
- cc_ptr_->get_cache().push_back(sph);
break;
}
}
// Spheres of each face don't contain the whole simplex
if(!is_min_enclos_ball) {
- Point_cloud points;
for (auto vertex : sc_ptr_->simplex_vertex_range(sh)) {
points.push_back(cc_ptr_->get_point(vertex));
}
Sphere sph = get_sphere(points.cbegin(), points.cend());
#if CGAL_VERSION_NR >= 1050000000
- if(exact_) CGAL::exact(sph.second);
+ if(cc_ptr_->is_exact()) CGAL::exact(sph.second);
#endif
+ CGAL::NT_converter<FT, Filtration_value> cast_to_fv;
radius = std::sqrt(cast_to_fv(sph.second));
sc_ptr_->assign_key(sh, cc_ptr_->get_cache().size());
@@ -130,18 +128,18 @@ class Cech_blocker {
#ifdef DEBUG_TRACES
if (radius > cc_ptr_->max_radius()) std::clog << "radius > max_radius => expansion is blocked\n";
#endif // DEBUG_TRACES
- sc_ptr_->assign_filtration(sh, radius);
+ // Check that the filtration to be assigned (radius) would be valid
+ if (radius > sc_ptr_->filtration(sh)) sc_ptr_->assign_filtration(sh, radius);
return (radius > cc_ptr_->max_radius());
}
/** \internal \brief Čech complex blocker constructor. */
- Cech_blocker(SimplicialComplexForCech* sc_ptr, Cech_complex* cc_ptr, const bool exact) : sc_ptr_(sc_ptr), cc_ptr_(cc_ptr), exact_(exact) {}
+ Cech_blocker(SimplicialComplexForCech* sc_ptr, Cech_complex* cc_ptr) : sc_ptr_(sc_ptr), cc_ptr_(cc_ptr) {}
private:
SimplicialComplexForCech* sc_ptr_;
Cech_complex* cc_ptr_;
Kernel kernel_;
- const bool exact_;
};
} // namespace cech_complex
diff --git a/src/Cech_complex/include/gudhi/Sphere_circumradius.h b/src/Cech_complex/include/gudhi/Sphere_circumradius.h
index 790f6950..2f916c0a 100644
--- a/src/Cech_complex/include/gudhi/Sphere_circumradius.h
+++ b/src/Cech_complex/include/gudhi/Sphere_circumradius.h
@@ -12,6 +12,7 @@
#define SPHERE_CIRCUMRADIUS_H_
#include <CGAL/Epick_d.h> // for #include <CGAL/NT_converter.h> which is not working/compiling alone
+#include <CGAL/Lazy_exact_nt.h> // for CGAL::exact
#include <cmath> // for std::sqrt
#include <vector>
@@ -26,6 +27,7 @@ template<typename Kernel, typename Filtration_value>
class Sphere_circumradius {
private:
Kernel kernel_;
+ const bool exact_;
public:
using FT = typename Kernel::FT;
using Point = typename Kernel::Point_d;
@@ -42,7 +44,9 @@ class Sphere_circumradius {
*
*/
Filtration_value operator()(const Point& point_1, const Point& point_2) const {
- return std::sqrt(cast_to_fv(kernel_.squared_distance_d_object()(point_1, point_2))) / 2.;
+ auto squared_dist_obj = kernel_.squared_distance_d_object()(point_1, point_2);
+ if(exact_) CGAL::exact(squared_dist_obj);
+ return std::sqrt(cast_to_fv(squared_dist_obj)) / 2.;
}
/** \brief Circumradius of sphere passing through point cloud using CGAL.
@@ -53,9 +57,18 @@ class Sphere_circumradius {
*
*/
Filtration_value operator()(const Point_cloud& point_cloud) const {
- return std::sqrt(cast_to_fv(kernel_.compute_squared_radius_d_object()(point_cloud.begin(), point_cloud.end())));
+ auto squared_radius_obj = kernel_.compute_squared_radius_d_object()(point_cloud.begin(), point_cloud.end());
+ if(exact_) CGAL::exact(squared_radius_obj);
+ return std::sqrt(cast_to_fv(squared_radius_obj));
}
+ /** \brief Constructor
+ * @param[in] exact Option for exact filtration values computation. Not exact if `Kernel` is not <a target="_blank"
+ * href="https://doc.cgal.org/latest/Kernel_d/structCGAL_1_1Epeck__d.html">CGAL::Epeck_d</a>.
+ * Default is false.
+ */
+ Sphere_circumradius(const bool exact = false) : exact_(exact) {}
+
};
} // namespace cech_complex
diff --git a/src/Contraction/include/gudhi/Skeleton_blocker_contractor.h b/src/Contraction/include/gudhi/Skeleton_blocker_contractor.h
index 56b76318..6911ca2e 100644
--- a/src/Contraction/include/gudhi/Skeleton_blocker_contractor.h
+++ b/src/Contraction/include/gudhi/Skeleton_blocker_contractor.h
@@ -171,8 +171,13 @@ typename GeometricSimplifiableComplex::Vertex_handle> {
Self const* algorithm_;
};
+#if CGAL_VERSION_NR < 1050500000
typedef CGAL::Modifiable_priority_queue<Edge_handle, Compare_cost, Undirected_edge_id> PQ;
- typedef typename PQ::handle pq_handle;
+#else
+ typedef CGAL::Modifiable_priority_queue<Edge_handle, Compare_cost, Undirected_edge_id, CGAL::CGAL_BOOST_PENDING_RELAXED_HEAP> PQ;
+#endif
+
+ typedef bool pq_handle;
// An Edge_data is associated with EVERY edge in the complex (collapsible or not).
@@ -196,7 +201,7 @@ typename GeometricSimplifiableComplex::Vertex_handle> {
}
bool is_in_PQ() const {
- return PQHandle_ != PQ::null_handle();
+ return PQHandle_ != false;
}
void set_PQ_handle(pq_handle h) {
@@ -204,7 +209,7 @@ typename GeometricSimplifiableComplex::Vertex_handle> {
}
void reset_PQ_handle() {
- PQHandle_ = PQ::null_handle();
+ PQHandle_ = false;
}
private:
@@ -238,16 +243,22 @@ typename GeometricSimplifiableComplex::Vertex_handle> {
}
void insert_in_PQ(Edge_handle edge, Edge_data& data) {
- data.set_PQ_handle(heap_PQ_->push(edge));
+ heap_PQ_->push(edge);
+ data.set_PQ_handle(true);
++current_num_edges_heap_;
}
void update_in_PQ(Edge_handle edge, Edge_data& data) {
+#if CGAL_VERSION_NR < 1050500000
data.set_PQ_handle(heap_PQ_->update(edge, data.PQ_handle()));
+#else
+ heap_PQ_->update(edge);
+#endif
}
void remove_from_PQ(Edge_handle edge, Edge_data& data) {
- data.set_PQ_handle(heap_PQ_->erase(edge, data.PQ_handle()));
+ heap_PQ_->erase(edge);
+ data.set_PQ_handle(false);
--current_num_edges_heap_;
}
diff --git a/src/Doxyfile.in b/src/Doxyfile.in
index fb68ceb1..6e0e0333 100644
--- a/src/Doxyfile.in
+++ b/src/Doxyfile.in
@@ -152,7 +152,7 @@ FULL_PATH_NAMES = YES
# will be relative from the directory where doxygen is started.
# This tag requires that the tag FULL_PATH_NAMES is set to YES.
-STRIP_FROM_PATH =
+STRIP_FROM_PATH = @CMAKE_SOURCE_DIR@
# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
# path mentioned in the documentation of a class, which tells the reader which
@@ -162,7 +162,8 @@ STRIP_FROM_PATH =
# using the -I flag.
STRIP_FROM_INC_PATH = include \
- concept
+ concept \
+ @CMAKE_SOURCE_DIR@
# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but
# less readable) file names. This can be useful is your file systems doesn't
@@ -832,7 +833,7 @@ EXCLUDE = @CMAKE_SOURCE_DIR@/data/ \
@CMAKE_SOURCE_DIR@/ext/ \
@CMAKE_SOURCE_DIR@/README.md \
@CMAKE_SOURCE_DIR@/.github \
- @CMAKE_CURRENT_BINARY_DIR@/new_gudhi_version_creation.md \
+ @CMAKE_CURRENT_BINARY_DIR@ \
@GUDHI_DOXYGEN_SOURCE_PREFIX@/GudhUI/ \
@GUDHI_DOXYGEN_SOURCE_PREFIX@/cmake/ \
@GUDHI_DOXYGEN_SOURCE_PREFIX@/python/
diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt
index 65adef75..5f323935 100644
--- a/src/python/CMakeLists.txt
+++ b/src/python/CMakeLists.txt
@@ -298,6 +298,7 @@ if(PYTHONINTERP_FOUND)
file(COPY "gudhi/dtm_rips_complex.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
file(COPY "gudhi/hera/__init__.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi/hera")
file(COPY "gudhi/datasets" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi" FILES_MATCHING PATTERN "*.py")
+ file(COPY "gudhi/sklearn" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi/")
# Some files for pip package
@@ -574,6 +575,11 @@ if(PYTHONINTERP_FOUND)
add_gudhi_py_test(test_betti_curve_representations)
endif()
+ # Representations preprocessing
+ if(SKLEARN_FOUND)
+ add_gudhi_py_test(test_representations_preprocessing)
+ endif()
+
# Time Delay
add_gudhi_py_test(test_time_delay)
@@ -603,6 +609,11 @@ if(PYTHONINTERP_FOUND)
add_gudhi_py_test(test_remote_datasets)
endif()
+ # sklearn
+ if(SKLEARN_FOUND)
+ add_gudhi_py_test(test_sklearn_cubical_persistence)
+ endif()
+
# persistence graphical tools
if(MATPLOTLIB_FOUND)
add_gudhi_py_test(test_persistence_graphical_tools)
diff --git a/src/python/doc/cubical_complex_sklearn_itf_ref.rst b/src/python/doc/cubical_complex_sklearn_itf_ref.rst
new file mode 100644
index 00000000..2fb8ec6a
--- /dev/null
+++ b/src/python/doc/cubical_complex_sklearn_itf_ref.rst
@@ -0,0 +1,102 @@
+:orphan:
+
+.. To get rid of WARNING: document isn't included in any toctree
+
+Cubical complex persistence scikit-learn like interface
+#######################################################
+
+.. list-table::
+ :width: 100%
+ :header-rows: 0
+
+ * - :Since: GUDHI 3.6.0
+ - :License: MIT
+ - :Requires: `Scikit-learn <installation.html#scikit-learn>`_
+
+Cubical complex persistence scikit-learn like interface example
+---------------------------------------------------------------
+
+In this example, hand written digits are used as an input.
+a TDA scikit-learn pipeline is constructed and is composed of:
+
+#. :class:`~gudhi.sklearn.cubical_persistence.CubicalPersistence` that builds a cubical complex from the inputs and
+ returns its persistence diagrams
+#. :class:`~gudhi.representations.preprocessing.DiagramSelector` that removes non-finite persistence diagrams values
+#. :class:`~gudhi.representations.vector_methods.PersistenceImage` that builds the persistence images from persistence diagrams
+#. `SVC <https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html>`_ which is a scikit-learn support
+ vector classifier.
+
+This ML pipeline is trained to detect if the hand written digit is an '8' or not, thanks to the fact that an '8' has
+two holes in :math:`\mathbf{H}_1`, or, like in this example, three connected components in :math:`\mathbf{H}_0`.
+
+.. code-block:: python
+
+ # Standard scientific Python imports
+ import numpy as np
+
+ # Standard scikit-learn imports
+ from sklearn.datasets import fetch_openml
+ from sklearn.pipeline import Pipeline
+ from sklearn.model_selection import train_test_split
+ from sklearn.svm import SVC
+ from sklearn import metrics
+
+ # Import TDA pipeline requirements
+ from gudhi.sklearn.cubical_persistence import CubicalPersistence
+ from gudhi.representations import PersistenceImage, DiagramSelector
+
+ X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)
+
+ # Target is: "is an eight ?"
+ y = (y == "8") * 1
+ print("There are", np.sum(y), "eights out of", len(y), "numbers.")
+
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
+ pipe = Pipeline(
+ [
+ ("cub_pers", CubicalPersistence(homology_dimensions=0, newshape=[28, 28], n_jobs=-2)),
+ # Or for multiple persistence dimension computation
+ # ("cub_pers", CubicalPersistence(homology_dimensions=[0, 1], newshape=[28, 28], n_jobs=-2)),
+ # ("H0_diags", DimensionSelector(index=0), # where index is the index in homology_dimensions array
+ ("finite_diags", DiagramSelector(use=True, point_type="finite")),
+ (
+ "pers_img",
+ PersistenceImage(bandwidth=50, weight=lambda x: x[1] ** 2, im_range=[0, 256, 0, 256], resolution=[20, 20]),
+ ),
+ ("svc", SVC()),
+ ]
+ )
+
+ # Learn from the train subset
+ pipe.fit(X_train, y_train)
+ # Predict from the test subset
+ predicted = pipe.predict(X_test)
+
+ print(f"Classification report for TDA pipeline {pipe}:\n" f"{metrics.classification_report(y_test, predicted)}\n")
+
+.. code-block:: none
+
+ There are 6825 eights out of 70000 numbers.
+ Classification report for TDA pipeline Pipeline(steps=[('cub_pers',
+ CubicalPersistence(newshape=[28, 28], n_jobs=-2)),
+ ('finite_diags', DiagramSelector(use=True)),
+ ('pers_img',
+ PersistenceImage(bandwidth=50, im_range=[0, 256, 0, 256],
+ weight=<function <lambda> at 0x7f3e54137ae8>)),
+ ('svc', SVC())]):
+ precision recall f1-score support
+
+ 0 0.97 0.99 0.98 25284
+ 1 0.92 0.68 0.78 2716
+
+ accuracy 0.96 28000
+ macro avg 0.94 0.84 0.88 28000
+ weighted avg 0.96 0.96 0.96 28000
+
+Cubical complex persistence scikit-learn like interface reference
+-----------------------------------------------------------------
+
+.. autoclass:: gudhi.sklearn.cubical_persistence.CubicalPersistence
+ :members:
+ :special-members: __init__
+ :show-inheritance: \ No newline at end of file
diff --git a/src/python/doc/cubical_complex_sum.inc b/src/python/doc/cubical_complex_sum.inc
index 90ec9fc2..b27843e5 100644
--- a/src/python/doc/cubical_complex_sum.inc
+++ b/src/python/doc/cubical_complex_sum.inc
@@ -1,17 +1,22 @@
.. table::
:widths: 30 40 30
- +--------------------------------------------------------------------------+----------------------------------------------------------------------+---------------------------------------------------------+
- | .. figure:: | The cubical complex represents a grid as a cell complex with | :Author: Pawel Dlotko |
- | ../../doc/Bitmap_cubical_complex/Cubical_complex_representation.png | cells of all dimensions. | |
- | :alt: Cubical complex representation | | :Since: GUDHI 2.0.0 |
- | :figclass: align-center | | |
- | | | :License: MIT |
- | | | |
- +--------------------------------------------------------------------------+----------------------------------------------------------------------+---------------------------------------------------------+
- | * :doc:`cubical_complex_user` | * :doc:`cubical_complex_ref` |
- | | * :doc:`periodic_cubical_complex_ref` |
- +--------------------------------------------------------------------------+----------------------------------------------------------------------+---------------------------------------------------------+
- | | * :doc:`cubical_complex_tflow_itf_ref` | :requires: `TensorFlow <installation.html#tensorflow>`_ |
- | | | |
- +--------------------------------------------------------------------------+----------------------------------------------------------------------+---------------------------------------------------------+
+ +--------------------------------------------------------------------------+--------------------------------------------------------------+-------------------------------------------------------------+
+ | .. figure:: | The cubical complex represents a grid as a cell complex with | :Author: Pawel Dlotko |
+ | ../../doc/Bitmap_cubical_complex/Cubical_complex_representation.png | cells of all dimensions. | :Since: GUDHI 2.0.0 |
+ | :alt: Cubical complex representation | | :License: MIT |
+ | :figclass: align-center | | |
+ +--------------------------------------------------------------------------+--------------------------------------------------------------+-------------------------------------------------------------+
+ | * :doc:`cubical_complex_user` | * :doc:`cubical_complex_ref` |
+ | | * :doc:`periodic_cubical_complex_ref` |
+ +--------------------------------------------------------------------------+--------------------------------------------------------------+-------------------------------------------------------------+
+ | .. image:: | * :doc:`cubical_complex_tflow_itf_ref` | :requires: `TensorFlow <installation.html#tensorflow>`_ |
+ | img/tensorflow.png | | |
+ | :target: https://www.tensorflow.org | | |
+ | :height: 30 | | |
+ +--------------------------------------------------------------------------+--------------------------------------------------------------+-------------------------------------------------------------+
+ | .. image:: | * :doc:`cubical_complex_sklearn_itf_ref` | :Requires: `Scikit-learn <installation.html#scikit-learn>`_ |
+ | img/sklearn.png | | |
+ | :target: https://scikit-learn.org | | |
+ | :height: 30 | | |
+ +--------------------------------------------------------------------------+--------------------------------------------------------------+-------------------------------------------------------------+
diff --git a/src/python/doc/cubical_complex_user.rst b/src/python/doc/cubical_complex_user.rst
index 6a211347..42a23875 100644
--- a/src/python/doc/cubical_complex_user.rst
+++ b/src/python/doc/cubical_complex_user.rst
@@ -7,14 +7,7 @@ Cubical complex user manual
Definition
----------
-===================================== ===================================== =====================================
-:Author: Pawel Dlotko :Since: GUDHI PYTHON 2.0.0 :License: GPL v3
-===================================== ===================================== =====================================
-
-+---------------------------------------------+----------------------------------------------------------------------+
-| :doc:`cubical_complex_user` | * :doc:`cubical_complex_ref` |
-| | * :doc:`periodic_cubical_complex_ref` |
-+---------------------------------------------+----------------------------------------------------------------------+
+.. include:: cubical_complex_sum.inc
The cubical complex is an example of a structured complex useful in computational mathematics (specially rigorous
numerics) and image analysis.
@@ -163,4 +156,4 @@ Tutorial
--------
This `notebook <https://github.com/GUDHI/TDA-tutorial/blob/master/Tuto-GUDHI-cubical-complexes.ipynb>`_
-explains how to represent sublevels sets of functions using cubical complexes. \ No newline at end of file
+explains how to represent sublevels sets of functions using cubical complexes.
diff --git a/src/python/doc/differentiation_sum.inc b/src/python/doc/differentiation_sum.inc
index 3aec33df..140cf180 100644
--- a/src/python/doc/differentiation_sum.inc
+++ b/src/python/doc/differentiation_sum.inc
@@ -1,8 +1,8 @@
.. list-table::
- :widths: 40 30 30
+ :width: 100%
:header-rows: 0
- * - :Since: GUDHI 3.5.0
+ * - :Since: GUDHI 3.6.0
- :License: MIT
- :Requires: `TensorFlow <installation.html#tensorflow>`_
diff --git a/src/python/doc/img/sklearn.png b/src/python/doc/img/sklearn.png
new file mode 100644
index 00000000..d1fecbbf
--- /dev/null
+++ b/src/python/doc/img/sklearn.png
Binary files differ
diff --git a/src/python/doc/img/tensorflow.png b/src/python/doc/img/tensorflow.png
new file mode 100644
index 00000000..a75f3f5b
--- /dev/null
+++ b/src/python/doc/img/tensorflow.png
Binary files differ
diff --git a/src/python/doc/installation.rst b/src/python/doc/installation.rst
index dd476054..4eefd415 100644
--- a/src/python/doc/installation.rst
+++ b/src/python/doc/installation.rst
@@ -175,7 +175,7 @@ A complete configuration would be :
Scikit-learn version 1.0.1
POT version 0.8.0
HNSWlib found
- PyKeOps version [pyKeOps]: 1.5
+ PyKeOps version [pyKeOps]: 2.1
EagerPy version 0.30.0
TensorFlow version 2.7.0
Sphinx version 4.3.0
diff --git a/src/python/doc/persistent_cohomology_user.rst b/src/python/doc/persistent_cohomology_user.rst
index a3f294b2..39744b95 100644
--- a/src/python/doc/persistent_cohomology_user.rst
+++ b/src/python/doc/persistent_cohomology_user.rst
@@ -6,19 +6,24 @@ Persistent cohomology user manual
=================================
Definition
----------
-===================================== ===================================== =====================================
-:Author: Clément Maria :Since: GUDHI PYTHON 2.0.0 :License: GPL v3
-===================================== ===================================== =====================================
-
-+-----------------------------------------------------------------+-----------------------------------------------------------------------+
-| :doc:`persistent_cohomology_user` | Please refer to each data structure that contains persistence |
-| | feature for reference: |
-| | |
-| | * :doc:`simplex_tree_ref` |
-| | * :doc:`cubical_complex_ref` |
-| | * :doc:`periodic_cubical_complex_ref` |
-+-----------------------------------------------------------------+-----------------------------------------------------------------------+
+.. list-table::
+ :width: 100%
+ :header-rows: 0
+
+ * - :Author: Clément Maria
+ - :Since: GUDHI 2.0.0
+ - :License: MIT
+
+.. list-table::
+ :width: 100%
+ :header-rows: 0
+
+ * - :doc:`persistent_cohomology_user`
+ - Please refer to each data structure that contains persistence feature for reference:
+ * :doc:`simplex_tree_ref`
+ * :doc:`cubical_complex_ref`
+ * :doc:`periodic_cubical_complex_ref`
Computation of persistent cohomology using the algorithm of :cite:`DBLP:journals/dcg/SilvaMV11` and
:cite:`DBLP:conf/compgeom/DeyFW14` and the Compressed Annotation Matrix implementation of
diff --git a/src/python/doc/rips_complex_sum.inc b/src/python/doc/rips_complex_sum.inc
index 6931ebee..2b125e54 100644
--- a/src/python/doc/rips_complex_sum.inc
+++ b/src/python/doc/rips_complex_sum.inc
@@ -12,6 +12,8 @@
+----------------------------------------------------------------+------------------------------------------------------------------------+----------------------------------------------------------------------------------+
| * :doc:`rips_complex_user` | * :doc:`rips_complex_ref` |
+----------------------------------------------------------------+------------------------------------------------------------------------+----------------------------------------------------------------------------------+
- | | * :doc:`rips_complex_tflow_itf_ref` | :requires: `TensorFlow <installation.html#tensorflow>`_ |
- | | | |
+ | .. image:: | * :doc:`rips_complex_tflow_itf_ref` | :requires: `TensorFlow <installation.html#tensorflow>`_ |
+ | img/tensorflow.png | | |
+ | :target: https://www.tensorflow.org | | |
+ | :height: 30 | | |
+----------------------------------------------------------------+------------------------------------------------------------------------+----------------------------------------------------------------------------------+
diff --git a/src/python/doc/rips_complex_user.rst b/src/python/doc/rips_complex_user.rst
index 27d218d4..c41a7803 100644
--- a/src/python/doc/rips_complex_user.rst
+++ b/src/python/doc/rips_complex_user.rst
@@ -7,13 +7,7 @@ Rips complex user manual
Definition
----------
-================================================================================ ================================ ======================
-:Authors: Clément Maria, Pawel Dlotko, Vincent Rouvreau, Marc Glisse, Yuichi Ike :Since: GUDHI 2.0.0 :License: GPL v3
-================================================================================ ================================ ======================
-
-+-------------------------------------------+----------------------------------------------------------------------+
-| :doc:`rips_complex_user` | :doc:`rips_complex_ref` |
-+-------------------------------------------+----------------------------------------------------------------------+
+.. include:: rips_complex_sum.inc
The `Rips complex <https://en.wikipedia.org/wiki/Vietoris%E2%80%93Rips_complex>`_ is a simplicial complex that
generalizes proximity (:math:`\varepsilon`-ball) graphs to higher dimensions. The vertices correspond to the input
diff --git a/src/python/doc/simplex_tree_sum.inc b/src/python/doc/simplex_tree_sum.inc
index 3ad1292c..6b534c9e 100644
--- a/src/python/doc/simplex_tree_sum.inc
+++ b/src/python/doc/simplex_tree_sum.inc
@@ -11,6 +11,8 @@
+----------------------------------------------------------------+------------------------------------------------------------------------+---------------------------------------------------------+
| * :doc:`simplex_tree_user` | * :doc:`simplex_tree_ref` |
+----------------------------------------------------------------+------------------------------------------------------------------------+---------------------------------------------------------+
- | | * :doc:`ls_simplex_tree_tflow_itf_ref` | :requires: `TensorFlow <installation.html#tensorflow>`_ |
- | | | |
+ | .. image:: | * :doc:`ls_simplex_tree_tflow_itf_ref` | :requires: `TensorFlow <installation.html#tensorflow>`_ |
+ | img/tensorflow.png | | |
+ | :target: https://www.tensorflow.org | | |
+ | :height: 30 | | |
+----------------------------------------------------------------+------------------------------------------------------------------------+---------------------------------------------------------+
diff --git a/src/python/gudhi/representations/preprocessing.py b/src/python/gudhi/representations/preprocessing.py
index a8545349..8722e162 100644
--- a/src/python/gudhi/representations/preprocessing.py
+++ b/src/python/gudhi/representations/preprocessing.py
@@ -1,10 +1,11 @@
# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
-# Author(s): Mathieu Carrière
+# Author(s): Mathieu Carrière, Vincent Rouvreau
#
# Copyright (C) 2018-2019 Inria
#
# Modification(s):
+# - 2021/10 Vincent Rouvreau: Add DimensionSelector
# - YYYY/MM Author: Description of the modification
import numpy as np
@@ -75,7 +76,7 @@ class Clamping(BaseEstimator, TransformerMixin):
Constructor for the Clamping class.
Parameters:
- limit (double): clamping value (default np.inf).
+ limit (float): clamping value (default np.inf).
"""
self.minimum = minimum
self.maximum = maximum
@@ -234,7 +235,7 @@ class ProminentPoints(BaseEstimator, TransformerMixin):
use (bool): whether to use the class or not (default False).
location (string): either "upper" or "lower" (default "upper"). Whether to keep the points that are far away ("upper") or close ("lower") to the diagonal.
num_pts (int): cardinality threshold (default 10). If location == "upper", keep the top **num_pts** points that are the farthest away from the diagonal. If location == "lower", keep the top **num_pts** points that are the closest to the diagonal.
- threshold (double): distance-to-diagonal threshold (default -1). If location == "upper", keep the points that are at least at a distance **threshold** from the diagonal. If location == "lower", keep the points that are at most at a distance **threshold** from the diagonal.
+ threshold (float): distance-to-diagonal threshold (default -1). If location == "upper", keep the points that are at least at a distance **threshold** from the diagonal. If location == "lower", keep the points that are at most at a distance **threshold** from the diagonal.
"""
self.num_pts = num_pts
self.threshold = threshold
@@ -317,7 +318,7 @@ class DiagramSelector(BaseEstimator, TransformerMixin):
Parameters:
use (bool): whether to use the class or not (default False).
- limit (double): second coordinate value that is the criterion for being an essential point (default numpy.inf).
+ limit (float): second coordinate value that is the criterion for being an essential point (default numpy.inf).
point_type (string): either "finite" or "essential". The type of the points that are going to be extracted.
"""
self.use, self.limit, self.point_type = use, limit, point_type
@@ -363,3 +364,51 @@ class DiagramSelector(BaseEstimator, TransformerMixin):
n x 2 numpy array: extracted persistence diagram.
"""
return self.fit_transform([diag])[0]
+
+
+# Mermaid sequence diagram - https://mermaid-js.github.io/mermaid-live-editor/
+# sequenceDiagram
+# USER->>DimensionSelector: fit_transform(<br/>[[array( Hi(X0) ), array( Hj(X0) ), ...],<br/> [array( Hi(X1) ), array( Hj(X1) ), ...],<br/> ...])
+# DimensionSelector->>thread1: _transform([array( Hi(X0) ), array( Hj(X0) )], ...)
+# DimensionSelector->>thread2: _transform([array( Hi(X1) ), array( Hj(X1) )], ...)
+# Note right of DimensionSelector: ...
+# thread1->>DimensionSelector: array( Hn(X0) )
+# thread2->>DimensionSelector: array( Hn(X1) )
+# Note right of DimensionSelector: ...
+# DimensionSelector->>USER: [array( Hn(X0) ), <br/> array( Hn(X1) ), <br/> ...]
+
+class DimensionSelector(BaseEstimator, TransformerMixin):
+ """
+ This is a class to select persistence diagrams in a specific dimension from its index.
+ """
+
+ def __init__(self, index=0):
+ """
+ Constructor for the DimensionSelector class.
+
+ Parameters:
+ index (int): The returned persistence diagrams dimension index. Default value is `0`.
+ """
+ self.index = index
+
+ def fit(self, X, Y=None):
+ """
+ Nothing to be done, but useful when included in a scikit-learn Pipeline.
+ """
+ return self
+
+ def transform(self, X, Y=None):
+ """
+ Select persistence diagrams from its dimension.
+
+ Parameters:
+ X (list of list of tuple): List of list of persistence pairs, i.e.
+ `[[array( Hi(X0) ), array( Hj(X0) ), ...], [array( Hi(X1) ), array( Hj(X1) ), ...], ...]`
+
+ Returns:
+ list of tuple:
+ Persistence diagrams in a specific dimension. i.e. if `index` was set to `m` and `Hn` is at index `m` of
+ the input, it returns `[array( Hn(X0) ), array( Hn(X1), ...]`
+ """
+
+ return [persistence[self.index] for persistence in X]
diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index f8078d03..69ff5e1e 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -508,26 +508,20 @@ class Entropy(BaseEstimator, TransformerMixin):
new_X = BirthPersistenceTransform().fit_transform(X)
for i in range(num_diag):
- orig_diagram, diagram, num_pts_in_diag = X[i], new_X[i], X[i].shape[0]
- try:
- new_diagram = DiagramScaler(use=True, scalers=[([1], MaxAbsScaler())]).fit_transform([diagram])[0]
- except ValueError:
- # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507
- assert len(diagram) == 0
- new_diagram = np.empty(shape = [0, 2])
-
+ orig_diagram, new_diagram, num_pts_in_diag = X[i], new_X[i], X[i].shape[0]
+
+ p = new_diagram[:,1]
+ p = p/np.sum(p)
if self.mode == "scalar":
- ent = - np.sum( np.multiply(new_diagram[:,1], np.log(new_diagram[:,1])) )
+ ent = -np.dot(p, np.log(p))
Xfit.append(np.array([[ent]]))
-
else:
ent = np.zeros(self.resolution)
for j in range(num_pts_in_diag):
[px,py] = orig_diagram[j,:2]
min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
- for k in range(min_idx, max_idx):
- ent[k] += (-1) * new_diagram[j,1] * np.log(new_diagram[j,1])
+ ent[min_idx:max_idx]-=p[j]*np.log(p[j])
if self.normalized:
ent = ent / np.linalg.norm(ent, ord=1)
Xfit.append(np.reshape(ent,[1,-1]))
diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx
index 521a7763..05bfe22e 100644
--- a/src/python/gudhi/simplex_tree.pyx
+++ b/src/python/gudhi/simplex_tree.pyx
@@ -487,9 +487,9 @@ cdef class SimplexTree:
otherwise it is kept. The algorithm then proceeds with the next candidate.
.. warning::
- Several candidates of the same dimension may be inserted simultaneously before calling `block_simplex`, so
- if you examine the complex in `block_simplex`, you may hit a few simplices of the same dimension that have
- not been vetted by `block_simplex` yet, or have already been rejected but not yet removed.
+ Several candidates of the same dimension may be inserted simultaneously before calling `blocker_func`, so
+ if you examine the complex in `blocker_func`, you may hit a few simplices of the same dimension that have
+ not been vetted by `blocker_func` yet, or have already been rejected but not yet removed.
:param max_dim: Expansion maximal dimension value.
:type max_dim: int
diff --git a/src/python/gudhi/sklearn/__init__.py b/src/python/gudhi/sklearn/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/python/gudhi/sklearn/__init__.py
diff --git a/src/python/gudhi/sklearn/cubical_persistence.py b/src/python/gudhi/sklearn/cubical_persistence.py
new file mode 100644
index 00000000..672af278
--- /dev/null
+++ b/src/python/gudhi/sklearn/cubical_persistence.py
@@ -0,0 +1,110 @@
+# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+# Author(s): Vincent Rouvreau
+#
+# Copyright (C) 2021 Inria
+#
+# Modification(s):
+# - YYYY/MM Author: Description of the modification
+
+from .. import CubicalComplex
+from sklearn.base import BaseEstimator, TransformerMixin
+
+import numpy as np
+# joblib is required by scikit-learn
+from joblib import Parallel, delayed
+
+# Mermaid sequence diagram - https://mermaid-js.github.io/mermaid-live-editor/
+# sequenceDiagram
+# USER->>CubicalPersistence: fit_transform(X)
+# CubicalPersistence->>thread1: _tranform(X[0])
+# CubicalPersistence->>thread2: _tranform(X[1])
+# Note right of CubicalPersistence: ...
+# thread1->>CubicalPersistence: [array( H0(X[0]) ), array( H1(X[0]) )]
+# thread2->>CubicalPersistence: [array( H0(X[1]) ), array( H1(X[1]) )]
+# Note right of CubicalPersistence: ...
+# CubicalPersistence->>USER: [[array( H0(X[0]) ), array( H1(X[0]) )],<br/> [array( H0(X[1]) ), array( H1(X[1]) )],<br/> ...]
+
+
+class CubicalPersistence(BaseEstimator, TransformerMixin):
+ """
+ This is a class for computing the persistence diagrams from a cubical complex.
+ """
+
+ def __init__(
+ self,
+ homology_dimensions,
+ newshape=None,
+ homology_coeff_field=11,
+ min_persistence=0.0,
+ n_jobs=None,
+ ):
+ """
+ Constructor for the CubicalPersistence class.
+
+ Parameters:
+ homology_dimensions (int or list of int): The returned persistence diagrams dimension(s).
+ Short circuit the use of :class:`~gudhi.representations.preprocessing.DimensionSelector` when only one
+ dimension matters (in other words, when `homology_dimensions` is an int).
+ newshape (tuple of ints): If cells filtration values require to be reshaped
+ (cf. :func:`~gudhi.sklearn.cubical_persistence.CubicalPersistence.transform`), set `newshape`
+ to perform `numpy.reshape(X, newshape, order='C')` in
+ :func:`~gudhi.sklearn.cubical_persistence.CubicalPersistence.transform` method.
+ homology_coeff_field (int): The homology coefficient field. Must be a prime number. Default value is 11.
+ min_persistence (float): The minimum persistence value to take into account (strictly greater than
+ `min_persistence`). Default value is `0.0`. Set `min_persistence` to `-1.0` to see all values.
+ n_jobs (int): cf. https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html
+ """
+ self.homology_dimensions = homology_dimensions
+ self.newshape = newshape
+ self.homology_coeff_field = homology_coeff_field
+ self.min_persistence = min_persistence
+ self.n_jobs = n_jobs
+
+ def fit(self, X, Y=None):
+ """
+ Nothing to be done, but useful when included in a scikit-learn Pipeline.
+ """
+ return self
+
+ def __transform(self, cells):
+ cubical_complex = CubicalComplex(top_dimensional_cells=cells)
+ cubical_complex.compute_persistence(
+ homology_coeff_field=self.homology_coeff_field, min_persistence=self.min_persistence
+ )
+ return [
+ cubical_complex.persistence_intervals_in_dimension(dim) for dim in self.homology_dimensions
+ ]
+
+ def __transform_only_this_dim(self, cells):
+ cubical_complex = CubicalComplex(top_dimensional_cells=cells)
+ cubical_complex.compute_persistence(
+ homology_coeff_field=self.homology_coeff_field, min_persistence=self.min_persistence
+ )
+ return cubical_complex.persistence_intervals_in_dimension(self.homology_dimensions)
+
+ def transform(self, X, Y=None):
+ """Compute all the cubical complexes and their associated persistence diagrams.
+
+ :param X: List of cells filtration values (`numpy.reshape(X, newshape, order='C'` if `newshape` is set with a tuple of ints).
+ :type X: list of list of float OR list of numpy.ndarray
+
+ :return: Persistence diagrams in the format:
+
+ - If `homology_dimensions` was set to `n`: `[array( Hn(X[0]) ), array( Hn(X[1]) ), ...]`
+ - If `homology_dimensions` was set to `[i, j]`: `[[array( Hi(X[0]) ), array( Hj(X[0]) )], [array( Hi(X[1]) ), array( Hj(X[1]) )], ...]`
+ :rtype: list of (,2) array_like or list of list of (,2) array_like
+ """
+ if self.newshape is not None:
+ X = np.reshape(X, self.newshape, order='C')
+
+ # Depends on homology_dimensions is an integer or a list of integer (else case)
+ if isinstance(self.homology_dimensions, int):
+ # threads is preferred as cubical construction and persistence computation releases the GIL
+ return Parallel(n_jobs=self.n_jobs, prefer="threads")(
+ delayed(self.__transform_only_this_dim)(cells) for cells in X
+ )
+ else:
+ # threads is preferred as cubical construction and persistence computation releases the GIL
+ return Parallel(n_jobs=self.n_jobs, prefer="threads")(delayed(self.__transform)(cells) for cells in X)
+
diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py
index e46d616c..b276f041 100755
--- a/src/python/test/test_dtm.py
+++ b/src/python/test/test_dtm.py
@@ -91,11 +91,11 @@ def test_density():
def test_dtm_overflow_warnings():
pts = numpy.array([[10., 100000000000000000000000000000.], [1000., 100000000000000000000000000.]])
-
- with warnings.catch_warnings(record=True) as w:
- # TODO Test "keops" implementation as well when next version of pykeops (current is 1.5) is released (should fix the problem (cf. issue #543))
- dtm = DistanceToMeasure(2, implementation="hnsw")
- r = dtm.fit_transform(pts)
- assert len(w) == 1
- assert issubclass(w[0].category, RuntimeWarning)
- assert "Overflow" in str(w[0].message)
+ impl_warn = ["keops", "hnsw"]
+ for impl in impl_warn:
+ with warnings.catch_warnings(record=True) as w:
+ dtm = DistanceToMeasure(2, implementation=impl)
+ r = dtm.fit_transform(pts)
+ assert len(w) == 1
+ assert issubclass(w[0].category, RuntimeWarning)
+ assert "Overflow" in str(w[0].message)
diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py
index d219ce7a..4a455bb6 100755
--- a/src/python/test/test_representations.py
+++ b/src/python/test/test_representations.py
@@ -152,7 +152,26 @@ def test_vectorization_empty_diagrams():
scv = Entropy(mode="vector", normalized=False, resolution=random_resolution)(empty_diag)
assert not np.any(scv)
assert scv.shape[0] == random_resolution
-
+
+def test_entropy_miscalculation():
+ diag_ex = np.array([[0.0,1.0], [0.0,1.0], [0.0,2.0]])
+ def pe(pd):
+ l = pd[:,1] - pd[:,0]
+ l = l/sum(l)
+ return -np.dot(l, np.log(l))
+ sce = Entropy(mode="scalar")
+ assert [[pe(diag_ex)]] == sce.fit_transform([diag_ex])
+ sce = Entropy(mode="vector", resolution=4, normalized=False)
+ pef = [-1/4*np.log(1/4)-1/4*np.log(1/4)-1/2*np.log(1/2),
+ -1/4*np.log(1/4)-1/4*np.log(1/4)-1/2*np.log(1/2),
+ -1/2*np.log(1/2),
+ 0.0]
+ assert all(([pef] == sce.fit_transform([diag_ex]))[0])
+ sce = Entropy(mode="vector", resolution=4, normalized=True)
+ pefN = (sce.fit_transform([diag_ex]))[0]
+ area = np.linalg.norm(pefN, ord=1)
+ assert area==1
+
def test_kernel_empty_diagrams():
empty_diag = np.empty(shape = [0, 2])
assert SlicedWassersteinDistance(num_directions=100)(empty_diag, empty_diag) == 0.
diff --git a/src/python/test/test_representations_preprocessing.py b/src/python/test/test_representations_preprocessing.py
new file mode 100644
index 00000000..838cf30c
--- /dev/null
+++ b/src/python/test/test_representations_preprocessing.py
@@ -0,0 +1,39 @@
+""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ Author(s): Vincent Rouvreau
+
+ Copyright (C) 2021 Inria
+
+ Modification(s):
+ - YYYY/MM Author: Description of the modification
+"""
+
+from gudhi.representations.preprocessing import DimensionSelector
+import numpy as np
+import pytest
+
+H0_0 = np.array([0.0, 0.0])
+H1_0 = np.array([1.0, 0.0])
+H0_1 = np.array([0.0, 1.0])
+H1_1 = np.array([1.0, 1.0])
+H0_2 = np.array([0.0, 2.0])
+H1_2 = np.array([1.0, 2.0])
+
+
+def test_dimension_selector():
+ X = [[H0_0, H1_0], [H0_1, H1_1], [H0_2, H1_2]]
+ ds = DimensionSelector(index=0)
+ h0 = ds.fit_transform(X)
+ np.testing.assert_array_equal(h0[0], H0_0)
+ np.testing.assert_array_equal(h0[1], H0_1)
+ np.testing.assert_array_equal(h0[2], H0_2)
+
+ ds = DimensionSelector(index=1)
+ h1 = ds.fit_transform(X)
+ np.testing.assert_array_equal(h1[0], H1_0)
+ np.testing.assert_array_equal(h1[1], H1_1)
+ np.testing.assert_array_equal(h1[2], H1_2)
+
+ ds = DimensionSelector(index=2)
+ with pytest.raises(IndexError):
+ h2 = ds.fit_transform([[H0_0, H1_0], [H0_1, H1_1], [H0_2, H1_2]])
diff --git a/src/python/test/test_sklearn_cubical_persistence.py b/src/python/test/test_sklearn_cubical_persistence.py
new file mode 100644
index 00000000..1c05a215
--- /dev/null
+++ b/src/python/test/test_sklearn_cubical_persistence.py
@@ -0,0 +1,59 @@
+""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ Author(s): Vincent Rouvreau
+
+ Copyright (C) 2021 Inria
+
+ Modification(s):
+ - YYYY/MM Author: Description of the modification
+"""
+
+from gudhi.sklearn.cubical_persistence import CubicalPersistence
+import numpy as np
+from sklearn import datasets
+
+CUBICAL_PERSISTENCE_H0_IMG0 = np.array([[0.0, 6.0], [0.0, 8.0], [0.0, np.inf]])
+
+
+def test_simple_constructor_from_top_cells():
+ cells = datasets.load_digits().images[0]
+ cp = CubicalPersistence(homology_dimensions=0)
+ np.testing.assert_array_equal(cp._CubicalPersistence__transform_only_this_dim(cells), CUBICAL_PERSISTENCE_H0_IMG0)
+ cp = CubicalPersistence(homology_dimensions=[0, 2])
+ diags = cp._CubicalPersistence__transform(cells)
+ assert len(diags) == 2
+ np.testing.assert_array_equal(diags[0], CUBICAL_PERSISTENCE_H0_IMG0)
+
+
+def test_simple_constructor_from_top_cells_list():
+ digits = datasets.load_digits().images[:10]
+ cp = CubicalPersistence(homology_dimensions=0, n_jobs=-2)
+
+ diags = cp.fit_transform(digits)
+ assert len(diags) == 10
+ np.testing.assert_array_equal(diags[0], CUBICAL_PERSISTENCE_H0_IMG0)
+
+ cp = CubicalPersistence(homology_dimensions=[0, 1], n_jobs=-1)
+ diagsH0H1 = cp.fit_transform(digits)
+ assert len(diagsH0H1) == 10
+ for idx in range(10):
+ np.testing.assert_array_equal(diags[idx], diagsH0H1[idx][0])
+
+def test_simple_constructor_from_flattened_cells():
+ cells = datasets.load_digits().images[0]
+ # Not squared (extended) flatten cells
+ flat_cells = np.hstack((cells, np.zeros((cells.shape[0], 2)))).flatten()
+
+ cp = CubicalPersistence(homology_dimensions=0, newshape=[-1, 8, 10])
+ diags = cp.fit_transform([flat_cells])
+
+ np.testing.assert_array_equal(diags[0], CUBICAL_PERSISTENCE_H0_IMG0)
+
+ # Not squared (extended) non-flatten cells
+ cells = np.hstack((cells, np.zeros((cells.shape[0], 2))))
+
+ # The aim of this second part of the test is to resize even if not mandatory
+ cp = CubicalPersistence(homology_dimensions=0, newshape=[-1, 8, 10])
+ diags = cp.fit_transform([cells])
+
+ np.testing.assert_array_equal(diags[0], CUBICAL_PERSISTENCE_H0_IMG0)