Diffstat (limited to 'src')
-rw-r--r--  src/Alpha_complex/include/gudhi/Alpha_complex_3d.h  56
-rw-r--r--  src/CMakeLists.txt  1
-rw-r--r--  src/Collapse/doc/dominated_edge.png  bin  0 -> 349766 bytes
-rw-r--r--  src/Collapse/doc/intro_edge_collapse.h  101
-rw-r--r--  src/Collapse/example/CMakeLists.txt  23
-rw-r--r--  src/Collapse/example/edge_collapse_basic_example.cpp  36
-rw-r--r--  src/Collapse/example/edge_collapse_conserve_persistence.cpp  159
-rw-r--r--  src/Collapse/example/edge_collapse_example_basic.txt  5
-rw-r--r--  src/Collapse/include/gudhi/Flag_complex_edge_collapser.h  378
-rw-r--r--  src/Collapse/test/CMakeLists.txt  9
-rw-r--r--  src/Collapse/test/collapse_unit_test.cpp  198
-rw-r--r--  src/Collapse/utilities/CMakeLists.txt  33
-rw-r--r--  src/Collapse/utilities/collapse.md  63
-rw-r--r--  src/Collapse/utilities/distance_matrix_edge_collapse_rips_persistence.cpp  152
-rw-r--r--  src/Collapse/utilities/point_cloud_edge_collapse_rips_persistence.cpp  181
-rw-r--r--  src/cmake/modules/GUDHI_third_party_libraries.cmake  5
-rw-r--r--  src/cmake/modules/GUDHI_user_version_target.cmake  7
-rw-r--r--  src/common/doc/main_page.md  30
-rw-r--r--  src/common/include/gudhi/graph_simplicial_complex.h  5
-rw-r--r--  src/python/CMakeLists.txt  47
-rw-r--r--  src/python/doc/alpha_complex_user.rst  51
-rw-r--r--  src/python/doc/bottleneck_distance_user.rst  19
-rw-r--r--  src/python/doc/clustering.inc  12
-rw-r--r--  src/python/doc/clustering.rst  73
-rw-r--r--  src/python/doc/img/spiral-color.png  bin  0 -> 222425 bytes
-rw-r--r--  src/python/doc/index.rst  5
-rw-r--r--  src/python/doc/installation.rst  37
-rw-r--r--  src/python/gudhi/alpha_complex.pyx  29
-rw-r--r--  src/python/gudhi/bottleneck.cc  3
-rw-r--r--  src/python/gudhi/clustering/__init__.py  0
-rw-r--r--  src/python/gudhi/clustering/_tomato.cc  277
-rw-r--r--  src/python/gudhi/clustering/tomato.py  321
-rw-r--r--  src/python/gudhi/hera/__init__.py  7
-rw-r--r--  src/python/gudhi/hera/bottleneck.cc  54
-rw-r--r--  src/python/gudhi/hera/wasserstein.cc (renamed from src/python/gudhi/hera.cc)  2
-rw-r--r--  src/python/gudhi/persistence_graphical_tools.py  33
-rw-r--r--  src/python/gudhi/representations/metrics.py  17
-rw-r--r--  src/python/gudhi/simplex_tree.pyx  5
-rw-r--r--  src/python/gudhi/wasserstein/wasserstein.py  10
-rw-r--r--  src/python/include/Alpha_complex_factory.h  139
-rw-r--r--  src/python/include/Alpha_complex_interface.h  92
-rw-r--r--  src/python/include/pybind11_diagram_utils.h  8
-rw-r--r--  src/python/introduction.rst  24
-rw-r--r--  src/python/setup.py.in  27
-rwxr-xr-x  src/python/test/test_alpha_complex.py  83
-rwxr-xr-x  src/python/test/test_bottleneck_distance.py  6
-rwxr-xr-x  src/python/test/test_tomato.py  65
47 files changed, 2673 insertions, 215 deletions
diff --git a/src/Alpha_complex/include/gudhi/Alpha_complex_3d.h b/src/Alpha_complex/include/gudhi/Alpha_complex_3d.h
index c19ebb79..f56e12d0 100644
--- a/src/Alpha_complex/include/gudhi/Alpha_complex_3d.h
+++ b/src/Alpha_complex/include/gudhi/Alpha_complex_3d.h
@@ -14,6 +14,9 @@
#include <boost/version.hpp>
#include <boost/variant.hpp>
+#include <boost/range/size.hpp>
+#include <boost/range/combine.hpp>
+#include <boost/range/adaptor/transformed.hpp>
#include <gudhi/Debug_utils.h>
#include <gudhi/Alpha_complex_options.h>
@@ -277,8 +280,8 @@ Weighted_alpha_complex_3d::Weighted_point_3 wp0(Weighted_alpha_complex_3d::Bare_
Alpha_complex_3d(const InputPointRange& points) {
static_assert(!Periodic, "This constructor is not available for periodic versions of Alpha_complex_3d");
- alpha_shape_3_ptr_ = std::unique_ptr<Alpha_shape_3>(
- new Alpha_shape_3(std::begin(points), std::end(points), 0, Alpha_shape_3::GENERAL));
+ alpha_shape_3_ptr_ = std::make_unique<Alpha_shape_3>(
+ std::begin(points), std::end(points), 0, Alpha_shape_3::GENERAL);
}
/** \brief Alpha_complex constructor from a list of points and associated weights.
@@ -299,20 +302,15 @@ Weighted_alpha_complex_3d::Weighted_point_3 wp0(Weighted_alpha_complex_3d::Bare_
Alpha_complex_3d(const InputPointRange& points, WeightRange weights) {
static_assert(Weighted, "This constructor is not available for non-weighted versions of Alpha_complex_3d");
static_assert(!Periodic, "This constructor is not available for periodic versions of Alpha_complex_3d");
- GUDHI_CHECK((weights.size() == points.size()),
+ // FIXME: this test is only valid if we have a forward range
+ GUDHI_CHECK(boost::size(weights) == boost::size(points),
std::invalid_argument("Points number in range different from weights range number"));
- std::vector<Weighted_point_3> weighted_points_3;
+ auto weighted_points_3 = boost::range::combine(points, weights)
+ | boost::adaptors::transformed([](auto const&t){return Weighted_point_3(boost::get<0>(t), boost::get<1>(t));});
- std::size_t index = 0;
- weighted_points_3.reserve(points.size());
- while ((index < weights.size()) && (index < points.size())) {
- weighted_points_3.push_back(Weighted_point_3(points[index], weights[index]));
- index++;
- }
-
- alpha_shape_3_ptr_ = std::unique_ptr<Alpha_shape_3>(
- new Alpha_shape_3(std::begin(weighted_points_3), std::end(weighted_points_3), 0, Alpha_shape_3::GENERAL));
+ alpha_shape_3_ptr_ = std::make_unique<Alpha_shape_3>(
+ std::begin(weighted_points_3), std::end(weighted_points_3), 0, Alpha_shape_3::GENERAL);
}
/** \brief Alpha_complex constructor from a list of points and an iso-cuboid coordinates.
@@ -356,7 +354,7 @@ Weighted_alpha_complex_3d::Weighted_point_3 wp0(Weighted_alpha_complex_3d::Bare_
// alpha shape construction from points. CGAL has a strange behavior in REGULARIZED mode. This is the default mode
// Maybe need to set it to GENERAL mode
- alpha_shape_3_ptr_ = std::unique_ptr<Alpha_shape_3>(new Alpha_shape_3(pdt, 0, Alpha_shape_3::GENERAL));
+ alpha_shape_3_ptr_ = std::make_unique<Alpha_shape_3>(pdt, 0, Alpha_shape_3::GENERAL);
}
/** \brief Alpha_complex constructor from a list of points, associated weights and an iso-cuboid coordinates.
@@ -388,31 +386,27 @@ Weighted_alpha_complex_3d::Weighted_point_3 wp0(Weighted_alpha_complex_3d::Bare_
FT z_min, FT x_max, FT y_max, FT z_max) {
static_assert(Weighted, "This constructor is not available for non-weighted versions of Alpha_complex_3d");
static_assert(Periodic, "This constructor is not available for non-periodic versions of Alpha_complex_3d");
- GUDHI_CHECK((weights.size() == points.size()),
+ // FIXME: this test is only valid if we have a forward range
+ GUDHI_CHECK(boost::size(weights) == boost::size(points),
std::invalid_argument("Points number in range different from weights range number"));
// Checking if the cuboid is the same in x,y and z direction. If not, CGAL will not process it.
GUDHI_CHECK(
(x_max - x_min == y_max - y_min) && (x_max - x_min == z_max - z_min) && (z_max - z_min == y_max - y_min),
std::invalid_argument("The size of the cuboid in every directions is not the same."));
- std::vector<Weighted_point_3> weighted_points_3;
-
- std::size_t index = 0;
- weighted_points_3.reserve(points.size());
-
#ifdef GUDHI_DEBUG
// Defined in GUDHI_DEBUG to avoid unused variable warning for GUDHI_CHECK
FT maximal_possible_weight = 0.015625 * (x_max - x_min) * (x_max - x_min);
#endif
- while ((index < weights.size()) && (index < points.size())) {
- GUDHI_CHECK((weights[index] < maximal_possible_weight) && (weights[index] >= 0),
- std::invalid_argument("Invalid weight at index " + std::to_string(index + 1) +
- ". Must be positive and less than maximal possible weight = 1/64*cuboid length "
- "squared, which is not an acceptable input."));
- weighted_points_3.push_back(Weighted_point_3(points[index], weights[index]));
- index++;
- }
+ auto weighted_points_3 = boost::range::combine(points, weights)
+ | boost::adaptors::transformed([=](auto const&t){
+ auto w = boost::get<1>(t);
+ GUDHI_CHECK((w < maximal_possible_weight) && (w >= 0),
+ std::invalid_argument("Invalid weight " + std::to_string(w) +
+ ". Must be non-negative and less than maximal possible weight = 1/64*cuboid length squared."));
+ return Weighted_point_3(boost::get<0>(t), w);
+ });
// Define the periodic cube
Dt pdt(typename Kernel::Iso_cuboid_3(x_min, y_min, z_min, x_max, y_max, z_max));
@@ -426,7 +420,7 @@ Weighted_alpha_complex_3d::Weighted_point_3 wp0(Weighted_alpha_complex_3d::Bare_
// alpha shape construction from points. CGAL has a strange behavior in REGULARIZED mode. This is the default mode
// Maybe need to set it to GENERAL mode
- alpha_shape_3_ptr_ = std::unique_ptr<Alpha_shape_3>(new Alpha_shape_3(pdt, 0, Alpha_shape_3::GENERAL));
+ alpha_shape_3_ptr_ = std::make_unique<Alpha_shape_3>(pdt, 0, Alpha_shape_3::GENERAL);
}
/** \brief Inserts all Delaunay triangulation into the simplicial complex.
@@ -471,6 +465,10 @@ Weighted_alpha_complex_3d::Weighted_point_3 wp0(Weighted_alpha_complex_3d::Bare_
#ifdef DEBUG_TRACES
std::clog << "filtration_with_alpha_values returns : " << objects.size() << " objects" << std::endl;
#endif // DEBUG_TRACES
+ if (objects.size() == 0) {
+ std::cerr << "Alpha_complex_3d create_complex - no triangulation as points are on a 2d plane\n";
+ return false; // ----- >>
+ }
using Alpha_value_iterator = typename std::vector<FT>::const_iterator;
Alpha_value_iterator alpha_value_iterator = alpha_values.begin();
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 561aa049..9e4d78ac 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -26,6 +26,7 @@ add_gudhi_module(Bitmap_cubical_complex)
add_gudhi_module(Bottleneck_distance)
add_gudhi_module(Cech_complex)
add_gudhi_module(Contraction)
+add_gudhi_module(Collapse)
add_gudhi_module(Hasse_complex)
add_gudhi_module(Persistence_representations)
add_gudhi_module(Persistent_cohomology)
diff --git a/src/Collapse/doc/dominated_edge.png b/src/Collapse/doc/dominated_edge.png
new file mode 100644
index 00000000..5900a55a
--- /dev/null
+++ b/src/Collapse/doc/dominated_edge.png
Binary files differ
diff --git a/src/Collapse/doc/intro_edge_collapse.h b/src/Collapse/doc/intro_edge_collapse.h
new file mode 100644
index 00000000..81edd79f
--- /dev/null
+++ b/src/Collapse/doc/intro_edge_collapse.h
@@ -0,0 +1,101 @@
+/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ * Author(s): Siddharth Pritam
+ *
+ * Copyright (C) 2020 Inria
+ *
+ * Modification(s):
+ * - YYYY/MM Author: Description of the modification
+ */
+
+#ifndef DOC_EDGE_COLLAPSE_INTRO_EDGE_COLLAPSE_H_
+#define DOC_EDGE_COLLAPSE_INTRO_EDGE_COLLAPSE_H_
+
+namespace Gudhi {
+
+namespace collapse {
+
+/** \defgroup edge_collapse Edge collapse
+ *
+ * \author Siddharth Pritam
+ *
+ * @{
+ *
+ * This module implements edge collapse of a filtered flag complex; in particular, it reduces a filtration of
+ * Vietoris-Rips complexes from its graph to another smaller flag filtration with the same persistence,
+ * where a filtration is a sequence of simplicial (here Rips) complexes connected by inclusions.
+ *
+ * \section edge_collapse_definition Edge collapse definition
+ *
+ * An edge \f$e\f$ in a simplicial complex \f$K\f$ is called a <b>dominated edge</b> if the link of \f$e\f$ in
+ * \f$K\f$, \f$lk_K(e)\f$ is a simplicial cone, that is, there exists a vertex \f$v^{\prime} \notin e\f$ and a
+ * subcomplex \f$L\f$ in \f$K\f$, such that \f$lk_K(e) = v^{\prime}L\f$. We say that the vertex \f$v^{\prime}\f$ is
+ * {dominating} \f$e\f$ and \f$e\f$ is {dominated} by \f$v^{\prime}\f$.
+ * An <b>elementary edge collapse</b> is the removal of a dominated edge \f$e\f$ from \f$K\f$,
+ * which we denote with \f$K\f$ \f${\searrow\searrow}^1 \f$ \f$K\setminus e\f$.
+ * The symbol \f$\mathbf{K\setminus e}\f$ (deletion of \f$e\f$ from \f$K\f$) refers to the subcomplex of \f$K\f$ which
+ * has all simplices of \f$K\f$ except \f$e\f$ and the ones containing \f$e\f$.
+ * There is an <b>edge collapse</b> from a simplicial complex \f$K\f$ to its subcomplex \f$L\f$,
+ * if there exists a series of elementary edge collapses from \f$K\f$ to \f$L\f$, denoted as \f$K\f$
+ * \f${\searrow\searrow}\f$ \f$L\f$.
+ *
+ * An edge collapse is a homotopy preserving operation, and it can be further expressed as a sequence of the
+ * classical elementary simple collapses.
+ * A complex without any dominated edge is called a \f$1\f$-minimal complex, and the core \f$K^1\f$ of a simplicial
+ * complex \f$K\f$ is a minimal complex such that \f$K\f$ \f${\searrow\searrow}\f$ \f$K^1\f$.
+ * Computing a core (which is not unique) involves computing the dominated edges, which can be easily
+ * characterized as follows:
+ *
+ * -- For a general simplicial complex: an edge \f$e \in K\f$ is dominated by another vertex \f$v^{\prime} \in K\f$
+ * <i>if and only if</i> all the maximal simplices of \f$K\f$ that contain \f$e\f$ also contain \f$v^{\prime}\f$.
+ *
+ * -- For a flag complex: an edge \f$e \in K\f$ is dominated by another vertex \f$v^{\prime} \in K\f$ <i>if and only
+ * if</i> all the vertices in \f$K\f$ that have an edge with both vertices of \f$e\f$ also have an edge with
+ * \f$v^{\prime}\f$.
+ *
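+ * For illustration, the flag-complex criterion translates into a plain set-inclusion test on vertex
+ * neighbourhoods. The following is a minimal, self-contained sketch of that test on an adjacency-set graph;
+ * it is illustrative only and is not this module's API (the actual implementation,
+ * `Flag_complex_edge_collapser::edge_is_dominated`, appears further in this changeset):
+ *
+ * \code{.cpp}
+ * #include <map>
+ * #include <set>
+ * #include <algorithm>
+ * #include <iterator>
+ *
+ * using Graph = std::map<int, std::set<int>>;
+ *
+ * // Edge {u,v} is dominated iff some common neighbour w of u and v is
+ * // adjacent to every other common neighbour of u and v.
+ * bool edge_is_dominated(const Graph& g, int u, int v) {
+ *   std::set<int> common;
+ *   std::set_intersection(g.at(u).begin(), g.at(u).end(), g.at(v).begin(), g.at(v).end(),
+ *                         std::inserter(common, common.begin()));
+ *   common.erase(u); common.erase(v);  // keep the open common neighbourhood
+ *   if (common.size() == 1) return true;  // the link is a single vertex, hence a cone
+ *   for (int w : common) {
+ *     std::set<int> closed = g.at(w);  // closed neighbourhood of w
+ *     closed.insert(w);
+ *     if (std::includes(closed.begin(), closed.end(), common.begin(), common.end()))
+ *       return true;  // w dominates the edge {u,v}
+ *   }
+ *   return false;
+ * }
+ * \endcode
+ *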
+ * The algorithm to compute the smaller induced filtration is described in Section 5 of \cite edgecollapsesocg2020.
+ * Edge collapse can be successfully employed to reduce any given filtration of flag complexes to a smaller induced
+ * filtration which preserves the persistent homology of the original filtration and is a flag complex as well.
+ *
+ * The general idea is that we consider the edges of the filtered graph and sort them according to their filtration
+ * value, giving them a total order.
+ * Each edge gets a unique index, denoted \f$i\f$, in this order. To reduce the filtration, we move forward in the
+ * graph with increasing filtration value and check whether the current edge \f$e_i\f$ is dominated in the current
+ * graph \f$G_i := \{e_1, \dots, e_i\}\f$ or not.
+ * If the edge \f$e_i\f$ is dominated, we remove it from the filtration and move forward to the next edge
+ * \f$e_{i+1}\f$.
+ * If \f$e_i\f$ is non-dominated, we keep it in the reduced filtration and then go backward in the current graph
+ * \f$G_i\f$ to look for edges that were dominated before but might have become non-dominated at this point.
+ * If an edge \f$e_j, j < i\f$, is found to be non-dominated during the backward search, we include \f$e_j\f$ in the
+ * reduced filtration and set its new filtration value to \f$i\f$, the index of \f$e_i\f$.
+ * The precise mechanism for this reduction is described in Section 5 of \cite edgecollapsesocg2020.
+ * Here we implement this mechanism for a filtration of Rips complexes.
+ * After performing the reduction, the filtration reduces to a flag filtration with the same persistence as the
+ * original filtration.
+ *
+ * \subsection edgecollapseexample Basic edge collapse
+ *
+ * This example calls `Gudhi::collapse::flag_complex_collapse_edges()` from a proximity graph represented as a list of
+ * `Filtered_edge`.
+ * Then it collapses edges and displays a new list of `Filtered_edge` (with fewer edges)
+ * that preserves the persistent homology computation.
+ *
+ * \include Collapse/edge_collapse_basic_example.cpp
+ *
+ * When launching the example:
+ *
+ * \code $> ./Edge_collapse_example_basic
+ * \endcode
+ *
+ * the program output is:
+ *
+ * \include Collapse/edge_collapse_example_basic.txt
+ */
+/** @} */ // end defgroup edge_collapse
+
+} // namespace collapse
+
+} // namespace Gudhi
+
+#endif // DOC_EDGE_COLLAPSE_INTRO_EDGE_COLLAPSE_H_
diff --git a/src/Collapse/example/CMakeLists.txt b/src/Collapse/example/CMakeLists.txt
new file mode 100644
index 00000000..ba0e75e3
--- /dev/null
+++ b/src/Collapse/example/CMakeLists.txt
@@ -0,0 +1,23 @@
+project(Edge_collapse_examples)
+
+# Point cloud
+add_executable ( Edge_collapse_example_basic edge_collapse_basic_example.cpp )
+
+if (TBB_FOUND)
+ target_link_libraries(Edge_collapse_example_basic ${TBB_LIBRARIES})
+endif()
+
+add_test(NAME Edge_collapse_example_basic COMMAND $<TARGET_FILE:Edge_collapse_example_basic>)
+
+# Point cloud
+add_executable ( Edge_collapse_conserve_persistence edge_collapse_conserve_persistence.cpp )
+
+if (TBB_FOUND)
+ target_link_libraries(Edge_collapse_conserve_persistence ${TBB_LIBRARIES})
+endif()
+
+add_test(NAME Edge_collapse_conserve_persistence_1 COMMAND $<TARGET_FILE:Edge_collapse_conserve_persistence>
+ "${CMAKE_SOURCE_DIR}/data/points/tore3D_300.off" "0.2")
+
+add_test(NAME Edge_collapse_conserve_persistence_2 COMMAND $<TARGET_FILE:Edge_collapse_conserve_persistence>
+ "${CMAKE_SOURCE_DIR}/data/points/tore3D_300.off" "1.8")
diff --git a/src/Collapse/example/edge_collapse_basic_example.cpp b/src/Collapse/example/edge_collapse_basic_example.cpp
new file mode 100644
index 00000000..1b3dc1b5
--- /dev/null
+++ b/src/Collapse/example/edge_collapse_basic_example.cpp
@@ -0,0 +1,36 @@
+#include <gudhi/Flag_complex_edge_collapser.h>
+
+#include <iostream>
+#include <vector>
+#include <tuple>
+
+int main() {
+ // Type definitions
+ using Filtration_value = float;
+ using Vertex_handle = short;
+ using Filtered_edge = std::tuple<Vertex_handle, Vertex_handle, Filtration_value>;
+ using Filtered_edge_list = std::vector<Filtered_edge>;
+
+ // 1 2
+ // o---o
+ // |\ /|
+ // | x |
+ // |/ \|
+ // o---o
+ // 0 3
+ Filtered_edge_list graph = {{0, 1, 1.},
+ {1, 2, 1.},
+ {2, 3, 1.},
+ {3, 0, 1.},
+ {0, 2, 2.},
+ {1, 3, 2.}};
+
+ auto remaining_edges = Gudhi::collapse::flag_complex_collapse_edges(graph);
+
+ for (auto filtered_edge_from_collapse : remaining_edges) {
+ std::cout << "fn[" << std::get<0>(filtered_edge_from_collapse) << ", " << std::get<1>(filtered_edge_from_collapse)
+ << "] = " << std::get<2>(filtered_edge_from_collapse) << std::endl;
+ }
+
+ return 0;
+}
diff --git a/src/Collapse/example/edge_collapse_conserve_persistence.cpp b/src/Collapse/example/edge_collapse_conserve_persistence.cpp
new file mode 100644
index 00000000..b2c55e7a
--- /dev/null
+++ b/src/Collapse/example/edge_collapse_conserve_persistence.cpp
@@ -0,0 +1,159 @@
+/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ * Author(s): Vincent Rouvreau
+ *
+ * Copyright (C) 2020 Inria
+ *
+ * Modification(s):
+ * - YYYY/MM Author: Description of the modification
+ */
+
+#include <gudhi/Flag_complex_edge_collapser.h>
+#include <gudhi/Simplex_tree.h>
+#include <gudhi/Persistent_cohomology.h>
+#include <gudhi/distance_functions.h>
+#include <gudhi/Points_off_io.h>
+#include <gudhi/graph_simplicial_complex.h>
+
+#include <boost/range/adaptor/transformed.hpp>
+
+#include <utility> // for std::pair
+#include <vector>
+#include <tuple>
+#include <string>
+#include <algorithm> // for std::sort
+
+// Types definition
+
+using Simplex_tree = Gudhi::Simplex_tree<>;
+using Filtration_value = Simplex_tree::Filtration_value;
+using Vertex_handle = Simplex_tree::Vertex_handle;
+using Point = std::vector<Filtration_value>;
+using Vector_of_points = std::vector<Point>;
+
+using Proximity_graph = Gudhi::Proximity_graph<Simplex_tree>;
+
+using Field_Zp = Gudhi::persistent_cohomology::Field_Zp;
+using Persistent_cohomology = Gudhi::persistent_cohomology::Persistent_cohomology<Simplex_tree, Field_Zp>;
+
+using Persistence_interval = std::tuple<int, Filtration_value, Filtration_value>;
+/*
+ * Compare two intervals by length (longest first).
+ */
+struct cmp_intervals_by_length {
+ explicit cmp_intervals_by_length(Simplex_tree * sc)
+ : sc_(sc) { }
+
+ template<typename Persistent_interval>
+ bool operator()(const Persistent_interval & p1, const Persistent_interval & p2) {
+ return (sc_->filtration(get<1>(p1)) - sc_->filtration(get<0>(p1))
+ > sc_->filtration(get<1>(p2)) - sc_->filtration(get<0>(p2)));
+ }
+ Simplex_tree* sc_;
+};
+
+std::vector<Persistence_interval> get_persistence_intervals(Simplex_tree& st, int ambient_dim) {
+ std::vector<Persistence_interval> persistence_intervals;
+ st.expansion(ambient_dim);
+
+ // Sort the simplices in the order of the filtration
+ st.initialize_filtration();
+ // Compute the persistence diagram of the complex
+ Persistent_cohomology pcoh(st);
+ // initializes the coefficient field for homology - must be a prime number
+ int p = 11;
+ pcoh.init_coefficients(p);
+
+ // Default min_interval_length = 0.
+ pcoh.compute_persistent_cohomology();
+ // Custom sort and output persistence
+ cmp_intervals_by_length cmp(&st);
+ auto persistent_pairs = pcoh.get_persistent_pairs();
+ std::sort(std::begin(persistent_pairs), std::end(persistent_pairs), cmp);
+ for (auto pair : persistent_pairs) {
+ persistence_intervals.emplace_back(st.dimension(get<0>(pair)),
+ st.filtration(get<0>(pair)),
+ st.filtration(get<1>(pair)));
+ }
+ return persistence_intervals;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc != 3) {
+ std::cerr << "This program requires an OFF file and minimal threshold value as parameter\n";
+ std::cerr << "For instance: ./Edge_collapse_conserve_persistence ../../data/points/tore3D_300.off 1.\n";
+ exit(-1); // ----- >>
+ }
+
+ std::string off_file_points {argv[1]};
+ double threshold {atof(argv[2])};
+
+ Gudhi::Points_off_reader<Point> off_reader(off_file_points);
+ if (!off_reader.is_valid()) {
+ std::cerr << "Unable to read file " << off_file_points << "\n";
+ exit(-1); // ----- >>
+ }
+
+ Vector_of_points point_vector = off_reader.get_point_cloud();
+ if (point_vector.size() <= 0) {
+ std::cerr << "Empty point cloud." << std::endl;
+ exit(-1); // ----- >>
+ }
+
+ Proximity_graph proximity_graph = Gudhi::compute_proximity_graph<Simplex_tree>(off_reader.get_point_cloud(),
+ threshold,
+ Gudhi::Euclidean_distance());
+
+ if (num_edges(proximity_graph) <= 0) {
+ std::cerr << "Total number of egdes are zero." << std::endl;
+ exit(-1);
+ }
+
+ int ambient_dim = point_vector[0].size();
+
+ // ***** Simplex tree from a flag complex built after collapse *****
+ auto remaining_edges = Gudhi::collapse::flag_complex_collapse_edges(
+ boost::adaptors::transform(edges(proximity_graph), [&](auto&&edge){
+ return std::make_tuple(static_cast<Vertex_handle>(source(edge, proximity_graph)),
+ static_cast<Vertex_handle>(target(edge, proximity_graph)),
+ get(Gudhi::edge_filtration_t(), proximity_graph, edge));
+ })
+ );
+
+ Simplex_tree stree_from_collapse;
+ for (Vertex_handle vertex = 0; static_cast<std::size_t>(vertex) < point_vector.size(); vertex++) {
+ // insert the vertex with a 0. filtration value just like a Rips
+ stree_from_collapse.insert_simplex({vertex}, 0.);
+ }
+ for (auto remaining_edge : remaining_edges) {
+ stree_from_collapse.insert_simplex({std::get<0>(remaining_edge), std::get<1>(remaining_edge)},
+ std::get<2>(remaining_edge));
+ }
+
+ std::vector<Persistence_interval> persistence_intervals_from_collapse = get_persistence_intervals(stree_from_collapse, ambient_dim);
+
+ // ***** Simplex tree from the complete flag complex *****
+ Simplex_tree stree_wo_collapse;
+ stree_wo_collapse.insert_graph(proximity_graph);
+
+ std::vector<Persistence_interval> persistence_intervals_wo_collapse = get_persistence_intervals(stree_wo_collapse, ambient_dim);
+
+ // ***** Comparison *****
+ if (persistence_intervals_wo_collapse.size() != persistence_intervals_from_collapse.size()) {
+ std::cerr << "Number of persistence pairs with collapse is " << persistence_intervals_from_collapse.size() << std::endl;
+ std::cerr << "Number of persistence pairs without collapse is " << persistence_intervals_wo_collapse.size() << std::endl;
+ exit(-1);
+ }
+
+ int return_value = 0;
+ auto ppwoc_ptr = persistence_intervals_wo_collapse.begin();
+ for (auto ppfc: persistence_intervals_from_collapse) {
+ if (ppfc != *ppwoc_ptr) {
+ return_value++;
+ std::cerr << "Without collapse: "
+ << std::get<0>(*ppwoc_ptr) << " " << std::get<1>(*ppwoc_ptr) << " " << std::get<2>(*ppwoc_ptr)
+ << " - With collapse: "
+ << std::get<0>(ppfc) << " " << std::get<1>(ppfc) << " " << std::get<2>(ppfc) << std::endl;
+ }
+ ppwoc_ptr++;
+ }
+ return return_value;
+}
diff --git a/src/Collapse/example/edge_collapse_example_basic.txt b/src/Collapse/example/edge_collapse_example_basic.txt
new file mode 100644
index 00000000..acecacaf
--- /dev/null
+++ b/src/Collapse/example/edge_collapse_example_basic.txt
@@ -0,0 +1,5 @@
+fn[0, 1] = 1
+fn[1, 2] = 1
+fn[2, 3] = 1
+fn[3, 0] = 1
+fn[0, 2] = 2
diff --git a/src/Collapse/include/gudhi/Flag_complex_edge_collapser.h b/src/Collapse/include/gudhi/Flag_complex_edge_collapser.h
new file mode 100644
index 00000000..b6b7f7c1
--- /dev/null
+++ b/src/Collapse/include/gudhi/Flag_complex_edge_collapser.h
@@ -0,0 +1,378 @@
+/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ * Author(s): Siddharth Pritam
+ *
+ * Copyright (C) 2020 Inria
+ *
+ * Modification(s):
+ * - 2020/03 Vincent Rouvreau: integration to the gudhi library
+ * - YYYY/MM Author: Description of the modification
+ */
+
+#ifndef FLAG_COMPLEX_EDGE_COLLAPSER_H_
+#define FLAG_COMPLEX_EDGE_COLLAPSER_H_
+
+#include <gudhi/Debug_utils.h>
+
+#include <boost/functional/hash.hpp>
+#include <boost/iterator/iterator_facade.hpp>
+
+#include <Eigen/Sparse>
+
+#ifdef GUDHI_USE_TBB
+#include <tbb/parallel_sort.h>
+#endif
+
+#include <iostream>
+#include <utility> // for std::pair
+#include <vector>
+#include <unordered_map>
+#include <unordered_set>
+#include <set>
+#include <tuple> // for std::tie
+#include <algorithm> // for std::includes
+#include <iterator> // for std::inserter
+#include <type_traits> // for std::decay
+#include <limits> // for std::numeric_limits
+
+namespace Gudhi {
+
+namespace collapse {
+
+/** \private
+ *
+ * \brief Flag complex sparse matrix data structure.
+ *
+ * \details
+ * This class stores a <a target="_blank" href="https://en.wikipedia.org/wiki/Clique_complex">Flag complex</a>
+ * in an <a target="_blank" href="https://eigen.tuxfamily.org/dox/group__TutorialSparse.html">Eigen sparse matrix</a>.
+ *
+ * \tparam Vertex type must be a signed integer type. It admits a total order <.
+ * \tparam Filtration type for the value of the filtration function. Must be comparable with <.
+ */
+template<typename Vertex, typename Filtration>
+class Flag_complex_edge_collapser {
+ public:
+ /** \brief Re-define Vertex as Vertex_handle type to ease the interface with `Gudhi::Proximity_graph`. */
+ using Vertex_handle = Vertex;
+ /** \brief Re-define Filtration as Filtration_value type to ease the interface with `Gudhi::Proximity_graph`. */
+ using Filtration_value = Filtration;
+
+ private:
+ // internal numbering of vertices and edges
+ using IVertex = std::size_t;
+ using Edge_index = std::size_t;
+ using IEdge = std::pair<IVertex, IVertex>;
+
+ // The sparse matrix data type
+ // (Eigen::SparseMatrix<Edge_index, Eigen::RowMajor> has slow insertions)
+ using Sparse_vector = Eigen::SparseVector<Edge_index>;
+ using Sparse_row_matrix = std::vector<Sparse_vector>;
+
+ // Range of neighbors of a vertex
+ template<bool closed>
+ struct Neighbours {
+ class iterator : public boost::iterator_facade<iterator,
+ IVertex, /* value_type */
+ std::input_iterator_tag, // or boost::single_pass_traversal_tag
+ IVertex /* reference */ >
+ {
+ public:
+ iterator():ptr(nullptr){}
+ iterator(Neighbours const*p):ptr(p){find_valid();}
+ private:
+ friend class boost::iterator_core_access;
+ Neighbours const*ptr;
+ void increment(){
+ ++ptr->it;
+ find_valid();
+ }
+ void find_valid(){
+ auto& it = ptr->it;
+ do {
+ if(!it) { ptr=nullptr; break; }
+ if(IVertex(it.index()) == ptr->u) {
+ if(closed) break;
+ else continue;
+ }
+ Edge_index e = it.value();
+ if(e <= ptr->ec->current_backward || ptr->ec->critical_edge_indicator_[e]) break;
+ } while(++it, true);
+ }
+ bool equal(iterator const& other) const { return ptr == other.ptr; }
+ IVertex dereference() const { return ptr->it.index(); }
+ };
+ typedef iterator const_iterator;
+ mutable typename Sparse_vector::InnerIterator it;
+ Flag_complex_edge_collapser const*ec;
+ IVertex u;
+ iterator begin() const { return this; }
+ iterator end() const { return {}; }
+ explicit Neighbours(Flag_complex_edge_collapser const*p,IVertex u):it(p->sparse_row_adjacency_matrix_[u]),ec(p),u(u){}
+ };
+
+ // A range of row indices
+ using IVertex_vector = std::vector<IVertex>;
+
+ public:
+ /** \brief Filtered_edge is a type to store an edge with its filtration value. */
+ using Filtered_edge = std::tuple<Vertex_handle, Vertex_handle, Filtration_value>;
+
+ private:
+ // Map from row index to its vertex handle
+ std::vector<Vertex_handle> row_to_vertex_;
+
+ // Index of the current edge in the backwards walk. Edges <= current_backward are part of the temporary graph,
+ // while edges > current_backward are removed unless critical_edge_indicator_.
+ Edge_index current_backward = -1;
+
+ // Map from IEdge to its index
+ std::unordered_map<IEdge, Edge_index, boost::hash<IEdge>> iedge_to_index_map_;
+
+ // Boolean vector to indicate if the edge is critical.
+ std::vector<bool> critical_edge_indicator_;
+
+ // Map from vertex handle to its row index
+ std::unordered_map<Vertex_handle, IVertex> vertex_to_row_;
+
+ // Stores the sparse matrix of edge indices representing the original graph.
+ // The matrix rows and columns are indexed by IVertex.
+ Sparse_row_matrix sparse_row_adjacency_matrix_;
+
+ // The input, a vector of filtered edges.
+ std::vector<Filtered_edge> f_edge_vector_;
+
+ // Edge is the actual edge (u,v), with Vertex_handle u and v, not IVertex.
+ bool edge_is_dominated(Vertex_handle u, Vertex_handle v) const
+ {
+ const IVertex rw_u = vertex_to_row_.at(u);
+ const IVertex rw_v = vertex_to_row_.at(v);
+#ifdef DEBUG_TRACES
+ std::cout << "The edge {" << u << ", " << v << "} is going for domination check." << std::endl;
+#endif // DEBUG_TRACES
+ auto common_neighbours = open_common_neighbours_row_index(rw_u, rw_v);
+#ifdef DEBUG_TRACES
+ std::cout << "And its common neighbours are." << std::endl;
+ for (auto neighbour : common_neighbours) {
+ std::cout << row_to_vertex_[neighbour] << ", " ;
+ }
+ std::cout<< std::endl;
+#endif // DEBUG_TRACES
+ if (common_neighbours.size() == 1)
+ return true;
+ else
+ for (auto rw_c : common_neighbours) {
+ auto neighbours_c = neighbours_row_index<true>(rw_c);
+ // If neighbours_c contains the common neighbours.
+ if (std::includes(neighbours_c.begin(), neighbours_c.end(),
+ common_neighbours.begin(), common_neighbours.end()))
+ return true;
+ }
+ return false;
+ }
+
+ // Returns the edges connecting u and v (extremities of crit) to their common neighbors (not themselves)
+ std::set<Edge_index> three_clique_indices(Edge_index crit) {
+ std::set<Edge_index> edge_indices;
+
+ Vertex_handle u = std::get<0>(f_edge_vector_[crit]);
+ Vertex_handle v = std::get<1>(f_edge_vector_[crit]);
+
+#ifdef DEBUG_TRACES
+ std::cout << "The current critical edge to re-check criticality with filt value is : f {" << u << "," << v
+ << "} = " << std::get<2>(f_edge_vector_[crit]) << std::endl;
+#endif // DEBUG_TRACES
+ auto rw_u = vertex_to_row_[u];
+ auto rw_v = vertex_to_row_[v];
+
+ IVertex_vector common_neighbours = open_common_neighbours_row_index(rw_u, rw_v);
+
+ for (auto rw_c : common_neighbours) {
+ IEdge e_with_new_nbhr_v = std::minmax(rw_u, rw_c);
+ IEdge e_with_new_nbhr_u = std::minmax(rw_v, rw_c);
+ edge_indices.emplace(iedge_to_index_map_[e_with_new_nbhr_v]);
+ edge_indices.emplace(iedge_to_index_map_[e_with_new_nbhr_u]);
+ }
+ return edge_indices;
+ }
+
+ // Detect and set all edges that are becoming critical
+ template<typename FilteredEdgeOutput>
+ void set_edge_critical(Edge_index indx, Filtration_value filt, FilteredEdgeOutput filtered_edge_output) {
+#ifdef DEBUG_TRACES
+ std::cout << "The curent index with filtration value " << indx << ", " << filt << " is primary critical" <<
+ std::endl;
+#endif // DEBUG_TRACES
+ std::set<Edge_index> effected_indices = three_clique_indices(indx);
+ // Cannot use boost::adaptors::reverse in such dynamic cases apparently
+ for (auto it = effected_indices.rbegin(); it != effected_indices.rend(); ++it) {
+ current_backward = *it;
+ Vertex_handle u = std::get<0>(f_edge_vector_[current_backward]);
+ Vertex_handle v = std::get<1>(f_edge_vector_[current_backward]);
+ // If current_backward is not critical, it should be processed; otherwise it stays in the graph
+ if (!critical_edge_indicator_[current_backward]) {
+ if (!edge_is_dominated(u, v)) {
+#ifdef DEBUG_TRACES
+ std::cout << "The curent index became critical " << current_backward << std::endl;
+#endif // DEBUG_TRACES
+ critical_edge_indicator_[current_backward] = true;
+ filtered_edge_output(u, v, filt);
+ std::set<Edge_index> inner_effected_indcs = three_clique_indices(current_backward);
+ for (auto inr_idx : inner_effected_indcs) {
+ if(inr_idx < current_backward) // && !critical_edge_indicator_[inr_idx]
+ effected_indices.emplace(inr_idx);
+ }
+#ifdef DEBUG_TRACES
+ std::cout << "The following edge is critical with filt value: {" << u << "," << v << "}; "
+ << filt << std::endl;
+#endif // DEBUG_TRACES
+ }
+ }
+ }
+ // Clear the implicit "removed from graph" data structure
+ current_backward = -1;
+ }
+
+ // Returns list of neighbors of a particular vertex.
+ template<bool closed>
+ auto neighbours_row_index(IVertex rw_u) const
+ {
+ return Neighbours<closed>(this, rw_u);
+ }
+
+ // Returns the list of open common neighbours of the edge {u,v}.
+ IVertex_vector open_common_neighbours_row_index(IVertex rw_u, IVertex rw_v) const
+ {
+ auto non_zero_indices_u = neighbours_row_index<false>(rw_u);
+ auto non_zero_indices_v = neighbours_row_index<false>(rw_v);
+ IVertex_vector common;
+ std::set_intersection(non_zero_indices_u.begin(), non_zero_indices_u.end(), non_zero_indices_v.begin(),
+ non_zero_indices_v.end(), std::back_inserter(common));
+
+ return common;
+ }
+
+ // Insert a vertex in the data structure
+ IVertex insert_vertex(Vertex_handle vertex) {
+ auto n = row_to_vertex_.size();
+ auto result = vertex_to_row_.emplace(vertex, n);
+ // If it was not already inserted - Value won't be updated by emplace if it is already present
+ if (result.second) {
+ // Expand the matrix. The size of rows is irrelevant.
+ sparse_row_adjacency_matrix_.emplace_back((std::numeric_limits<Eigen::Index>::max)());
+ // Initializing the diagonal element of the adjacency matrix corresponding to this new vertex.
+ sparse_row_adjacency_matrix_[n].insert(n) = -1; // not an edge
+ // Must be done after reading its size()
+ row_to_vertex_.push_back(vertex);
+ }
+ return result.first->second;
+ }
+
+ // Insert an edge in the data structure
+ // @exception std::invalid_argument In debug mode, if u == v
+ IEdge insert_new_edge(Vertex_handle u, Vertex_handle v, Edge_index idx)
+ {
+ GUDHI_CHECK((u != v), std::invalid_argument("Flag_complex_edge_collapser::insert_new_edge with u == v"));
+ // The edge must not be added before, it should be a new edge.
+ IVertex rw_u = insert_vertex(u);
+ IVertex rw_v = insert_vertex(v);
+#ifdef DEBUG_TRACES
+ std::cout << "Inserting the edge " << u <<", " << v << std::endl;
+#endif // DEBUG_TRACES
+ sparse_row_adjacency_matrix_[rw_u].insert(rw_v) = idx;
+ sparse_row_adjacency_matrix_[rw_v].insert(rw_u) = idx;
+ return std::minmax(rw_u, rw_v);
+ }
+
+ public:
+ /** \brief Flag_complex_edge_collapser constructor from a range of filtered edges.
+ *
+ * @param[in] edges Range of filtered edges. There is no need for the range to be sorted, as sorting is performed in
+ * `Flag_complex_edge_collapser::process_edges`.
+ *
+ * \tparam FilteredEdgeRange must be a range for which std::begin and std::end return iterators on a
+ * `Flag_complex_edge_collapser::Filtered_edge`.
+ */
+ template<typename FilteredEdgeRange>
+ Flag_complex_edge_collapser(const FilteredEdgeRange& edges)
+ : f_edge_vector_(std::begin(edges), std::end(edges)) { }
+
+ /** \brief Performs edge collapses, processing edges in increasing order of filtration value.
+ *
+ * \tparam FilteredEdgeOutput is a functor that is called on the output edges, in non-decreasing order of
+ * filtration, as filtered_edge_output(u, v, f) where u and v are Vertex_handle representing the extremities of the
+ * edge, and f is its new Filtration_value.
+ */
+ template<typename FilteredEdgeOutput>
+ void process_edges(FilteredEdgeOutput filtered_edge_output) {
+ // Sort edges
+ auto sort_by_filtration = [](const Filtered_edge& edge_a, const Filtered_edge& edge_b) -> bool
+ {
+ return (std::get<2>(edge_a) < std::get<2>(edge_b));
+ };
+
+#ifdef GUDHI_USE_TBB
+ tbb::parallel_sort(f_edge_vector_.begin(), f_edge_vector_.end(), sort_by_filtration);
+#else
+ std::sort(f_edge_vector_.begin(), f_edge_vector_.end(), sort_by_filtration);
+#endif
+
+ for (Edge_index endIdx = 0; endIdx < f_edge_vector_.size(); endIdx++) {
+ Filtered_edge fec = f_edge_vector_[endIdx];
+ Vertex_handle u = std::get<0>(fec);
+ Vertex_handle v = std::get<1>(fec);
+ Filtration_value filt = std::get<2>(fec);
+
+ // Inserts the edge in the sparse matrix to update the graph (G_i)
+ IEdge ie = insert_new_edge(u, v, endIdx);
+
+ iedge_to_index_map_.emplace(ie, endIdx);
+ critical_edge_indicator_.push_back(false);
+
+ if (!edge_is_dominated(u, v)) {
+ critical_edge_indicator_[endIdx] = true;
+ filtered_edge_output(u, v, filt);
+ if (endIdx > 1)
+ set_edge_critical(endIdx, filt, filtered_edge_output);
+ }
+ }
+ }
+
+};
+
+/** \brief Implicitly constructs a flag complex from edges as an input, collapses edges while preserving the persistent
+ * homology and returns the remaining edges as a range.
+ *
+ * \param[in] edges Range of filtered edges. There is no need for the range to be sorted, as sorting is performed internally.
+ *
+ * \tparam FilteredEdgeRange furnishes `std::begin` and `std::end` methods returning iterators on a
+ * FilteredEdge of type `std::tuple<Vertex_handle, Vertex_handle, Filtration_value>` where `Vertex_handle` is the type
+ * of a vertex index and `Filtration_value` is the type of an edge filtration value.
+ *
+ * \return Remaining edges after collapse as a range of
+ * `std::tuple<Vertex_handle, Vertex_handle, Filtration_value>`.
+ *
+ * \ingroup edge_collapse
+ *
+ */
+template<class FilteredEdgeRange> auto flag_complex_collapse_edges(const FilteredEdgeRange& edges) {
+ auto first_edge_itr = std::begin(edges);
+ using Vertex_handle = std::decay_t<decltype(std::get<0>(*first_edge_itr))>;
+ using Filtration_value = std::decay_t<decltype(std::get<2>(*first_edge_itr))>;
+ using Edge_collapser = Flag_complex_edge_collapser<Vertex_handle, Filtration_value>;
+ std::vector<typename Edge_collapser::Filtered_edge> remaining_edges;
+ if (first_edge_itr != std::end(edges)) {
+ Edge_collapser edge_collapser(edges);
+ edge_collapser.process_edges(
+ [&remaining_edges](Vertex_handle u, Vertex_handle v, Filtration_value filtration) {
+ // insert the edge
+ remaining_edges.emplace_back(u, v, filtration);
+ });
+ }
+ return remaining_edges;
+}
+
+} // namespace collapse
+
+} // namespace Gudhi
+
+#endif // FLAG_COMPLEX_EDGE_COLLAPSER_H_
diff --git a/src/Collapse/test/CMakeLists.txt b/src/Collapse/test/CMakeLists.txt
new file mode 100644
index 00000000..c7eafb46
--- /dev/null
+++ b/src/Collapse/test/CMakeLists.txt
@@ -0,0 +1,9 @@
+project(Collapse_tests)
+
+include(GUDHI_boost_test)
+
+add_executable ( Collapse_test_unit collapse_unit_test.cpp )
+if (TBB_FOUND)
+ target_link_libraries(Collapse_test_unit ${TBB_LIBRARIES})
+endif()
+gudhi_add_boost_test(Collapse_test_unit)
diff --git a/src/Collapse/test/collapse_unit_test.cpp b/src/Collapse/test/collapse_unit_test.cpp
new file mode 100644
index 00000000..b8876246
--- /dev/null
+++ b/src/Collapse/test/collapse_unit_test.cpp
@@ -0,0 +1,198 @@
+/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ * Author(s): Vincent Rouvreau
+ *
+ * Copyright (C) 2020 Inria
+ *
+ * Modification(s):
+ * - YYYY/MM Author: Description of the modification
+ */
+
+
+#define BOOST_TEST_DYN_LINK
+#define BOOST_TEST_MODULE "collapse"
+#include <boost/test/unit_test.hpp>
+#include <boost/mpl/list.hpp>
+#include <boost/range/adaptor/transformed.hpp>
+
+#include <gudhi/Flag_complex_edge_collapser.h>
+#include <gudhi/distance_functions.h>
+#include <gudhi/graph_simplicial_complex.h>
+
+#include <iostream>
+#include <tuple>
+#include <vector>
+#include <array>
+#include <cmath>
+#include <limits> // for std::numeric_limits
+
+struct Simplicial_complex {
+ using Vertex_handle = short;
+ using Filtration_value = float;
+};
+
+using Vertex_handle = Simplicial_complex::Vertex_handle;
+using Filtration_value = Simplicial_complex::Filtration_value;
+using Filtered_edge = std::tuple<Vertex_handle, Vertex_handle, Filtration_value>;
+using Filtered_edge_list = std::vector<Filtered_edge>;
+
+template<typename Filtered_edge_range>
+bool find_edge_in_list(const Filtered_edge& edge, const Filtered_edge_range& edge_list) {
+ for (auto edge_from_list : edge_list) {
+ if (edge_from_list == edge)
+ return true;
+ }
+ return false;
+}
+
+template<typename Filtered_edge_range>
+void trace_and_check_collapse(const Filtered_edge_range& filtered_edges, const Filtered_edge_list& removed_edges) {
+ std::cout << "BEFORE COLLAPSE - Total number of edges: " << filtered_edges.size() << std::endl;
+ BOOST_CHECK(filtered_edges.size() > 0);
+ for (auto filtered_edge : filtered_edges) {
+ std::cout << "f[" << std::get<0>(filtered_edge) << ", " << std::get<1>(filtered_edge) << "] = "
+ << std::get<2>(filtered_edge) << std::endl;
+ }
+
+ std::cout << "COLLAPSE - keep edges: " << std::endl;
+ auto remaining_edges = Gudhi::collapse::flag_complex_collapse_edges(filtered_edges);
+
+ std::cout << "AFTER COLLAPSE - Total number of edges: " << remaining_edges.size() << std::endl;
+ BOOST_CHECK(remaining_edges.size() <= filtered_edges.size());
+ for (auto filtered_edge_from_collapse : remaining_edges) {
+ std::cout << "f[" << std::get<0>(filtered_edge_from_collapse) << ", " << std::get<1>(filtered_edge_from_collapse)
+ << "] = " << std::get<2>(filtered_edge_from_collapse) << std::endl;
+ // Check each edge from collapse is in the input
+ BOOST_CHECK(find_edge_in_list(filtered_edge_from_collapse, filtered_edges));
+ }
+
+ std::cout << "CHECK COLLAPSE - Total number of removed edges: " << removed_edges.size() << std::endl;
+ for (auto removed_filtered_edge : removed_edges) {
+ std::cout << "f[" << std::get<0>(removed_filtered_edge) << ", " << std::get<1>(removed_filtered_edge) << "] = "
+ << std::get<2>(removed_filtered_edge) << std::endl;
+ // Check each removed edge from collapse is in the input
+ BOOST_CHECK(!find_edge_in_list(removed_filtered_edge, remaining_edges));
+ }
+
+}
+
+BOOST_AUTO_TEST_CASE(collapse) {
+ std::cout << "***** COLLAPSE *****" << std::endl;
+ // 1 2
+ // o---o
+ // | |
+ // | |
+ // | |
+ // o---o
+ // 0 3
+ Filtered_edge_list edges {{0, 1, 1.},
+ {1, 2, 1.},
+ {2, 3, 1.},
+ {3, 0, 1.}};
+ trace_and_check_collapse(edges, {});
+
+ // 1 2
+ // o---o
+ // |\ /|
+ // | x |
+ // |/ \|
+ // o---o
+ // 0 3
+ edges.emplace_back(0, 2, 2.);
+ edges.emplace_back(1, 3, 2.);
+ trace_and_check_collapse(edges, {{1, 3, 2.}});
+
+ // 1 2 4
+ // o---o---o
+ // |\ /| |
+ // | x | |
+ // |/ \| |
+ // o---o---o
+ // 0 3 5
+ edges.emplace_back(2, 4, 3.);
+ edges.emplace_back(4, 5, 3.);
+ edges.emplace_back(5, 3, 3.);
+ trace_and_check_collapse(edges, {{1, 3, 2.}});
+
+ // 1 2 4
+ // o---o---o
+ // |\ /|\ /|
+ // | x | x |
+ // |/ \|/ \|
+ // o---o---o
+ // 0 3 5
+ edges.emplace_back(2, 5, 4.);
+ edges.emplace_back(4, 3, 4.);
+ trace_and_check_collapse(edges, {{1, 3, 2.}, {4, 3, 4.}});
+
+ // 1 2 4
+ // o---o---o
+ // |\ /|\ /|
+ // | x | x | + [0,4] and [1,5]
+ // |/ \|/ \|
+ // o---o---o
+ // 0 3 5
+ edges.emplace_back(1, 5, 5.);
+ edges.emplace_back(0, 4, 5.);
+ trace_and_check_collapse(edges, {{1, 3, 2.}, {4, 3, 4.}, {0, 4, 5.}});
+}
+
+BOOST_AUTO_TEST_CASE(collapse_from_array) {
+ std::cout << "***** COLLAPSE FROM ARRAY *****" << std::endl;
+ // 1 2
+ // o---o
+ // |\ /|
+ // | x |
+ // |/ \|
+ // o---o
+ // 0 3
+ std::array<Filtered_edge, 6> f_edge_array = {{{0, 1, 1.},
+ {1, 2, 1.},
+ {2, 3, 1.},
+ {3, 0, 1.},
+ {0, 2, 2.},
+ {1, 3, 2.}}};
+ trace_and_check_collapse(f_edge_array, {{1, 3, 2.}});
+}
+
+BOOST_AUTO_TEST_CASE(collapse_from_proximity_graph) {
+ std::cout << "***** COLLAPSE FROM PROXIMITY GRAPH *****" << std::endl;
+ // 1 2
+ // o---o
+ // |\ /|
+ // | x |
+ // |/ \|
+ // o---o
+ // 0 3
+ std::vector<std::vector<Filtration_value>> point_cloud = {{0., 0.},
+ {0., 1.},
+ {1., 0.},
+ {1., 1.} };
+
+ Filtration_value threshold = std::numeric_limits<Filtration_value>::infinity();
+ using Proximity_graph = Gudhi::Proximity_graph<Simplicial_complex>;
+ Proximity_graph proximity_graph = Gudhi::compute_proximity_graph<Simplicial_complex>(point_cloud,
+ threshold,
+ Gudhi::Euclidean_distance());
+
+ auto remaining_edges = Gudhi::collapse::flag_complex_collapse_edges(
+ boost::adaptors::transform(edges(proximity_graph), [&](auto&&edge){
+ return std::make_tuple(static_cast<Vertex_handle>(source(edge, proximity_graph)),
+ static_cast<Vertex_handle>(target(edge, proximity_graph)),
+ get(Gudhi::edge_filtration_t(), proximity_graph, edge));
+ })
+ );
+
+ BOOST_CHECK(remaining_edges.size() == 5);
+
+ std::size_t filtration_is_edge_length_nb = 0;
+ std::size_t filtration_is_diagonal_length_nb = 0;
+ float epsilon = std::numeric_limits<Filtration_value>::epsilon();
+ for (auto filtered_edge : remaining_edges) {
+ if (std::get<2>(filtered_edge) == 1.)
+ filtration_is_edge_length_nb++;
+ if (std::fabs(std::get<2>(filtered_edge) - std::sqrt(2.)) <= epsilon)
+ filtration_is_diagonal_length_nb++;
+ }
+ BOOST_CHECK(filtration_is_edge_length_nb == 4);
+ BOOST_CHECK(filtration_is_diagonal_length_nb == 1);
+}
diff --git a/src/Collapse/utilities/CMakeLists.txt b/src/Collapse/utilities/CMakeLists.txt
new file mode 100644
index 00000000..c742144b
--- /dev/null
+++ b/src/Collapse/utilities/CMakeLists.txt
@@ -0,0 +1,33 @@
+project(Collapse_utilities)
+
+# From a point cloud
+add_executable ( point_cloud_edge_collapse_rips_persistence point_cloud_edge_collapse_rips_persistence.cpp )
+target_link_libraries(point_cloud_edge_collapse_rips_persistence Boost::program_options)
+
+if (TBB_FOUND)
+ target_link_libraries(point_cloud_edge_collapse_rips_persistence ${TBB_LIBRARIES})
+endif()
+add_test(NAME Edge_collapse_utilities_point_cloud_rips_persistence COMMAND $<TARGET_FILE:point_cloud_edge_collapse_rips_persistence>
+ "${CMAKE_SOURCE_DIR}/data/points/tore3D_1307.off" "-r" "0.25" "-m" "0.5" "-d" "3" "-p" "3" "-o" "off_results.pers")
+
+install(TARGETS point_cloud_edge_collapse_rips_persistence DESTINATION bin)
+
+# From a distance matrix
+add_executable ( distance_matrix_edge_collapse_rips_persistence distance_matrix_edge_collapse_rips_persistence.cpp )
+target_link_libraries(distance_matrix_edge_collapse_rips_persistence Boost::program_options)
+
+if (TBB_FOUND)
+ target_link_libraries(distance_matrix_edge_collapse_rips_persistence ${TBB_LIBRARIES})
+endif()
+add_test(NAME Edge_collapse_utilities_distance_matrix_rips_persistence COMMAND $<TARGET_FILE:distance_matrix_edge_collapse_rips_persistence>
+ "${CMAKE_SOURCE_DIR}/data/distance_matrix/tore3D_1307_distance_matrix.csv" "-r" "0.25" "-m" "0.5" "-d" "3" "-p" "3" "-o" "csv_results.pers")
+
+install(TARGETS distance_matrix_edge_collapse_rips_persistence DESTINATION bin)
+
+if (DIFF_PATH)
+ add_test(Edge_collapse_utilities_diff_persistence ${DIFF_PATH}
+ "off_results.pers" "csv_results.pers")
+ set_tests_properties(Edge_collapse_utilities_diff_persistence PROPERTIES DEPENDS
+ "Edge_collapse_utilities_point_cloud_rips_persistence;Edge_collapse_utilities_distance_matrix_rips_persistence")
+
+endif()
diff --git a/src/Collapse/utilities/collapse.md b/src/Collapse/utilities/collapse.md
new file mode 100644
index 00000000..1f41bb1f
--- /dev/null
+++ b/src/Collapse/utilities/collapse.md
@@ -0,0 +1,63 @@
+---
+layout: page
+title: "Collapse"
+meta_title: "Edge collapse"
+teaser: ""
+permalink: /collapse/
+---
+{::comment}
+Leave the lines above as it is required by the web site generator 'Jekyll'
+{:/comment}
+
+
+## point_cloud_edge_collapse_rips_persistence ##
+This program computes the Rips graph defined on a set of input points, using Euclidean distance, and collapses edges.
+This program then computes persistent homology with coefficient field *Z/pZ* of the Rips complex built on top of these collapsed edges.
+The output diagram contains one bar per line, written with the convention:
+
+`p dim birth death`
+
+where `dim` is the dimension of the homological feature, `birth` and `death` are respectively the birth and death of the feature, and `p` is the characteristic of the field *Z/pZ* used for homology coefficients (`p` must be a prime number).
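+
+For instance, with `p = 3`, a line such as `3 0 0 inf` records an essential 0-dimensional class that is born at
+filtration value 0 and never dies (the values shown here are purely illustrative).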
+
+**Usage**
+
+`point_cloud_edge_collapse_rips_persistence [options] <OFF input file>`
+
+**Allowed options**
+
+* `-h [ --help ]` Produce help message
+* `-o [ --output-file ]` Name of file in which the persistence diagram is written. By default, it is printed to standard output.
+* `-r [ --max-edge-length ]` (default = inf) Maximal length of an edge for the Rips complex construction.
+* `-d [ --cpx-dimension ]` (default = 1) Maximal dimension of the Rips complex we want to compute.
+* `-p [ --field-charac ]` (default = 11) Characteristic p of the coefficient field Z/pZ for computing homology.
+* `-m [ --min-persistence ]` (default = 0) Minimal lifetime of homology feature to be recorded. Enter a negative value to see zero length intervals.
+* `-i [ --edge-collapse-iterations ]` (default = 1) Number of edge collapse iterations to perform.
+
+Beware: this program may use a lot of RAM and take a lot of time if `max-edge-length` is set to a large value.
+
+**Example 1 with Z/2Z coefficients**
+
+`point_cloud_edge_collapse_rips_persistence ../../data/points/tore3D_1307.off -r 0.25 -m 0.5 -d 3 -p 2`
+
+**Example 2 with Z/3Z coefficients**
+
+`point_cloud_edge_collapse_rips_persistence ../../data/points/tore3D_1307.off -r 0.25 -m 0.5 -d 3 -p 3`
+
+
+## distance_matrix_edge_collapse_rips_persistence ##
+
+Same as `point_cloud_edge_collapse_rips_persistence` but taking a distance matrix as input.
+
+**Usage**
+
+`distance_matrix_edge_collapse_rips_persistence [options] <CSV input file>`
+
+where
+`<CSV input file>` is the path to the file containing a distance matrix. It can be a square or lower triangular matrix, with ';' as separator.
+The code does not check that the input is a distance matrix. It is the user's responsibility to provide a valid input.
+Please refer to data/distance_matrix/lower_triangular_distance_matrix.csv for an example of such a file.
+
+**Example**
+
+`distance_matrix_edge_collapse_rips_persistence data/distance_matrix/full_square_distance_matrix.csv -r 15 -d 3 -p 3 -m 0`
+
diff --git a/src/Collapse/utilities/distance_matrix_edge_collapse_rips_persistence.cpp b/src/Collapse/utilities/distance_matrix_edge_collapse_rips_persistence.cpp
new file mode 100644
index 00000000..11ee5871
--- /dev/null
+++ b/src/Collapse/utilities/distance_matrix_edge_collapse_rips_persistence.cpp
@@ -0,0 +1,152 @@
+/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ * Author(s): Siddharth Pritam, Vincent Rouvreau
+ *
+ * Copyright (C) 2020 Inria
+ *
+ * Modification(s):
+ * - YYYY/MM Author: Description of the modification
+ */
+
+#include <gudhi/Flag_complex_edge_collapser.h>
+#include <gudhi/Simplex_tree.h>
+#include <gudhi/Persistent_cohomology.h>
+#include <gudhi/reader_utils.h>
+#include <gudhi/graph_simplicial_complex.h>
+
+#include <boost/program_options.hpp>
+#include <boost/range/adaptor/transformed.hpp>
+
+#include <fstream> // for std::ofstream
+#include <limits> // for std::numeric_limits
+#include <string>
+#include <vector>
+#include <tuple>
+
+using Simplex_tree = Gudhi::Simplex_tree<Gudhi::Simplex_tree_options_fast_persistence>;
+using Filtration_value = Simplex_tree::Filtration_value;
+using Vertex_handle = Simplex_tree::Vertex_handle;
+
+using Filtered_edge = std::tuple<Vertex_handle, Vertex_handle, Filtration_value>;
+using Proximity_graph = Gudhi::Proximity_graph<Simplex_tree>;
+
+using Field_Zp = Gudhi::persistent_cohomology::Field_Zp;
+using Persistent_cohomology = Gudhi::persistent_cohomology::Persistent_cohomology<Simplex_tree, Field_Zp>;
+using Distance_matrix = std::vector<std::vector<Filtration_value>>;
+
+void program_options(int argc, char* argv[], std::string& csv_matrix_file, std::string& filediag,
+ Filtration_value& threshold, int& dim_max, int& p, int& edge_collapse_iter_nb,
+ Filtration_value& min_persistence);
+
+int main(int argc, char* argv[]) {
+ std::string csv_matrix_file;
+ std::string filediag;
+ Filtration_value threshold;
+ int dim_max = 2;
+ int p;
+ int edge_collapse_iter_nb;
+ Filtration_value min_persistence;
+
+ program_options(argc, argv, csv_matrix_file, filediag, threshold, dim_max, p, edge_collapse_iter_nb,
+ min_persistence);
+
+ Distance_matrix distances = Gudhi::read_lower_triangular_matrix_from_csv_file<Filtration_value>(csv_matrix_file);
+ std::cout << "Read the distance matrix succesfully, of size: " << distances.size() << std::endl;
+
+ Proximity_graph proximity_graph = Gudhi::compute_proximity_graph<Simplex_tree>(boost::irange((size_t)0,
+ distances.size()),
+ threshold,
+ [&distances](size_t i, size_t j) {
+ return distances[j][i];
+ });
+
+ auto edges_from_graph = boost::adaptors::transform(edges(proximity_graph), [&](auto&&edge){
+ return std::make_tuple(source(edge, proximity_graph),
+ target(edge, proximity_graph),
+ get(Gudhi::edge_filtration_t(), proximity_graph, edge));
+ });
+ std::vector<Filtered_edge> edges_list(edges_from_graph.begin(), edges_from_graph.end());
+ std::vector<Filtered_edge> remaining_edges;
+ for (int iter = 0; iter < edge_collapse_iter_nb; iter++) {
+ remaining_edges = Gudhi::collapse::flag_complex_collapse_edges(edges_list);
+ edges_list = std::move(remaining_edges);
+ remaining_edges.clear();
+ }
+
+ Simplex_tree stree;
+ for (Vertex_handle vertex = 0; static_cast<std::size_t>(vertex) < distances.size(); vertex++) {
+ // insert the vertex with a 0. filtration value just like a Rips
+ stree.insert_simplex({vertex}, 0.);
+ }
+ for (auto filtered_edge : edges_list) {
+ stree.insert_simplex({std::get<0>(filtered_edge), std::get<1>(filtered_edge)}, std::get<2>(filtered_edge));
+ }
+
+ stree.expansion(dim_max);
+
+ std::cout << "The complex contains " << stree.num_simplices() << " simplices after collapse. \n";
+ std::cout << " and has dimension " << stree.dimension() << " \n";
+
+ // Sort the simplices in the order of the filtration
+ stree.initialize_filtration();
+ // Compute the persistence diagram of the complex
+ Persistent_cohomology pcoh(stree);
+ // initializes the coefficient field for homology - must be a prime number
+ pcoh.init_coefficients(p);
+
+ pcoh.compute_persistent_cohomology(min_persistence);
+ if (filediag.empty()) {
+ pcoh.output_diagram();
+ } else {
+ std::ofstream out(filediag);
+ pcoh.output_diagram(out);
+ out.close();
+ }
+ return 0;
+}
+
+void program_options(int argc, char* argv[], std::string& csv_matrix_file, std::string& filediag,
+ Filtration_value& threshold, int& dim_max, int& p, int& edge_collapse_iter_nb,
+ Filtration_value& min_persistence) {
+ namespace po = boost::program_options;
+ po::options_description hidden("Hidden options");
+ hidden.add_options()(
+ "input-file", po::value<std::string>(&csv_matrix_file),
+ "Name of file containing a distance matrix. Can be square or lower triangular matrix. Separator is ';'.");
+
+ po::options_description visible("Allowed options", 100);
+ visible.add_options()("help,h", "produce help message")(
+ "output-file,o", po::value<std::string>(&filediag)->default_value(std::string()),
+ "Name of file in which the persistence diagram is written. Default print in std::cout")(
+ "max-edge-length,r",
+ po::value<Filtration_value>(&threshold)->default_value(std::numeric_limits<Filtration_value>::infinity()),
+ "Maximal length of an edge for the Rips complex construction.")(
+ "cpx-dimension,d", po::value<int>(&dim_max)->default_value(1),
+ "Maximal dimension of the Rips complex we want to compute.")(
+ "field-charac,p", po::value<int>(&p)->default_value(11),
+ "Characteristic p of the coefficient field Z/pZ for computing homology.")(
+ "edge-collapse-iterations,i", po::value<int>(&edge_collapse_iter_nb)->default_value(1),
+ "Number of iterations edge collapse is performed.")(
+ "min-persistence,m", po::value<Filtration_value>(&min_persistence),
+ "Minimal lifetime of homology feature to be recorded. Default is 0. Enter a negative value to see zero length "
+ "intervals");
+
+ po::positional_options_description pos;
+ pos.add("input-file", 1);
+
+ po::options_description all;
+ all.add(visible).add(hidden);
+
+ po::variables_map vm;
+ po::store(po::command_line_parser(argc, argv).options(all).positional(pos).run(), vm);
+ po::notify(vm);
+
+ if (vm.count("help") || !vm.count("input-file")) {
+ std::cout << std::endl;
+ std::cout << "Compute the persistent homology with coefficient field Z/pZ \n";
+ std::cout << "of a Rips complex after edge collapse defined on a set of distance matrix.\n \n";
+ std::cout << "The output diagram contains one bar per line, written with the convention: \n";
+ std::cout << " p dim b d \n";
+ std::cout << "where dim is the dimension of the homological feature,\n";
+ std::cout << "b and d are respectively the birth and death of the feature and \n";
+ std::cout << "p is the characteristic of the field Z/pZ used for homology coefficients." << std::endl << std::endl;
+
+ std::cout << "Usage: " << argv[0] << " [options] input-file" << std::endl << std::endl;
+ std::cout << visible << std::endl;
+ exit(-1);
+ }
+}
diff --git a/src/Collapse/utilities/point_cloud_edge_collapse_rips_persistence.cpp b/src/Collapse/utilities/point_cloud_edge_collapse_rips_persistence.cpp
new file mode 100644
index 00000000..0eea742c
--- /dev/null
+++ b/src/Collapse/utilities/point_cloud_edge_collapse_rips_persistence.cpp
@@ -0,0 +1,181 @@
+/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ * Author(s): Siddharth Pritam, Vincent Rouvreau
+ *
+ * Copyright (C) 2020 Inria
+ *
+ * Modification(s):
+ * - YYYY/MM Author: Description of the modification
+ */
+
+#include <gudhi/Flag_complex_edge_collapser.h>
+#include <gudhi/Simplex_tree.h>
+#include <gudhi/Persistent_cohomology.h>
+#include <gudhi/distance_functions.h>
+#include <gudhi/Points_off_io.h>
+#include <gudhi/graph_simplicial_complex.h>
+
+#include <boost/program_options.hpp>
+#include <boost/range/adaptor/transformed.hpp>
+
+#include <utility>  // for std::pair
+#include <vector>
+#include <tuple>
+
+// Types definition
+
+using Simplex_tree = Gudhi::Simplex_tree<>;
+using Filtration_value = Simplex_tree::Filtration_value;
+using Vertex_handle = Simplex_tree::Vertex_handle;
+using Point = std::vector<Filtration_value>;
+using Vector_of_points = std::vector<Point>;
+
+using Filtered_edge = std::tuple<Vertex_handle, Vertex_handle, Filtration_value>;
+using Proximity_graph = Gudhi::Proximity_graph<Simplex_tree>;
+
+using Field_Zp = Gudhi::persistent_cohomology::Field_Zp;
+using Persistent_cohomology = Gudhi::persistent_cohomology::Persistent_cohomology<Simplex_tree, Field_Zp>;
+
+void program_options(int argc, char* argv[], std::string& off_file_points, std::string& filediag,
+ Filtration_value& threshold, int& dim_max, int& p, int& edge_collapse_iter_nb,
+ Filtration_value& min_persistence);
+
+int main(int argc, char* argv[]) {
+ std::string off_file_points;
+ std::string filediag;
+ double threshold;
+ int dim_max;
+ int p;
+ int edge_collapse_iter_nb;
+ double min_persistence;
+
+ program_options(argc, argv, off_file_points, filediag, threshold, dim_max, p, edge_collapse_iter_nb, min_persistence);
+
+ std::cout << "The current input values to run the program is: " << std::endl;
+ std::cout << "min_persistence, threshold, max_complex_dimension, off_file_points, filediag"
+ << std::endl;
+ std::cout << min_persistence << ", " << threshold << ", " << dim_max
+ << ", " << off_file_points << ", " << filediag << std::endl;
+
+ Gudhi::Points_off_reader<Point> off_reader(off_file_points);
+ if (!off_reader.is_valid()) {
+ std::cerr << "Unable to read file " << off_file_points << "\n";
+    exit(-1);
+ }
+
+ Vector_of_points point_vector = off_reader.get_point_cloud();
+  if (point_vector.empty()) {
+ std::cerr << "Empty point cloud." << std::endl;
+    exit(-1);
+ }
+
+ std::cout << "Successfully read " << point_vector.size() << " point_vector.\n";
+ std::cout << "Ambient dimension is " << point_vector[0].size() << ".\n";
+
+ Proximity_graph proximity_graph = Gudhi::compute_proximity_graph<Simplex_tree>(point_vector,
+ threshold,
+ Gudhi::Euclidean_distance());
+
+ if (num_edges(proximity_graph) <= 0) {
+ std::cerr << "Total number of egdes are zero." << std::endl;
+ exit(-1);
+ }
+
+  auto edges_from_graph = boost::adaptors::transform(edges(proximity_graph), [&](auto&& edge){
+ return std::make_tuple(source(edge, proximity_graph),
+ target(edge, proximity_graph),
+ get(Gudhi::edge_filtration_t(), proximity_graph, edge));
+ });
+ std::vector<Filtered_edge> edges_list(edges_from_graph.begin(), edges_from_graph.end());
+
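+  // Perform the requested number of edge collapse passes; a further pass can sometimes remove additional edges.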
+  for (int iter = 0; iter < edge_collapse_iter_nb; iter++) {
+    auto remaining_edges = Gudhi::collapse::flag_complex_collapse_edges(edges_list);
+    edges_list = std::move(remaining_edges);
+  }
+
+ Simplex_tree stree;
+ for (Vertex_handle vertex = 0; static_cast<std::size_t>(vertex) < point_vector.size(); vertex++) {
+ // insert the vertex with a 0. filtration value just like a Rips
+ stree.insert_simplex({vertex}, 0.);
+ }
+
+ for (auto filtered_edge : edges_list) {
+ stree.insert_simplex({std::get<0>(filtered_edge), std::get<1>(filtered_edge)}, std::get<2>(filtered_edge));
+ }
+
+ stree.expansion(dim_max);
+
+ std::cout << "The complex contains " << stree.num_simplices() << " simplices after collapse. \n";
+ std::cout << " and has dimension " << stree.dimension() << " \n";
+
+ // Sort the simplices in the order of the filtration
+ stree.initialize_filtration();
+ // Compute the persistence diagram of the complex
+ Persistent_cohomology pcoh(stree);
+ // initializes the coefficient field for homology
+ pcoh.init_coefficients(p);
+
+ pcoh.compute_persistent_cohomology(min_persistence);
+ if (filediag.empty()) {
+ pcoh.output_diagram();
+ } else {
+ std::ofstream out(filediag);
+ pcoh.output_diagram(out);
+ out.close();
+ }
+
+ return 0;
+}
+
+void program_options(int argc, char* argv[], std::string& off_file_points, std::string& filediag,
+ Filtration_value& threshold, int& dim_max, int& p, int& edge_collapse_iter_nb,
+ Filtration_value& min_persistence) {
+ namespace po = boost::program_options;
+ po::options_description hidden("Hidden options");
+ hidden.add_options()("input-file", po::value<std::string>(&off_file_points),
+ "Name of an OFF file containing a point set.\n");
+
+ po::options_description visible("Allowed options", 100);
+ visible.add_options()("help,h", "produce help message")(
+ "output-file,o", po::value<std::string>(&filediag)->default_value(std::string()),
+ "Name of file in which the persistence diagram is written. Default print in std::cout")(
+ "max-edge-length,r",
+ po::value<Filtration_value>(&threshold)->default_value(std::numeric_limits<Filtration_value>::infinity()),
+ "Maximal length of an edge for the Rips complex construction.")(
+ "cpx-dimension,d", po::value<int>(&dim_max)->default_value(1),
+ "Maximal dimension of the Rips complex we want to compute.")(
+ "field-charac,p", po::value<int>(&p)->default_value(11),
+ "Characteristic p of the coefficient field Z/pZ for computing homology.")(
+ "edge-collapse-iterations,i", po::value<int>(&edge_collapse_iter_nb)->default_value(1),
+ "Number of iterations edge collapse is performed.")(
+ "min-persistence,m", po::value<Filtration_value>(&min_persistence),
+ "Minimal lifetime of homology feature to be recorded. Default is 0. Enter a negative value to see zero length "
+ "intervals");
+
+ po::positional_options_description pos;
+ pos.add("input-file", 1);
+
+ po::options_description all;
+ all.add(visible).add(hidden);
+
+ po::variables_map vm;
+ po::store(po::command_line_parser(argc, argv).options(all).positional(pos).run(), vm);
+ po::notify(vm);
+
+ if (vm.count("help") || !vm.count("input-file")) {
+ std::cout << std::endl;
+ std::cout << "Compute the persistent homology with coefficient field Z/pZ \n";
+ std::cout << "of a Rips complex, after edge collapse, defined on a set of input points.\n \n";
+ std::cout << "The output diagram contains one bar per line, written with the convention: \n";
+ std::cout << " p dim b d \n";
+ std::cout << "where dim is the dimension of the homological feature,\n";
+ std::cout << "b and d are respectively the birth and death of the feature and \n";
+ std::cout << "p is the characteristic of the field Z/pZ used for homology coefficients." << std::endl << std::endl;
+
+ std::cout << "Usage: " << argv[0] << " [options] input-file" << std::endl << std::endl;
+ std::cout << visible << std::endl;
+ exit(-1);
+ }
+}
diff --git a/src/cmake/modules/GUDHI_third_party_libraries.cmake b/src/cmake/modules/GUDHI_third_party_libraries.cmake
index f92fe93e..a56a2756 100644
--- a/src/cmake/modules/GUDHI_third_party_libraries.cmake
+++ b/src/cmake/modules/GUDHI_third_party_libraries.cmake
@@ -68,7 +68,10 @@ if(CGAL_FOUND)
endif()
# For those who dislike bundled dependencies, this indicates where to find a preinstalled Hera.
-set(HERA_WASSERSTEIN_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/ext/hera/wasserstein/include CACHE PATH "Directory where one can find Hera's wasserstein.h")
+set(HERA_WASSERSTEIN_INTERNAL_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/ext/hera/wasserstein/include)
+set(HERA_WASSERSTEIN_INCLUDE_DIR ${HERA_WASSERSTEIN_INTERNAL_INCLUDE_DIR} CACHE PATH "Directory where one can find Hera's wasserstein.h")
+set(HERA_BOTTLENECK_INTERNAL_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/ext/hera/bottleneck/include)
+set(HERA_BOTTLENECK_INCLUDE_DIR ${HERA_BOTTLENECK_INTERNAL_INCLUDE_DIR} CACHE PATH "Directory where one can find Hera's bottleneck.h")
option(WITH_GUDHI_USE_TBB "Build with Intel TBB parallelization" ON)
diff --git a/src/cmake/modules/GUDHI_user_version_target.cmake b/src/cmake/modules/GUDHI_user_version_target.cmake
index e99bb42d..e4f39aae 100644
--- a/src/cmake/modules/GUDHI_user_version_target.cmake
+++ b/src/cmake/modules/GUDHI_user_version_target.cmake
@@ -67,8 +67,11 @@ add_custom_command(TARGET user_version PRE_BUILD COMMAND ${CMAKE_COMMAND} -E
add_custom_command(TARGET user_version PRE_BUILD COMMAND ${CMAKE_COMMAND} -E
copy_directory ${CMAKE_SOURCE_DIR}/src/GudhUI ${GUDHI_USER_VERSION_DIR}/GudhUI)
-add_custom_command(TARGET user_version PRE_BUILD COMMAND ${CMAKE_COMMAND} -E
- copy_directory ${CMAKE_SOURCE_DIR}/ext/hera/wasserstein/include ${GUDHI_USER_VERSION_DIR}/ext/hera/wasserstein/include)
+if(HERA_WASSERSTEIN_INCLUDE_DIR STREQUAL HERA_WASSERSTEIN_INTERNAL_INCLUDE_DIR OR
+ HERA_BOTTLENECK_INCLUDE_DIR STREQUAL HERA_BOTTLENECK_INTERNAL_INCLUDE_DIR)
+ add_custom_command(TARGET user_version PRE_BUILD COMMAND ${CMAKE_COMMAND} -E
+ copy_directory ${CMAKE_SOURCE_DIR}/ext/hera ${GUDHI_USER_VERSION_DIR}/ext/hera)
+endif()
set(GUDHI_DIRECTORIES "doc;example;concept;utilities")
diff --git a/src/common/doc/main_page.md b/src/common/doc/main_page.md
index a33d98cd..e19af537 100644
--- a/src/common/doc/main_page.md
+++ b/src/common/doc/main_page.md
@@ -217,6 +217,36 @@
</tr>
</table>
+### Edge collapse
+
+<table>
+ <tr>
+ <td width="35%" rowspan=2>
+ \image html "dominated_edge.png"
+ </td>
+ <td width="50%">
+    Edge collapse is able to reduce any flag filtration to a smaller flag filtration with the same persistence, using
+    only the 1-skeleton of a simplicial complex.
+    The reduction is exact: the persistent homology of the reduced sequence is identical to the persistent
+    homology of the input sequence. The resulting method is simple and extremely efficient.
+
+ Computation of edge collapse and persistent homology of a filtered flag complex via edge collapse as described in
+ \cite edgecollapsesocg2020.
+ </td>
+ <td width="15%">
+ <b>Author:</b> Siddharth Pritam<br>
+ <b>Introduced in:</b> GUDHI 3.3.0<br>
+ <b>Copyright:</b> MIT<br>
+ <b>Requires:</b> \ref eigen
+ </td>
+ </tr>
+ <tr>
+ <td colspan=2 height="25">
+ <b>User manual:</b> \ref edge_collapse
+ </td>
+ </tr>
+</table>
+
### Witness complex
<table>
diff --git a/src/common/include/gudhi/graph_simplicial_complex.h b/src/common/include/gudhi/graph_simplicial_complex.h
index b8508697..da9dee7d 100644
--- a/src/common/include/gudhi/graph_simplicial_complex.h
+++ b/src/common/include/gudhi/graph_simplicial_complex.h
@@ -19,6 +19,9 @@
#include <tuple> // for std::tie
namespace Gudhi {
+/** @file
+ * @brief Graph simplicial complex methods
+ */
/* Edge tag for Boost PropertyGraph. */
struct edge_filtration_t {
@@ -46,6 +49,8 @@ using Proximity_graph = typename boost::adjacency_list < boost::vecS, boost::vec
* If points contains n elements, the proximity graph is the graph with n vertices, and an edge [u,v] iff the
* distance function between points u and v is smaller than threshold.
*
+ * \tparam SimplicialComplexForProximityGraph furnishes `Filtration_value` and `Vertex_handle` type definitions.
+ *
* \tparam ForwardPointRange furnishes `.begin()` and `.end()` methods.
*
* \tparam Distance furnishes `operator()(const Point& p1, const Point& p2)`, where
diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt
index 96dd3f6f..4f26481e 100644
--- a/src/python/CMakeLists.txt
+++ b/src/python/CMakeLists.txt
@@ -14,6 +14,15 @@ function( add_GUDHI_PYTHON_lib THE_LIB )
endif(EXISTS ${THE_LIB})
endfunction( add_GUDHI_PYTHON_lib )
+function( add_GUDHI_PYTHON_lib_dir THE_LIB_DIR )
+  # deals with the case where the variable is not set - avoids an error on Windows
+ if(EXISTS ${THE_LIB_DIR})
+ set(GUDHI_PYTHON_LIBRARY_DIRS "${GUDHI_PYTHON_LIBRARY_DIRS}'${THE_LIB_DIR}', " PARENT_SCOPE)
+ else()
+ message("add_GUDHI_PYTHON_lib_dir - '${THE_LIB_DIR}' does not exist")
+ endif()
+endfunction( add_GUDHI_PYTHON_lib_dir )
+
# THE_TEST is the python test file name (without .py extension) containing tests functions
function( add_gudhi_py_test THE_TEST )
if(PYTEST_FOUND)
@@ -36,6 +45,7 @@ if(PYTHONINTERP_FOUND)
add_gudhi_debug_info("Pybind11 version ${PYBIND11_VERSION}")
set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'bottleneck', ")
set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'hera', ")
+ set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'clustering', ")
endif()
if(CYTHON_FOUND)
set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'off_reader', ")
@@ -130,7 +140,9 @@ if(PYTHONINTERP_FOUND)
set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'reader_utils', ")
set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'witness_complex', ")
set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'strong_witness_complex', ")
- set(GUDHI_PYBIND11_MODULES "${GUDHI_PYBIND11_MODULES}'hera', ")
+ set(GUDHI_PYBIND11_MODULES "${GUDHI_PYBIND11_MODULES}'clustering/_tomato', ")
+ set(GUDHI_PYBIND11_MODULES "${GUDHI_PYBIND11_MODULES}'hera/wasserstein', ")
+ set(GUDHI_PYBIND11_MODULES "${GUDHI_PYBIND11_MODULES}'hera/bottleneck', ")
if (NOT CGAL_VERSION VERSION_LESS 4.11.0)
set(GUDHI_PYBIND11_MODULES "${GUDHI_PYBIND11_MODULES}'bottleneck', ")
set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'nerve_gic', ")
@@ -151,7 +163,7 @@ if(PYTHONINTERP_FOUND)
else(CGAL_HEADER_ONLY)
add_gudhi_debug_info("CGAL version ${CGAL_VERSION}")
add_GUDHI_PYTHON_lib("${CGAL_LIBRARY}")
- set(GUDHI_PYTHON_LIBRARY_DIRS "${GUDHI_PYTHON_LIBRARY_DIRS}'${CGAL_LIBRARIES_DIR}', ")
+ add_GUDHI_PYTHON_lib_dir("${CGAL_LIBRARIES_DIR}")
message("** Add CGAL ${CGAL_LIBRARIES_DIR}")
# If CGAL is not header only, CGAL library may link with boost system,
if(CMAKE_BUILD_TYPE MATCHES Debug)
@@ -159,7 +171,7 @@ if(PYTHONINTERP_FOUND)
else()
add_GUDHI_PYTHON_lib("${Boost_SYSTEM_LIBRARY_RELEASE}")
endif()
- set(GUDHI_PYTHON_LIBRARY_DIRS "${GUDHI_PYTHON_LIBRARY_DIRS}'${Boost_LIBRARY_DIRS}', ")
+ add_GUDHI_PYTHON_lib_dir("${Boost_LIBRARY_DIRS}")
message("** Add Boost ${Boost_LIBRARY_DIRS}")
endif(CGAL_HEADER_ONLY)
# GMP and GMPXX are not required, but if present, CGAL will link with them.
@@ -167,13 +179,17 @@ if(PYTHONINTERP_FOUND)
add_gudhi_debug_info("GMP_LIBRARIES = ${GMP_LIBRARIES}")
set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DCGAL_USE_GMP', ")
add_GUDHI_PYTHON_lib("${GMP_LIBRARIES}")
- set(GUDHI_PYTHON_LIBRARY_DIRS "${GUDHI_PYTHON_LIBRARY_DIRS}'${GMP_LIBRARIES_DIR}', ")
+ if(NOT GMP_LIBRARIES_DIR)
+ get_filename_component(GMP_LIBRARIES_DIR ${GMP_LIBRARIES} PATH)
+ message("GMP_LIBRARIES_DIR from GMP_LIBRARIES set to ${GMP_LIBRARIES_DIR}")
+ endif(NOT GMP_LIBRARIES_DIR)
+ add_GUDHI_PYTHON_lib_dir("${GMP_LIBRARIES_DIR}")
message("** Add gmp ${GMP_LIBRARIES_DIR}")
if(GMPXX_FOUND)
add_gudhi_debug_info("GMPXX_LIBRARIES = ${GMPXX_LIBRARIES}")
set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DCGAL_USE_GMPXX', ")
add_GUDHI_PYTHON_lib("${GMPXX_LIBRARIES}")
- set(GUDHI_PYTHON_LIBRARY_DIRS "${GUDHI_PYTHON_LIBRARY_DIRS}'${GMPXX_LIBRARIES_DIR}', ")
+ add_GUDHI_PYTHON_lib_dir("${GMPXX_LIBRARIES_DIR}")
message("** Add gmpxx ${GMPXX_LIBRARIES_DIR}")
endif(GMPXX_FOUND)
endif(GMP_FOUND)
@@ -184,9 +200,10 @@ if(PYTHONINTERP_FOUND)
# In case CGAL is not header only, all MPFR variables are set except MPFR_LIBRARIES_DIR - Just set it
if(NOT MPFR_LIBRARIES_DIR)
get_filename_component(MPFR_LIBRARIES_DIR ${MPFR_LIBRARIES} PATH)
+ message("MPFR_LIBRARIES_DIR from MPFR_LIBRARIES set to ${MPFR_LIBRARIES_DIR}")
endif(NOT MPFR_LIBRARIES_DIR)
- set(GUDHI_PYTHON_LIBRARY_DIRS "${GUDHI_PYTHON_LIBRARY_DIRS}'${MPFR_LIBRARIES_DIR}', ")
- message("** Add mpfr ${MPFR_LIBRARIES}")
+ add_GUDHI_PYTHON_lib_dir("${MPFR_LIBRARIES_DIR}")
+ message("** Add mpfr ${MPFR_LIBRARIES_DIR}")
endif(MPFR_FOUND)
endif(CGAL_FOUND)
@@ -213,7 +230,7 @@ if(PYTHONINTERP_FOUND)
add_GUDHI_PYTHON_lib("${TBB_RELEASE_LIBRARY}")
add_GUDHI_PYTHON_lib("${TBB_MALLOC_RELEASE_LIBRARY}")
endif()
- set(GUDHI_PYTHON_LIBRARY_DIRS "${GUDHI_PYTHON_LIBRARY_DIRS}'${TBB_LIBRARY_DIRS}', ")
+ add_GUDHI_PYTHON_lib_dir("${TBB_LIBRARY_DIRS}")
message("** Add tbb ${TBB_LIBRARY_DIRS}")
set(GUDHI_PYTHON_INCLUDE_DIRS "${GUDHI_PYTHON_INCLUDE_DIRS}'${TBB_INCLUDE_DIRS}', ")
endif()
@@ -234,8 +251,13 @@ if(PYTHONINTERP_FOUND)
file(COPY "gudhi/representations" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi/")
file(COPY "gudhi/wasserstein" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
file(COPY "gudhi/point_cloud" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
+ file(COPY "gudhi/clustering" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi" FILES_MATCHING PATTERN "*.py")
file(COPY "gudhi/weighted_rips_complex.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
file(COPY "gudhi/dtm_rips_complex.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
+ file(COPY "gudhi/hera/__init__.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi/hera")
+
+ # Some files for pip package
+ file(COPY "introduction.rst" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/")
add_custom_command(
OUTPUT gudhi.so
@@ -355,7 +377,9 @@ if(PYTHONINTERP_FOUND)
COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}"
${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/example/bottleneck_basic_example.py")
- add_gudhi_py_test(test_bottleneck_distance)
+ if (PYBIND11_FOUND)
+ add_gudhi_py_test(test_bottleneck_distance)
+ endif()
# Cover complex
file(COPY ${CMAKE_SOURCE_DIR}/data/points/human.off DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/)
@@ -492,6 +516,11 @@ if(PYTHONINTERP_FOUND)
add_gudhi_py_test(test_dtm)
endif()
+ # Tomato
+ if(SCIPY_FOUND AND SKLEARN_FOUND AND PYBIND11_FOUND)
+ add_gudhi_py_test(test_tomato)
+ endif()
+
# Weighted Rips
if(SCIPY_FOUND)
add_gudhi_py_test(test_weighted_rips_complex)
diff --git a/src/python/doc/alpha_complex_user.rst b/src/python/doc/alpha_complex_user.rst
index 097470c1..fffcb3db 100644
--- a/src/python/doc/alpha_complex_user.rst
+++ b/src/python/doc/alpha_complex_user.rst
@@ -34,8 +34,8 @@ Remarks
the computation of filtration values can exceptionally be arbitrarily bad. In all cases, we still guarantee that the
output is a valid filtration (faces have a filtration value no larger than their cofaces).
* For performance reasons, it is advised to use Alpha_complex with `CGAL <installation.html#cgal>`_ :math:`\geq` 5.0.0.
-
-For performances reasons, it is advised to use CGAL :math:`\geq` 5.0.0.
+* The vertices in the output simplex tree are not guaranteed to match the order of the input points. One can use
+ :func:`~gudhi.AlphaComplex.get_point` to get the initial point back.
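+
+  For instance, a minimal sketch (the points below are made up for illustration):
+
+  .. code-block:: python
+
+      import gudhi
+      ac = gudhi.AlphaComplex(points=[[1.0, 1.0], [7.0, 0.0], [4.0, 6.0]])
+      st = ac.create_simplex_tree()
+      # Vertex v of the simplex tree maps back to one of the input points.
+      for v in range(st.num_vertices()):
+          print(v, "->", ac.get_point(v))
+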
Example from points
-------------------
@@ -178,49 +178,22 @@ In the following example, a threshold of :math:`\alpha^2 = 32.0` is used.
Example from OFF file
^^^^^^^^^^^^^^^^^^^^^
-This example builds the Delaunay triangulation from the points given by an OFF file, and initializes the alpha complex
-with it.
-
+This example builds the alpha complex from 300 random points on a 2-torus.
-Then, it is asked to display information about the alpha complex:
+Then, it computes the persistence diagram and displays it:
-.. testcode::
+.. plot::
+ :include-source:
+ import matplotlib.pyplot as plt
import gudhi
alpha_complex = gudhi.AlphaComplex(off_file=gudhi.__root_source_dir__ + \
- '/data/points/alphacomplexdoc.off')
- simplex_tree = alpha_complex.create_simplex_tree(max_alpha_square=32.0)
+ '/data/points/tore3D_300.off')
+ simplex_tree = alpha_complex.create_simplex_tree()
result_str = 'Alpha complex is of dimension ' + repr(simplex_tree.dimension()) + ' - ' + \
repr(simplex_tree.num_simplices()) + ' simplices - ' + \
repr(simplex_tree.num_vertices()) + ' vertices.'
print(result_str)
- fmt = '%s -> %.2f'
- for filtered_value in simplex_tree.get_filtration():
- print(fmt % tuple(filtered_value))
-
-the program output is:
-
-.. testoutput::
-
- Alpha complex is of dimension 2 - 20 simplices - 7 vertices.
- [0] -> 0.00
- [1] -> 0.00
- [2] -> 0.00
- [3] -> 0.00
- [4] -> 0.00
- [5] -> 0.00
- [6] -> 0.00
- [2, 3] -> 6.25
- [4, 5] -> 7.25
- [0, 2] -> 8.50
- [0, 1] -> 9.25
- [1, 3] -> 10.00
- [1, 2] -> 11.25
- [1, 2, 3] -> 12.50
- [0, 1, 2] -> 13.00
- [5, 6] -> 13.25
- [2, 4] -> 20.00
- [4, 6] -> 22.74
- [4, 5, 6] -> 22.74
- [3, 6] -> 30.25
-
+ diag = simplex_tree.persistence()
+ gudhi.plot_persistence_diagram(diag)
+ plt.show()
diff --git a/src/python/doc/bottleneck_distance_user.rst b/src/python/doc/bottleneck_distance_user.rst
index 89da89d3..6c6e08d9 100644
--- a/src/python/doc/bottleneck_distance_user.rst
+++ b/src/python/doc/bottleneck_distance_user.rst
@@ -9,14 +9,23 @@ Definition
.. include:: bottleneck_distance_sum.inc
-This implementation is based on ideas from "Geometry Helps in Bottleneck Matching and Related Problems"
-:cite:`DBLP:journals/algorithmica/EfratIK01`. Another relevant publication, although it was not used is
-"Geometry Helps to Compare Persistence Diagrams" :cite:`Kerber:2017:GHC:3047249.3064175`.
+This implementation by François Godi is based on ideas from "Geometry Helps in Bottleneck Matching and Related Problems"
+:cite:`DBLP:journals/algorithmica/EfratIK01` and requires `CGAL <installation.html#cgal>`_ (`GPL v3 </licensing/>`_).
-Function
---------
.. autofunction:: gudhi.bottleneck_distance
+This other implementation comes from `Hera
+<https://bitbucket.org/grey_narn/hera/src/master/>`_ (BSD-3-Clause) which is
+based on "Geometry Helps to Compare Persistence Diagrams"
+:cite:`Kerber:2017:GHC:3047249.3064175` by Michael Kerber, Dmitriy
+Morozov, and Arnur Nigmetov.
+
+.. warning::
+ Beware that its approximation allows for a multiplicative error, while the function above uses an additive error.
+
+.. autofunction:: gudhi.hera.bottleneck_distance
+
+
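+A minimal sketch comparing the two functions (the diagrams are made up for illustration):
+
+.. code-block:: python
+
+    import gudhi
+    import gudhi.hera
+    diag1 = [[2.7, 3.7], [9.6, 14.0], [34.2, 34.974]]
+    diag2 = [[2.8, 4.45], [9.5, 14.1]]
+    # Additive error (as small as possible by default).
+    print(gudhi.bottleneck_distance(diag1, diag2))
+    # Multiplicative error; the third argument bounds the relative error.
+    print(gudhi.hera.bottleneck_distance(diag1, diag2, 0.01))
+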
Distance computation
--------------------
diff --git a/src/python/doc/clustering.inc b/src/python/doc/clustering.inc
new file mode 100644
index 00000000..2d07ae88
--- /dev/null
+++ b/src/python/doc/clustering.inc
@@ -0,0 +1,12 @@
+.. table::
+ :widths: 30 40 30
+
+ +--------------------------+-------------------------------------------------------+---------------------------------+
+ | .. figure:: | Clustering tools. | :Author: Marc Glisse |
+ | img/spiral-color.png | | |
+ | | | :Since: GUDHI 3.3.0 |
+ | | | |
+ | | | :License: MIT |
+ +--------------------------+-------------------------------------------------------+---------------------------------+
+ | * :doc:`clustering` |
+ +--------------------------+-----------------------------------------------------------------------------------------+
diff --git a/src/python/doc/clustering.rst b/src/python/doc/clustering.rst
new file mode 100644
index 00000000..c5a57d3c
--- /dev/null
+++ b/src/python/doc/clustering.rst
@@ -0,0 +1,73 @@
+:orphan:
+
+.. To get rid of WARNING: document isn't included in any toctree
+
+=================
+Clustering manual
+=================
+
+We provide an implementation of ToMATo :cite:`tomato`, a persistence-based clustering algorithm. In short, this algorithm uses a density estimator and a neighborhood graph, starts with a mode-seeking phase (naive hill-climbing) to build initial clusters, and finishes by merging clusters based on their prominence.
+
+The merging phase depends on a parameter, the minimum prominence a cluster needs to avoid getting merged into another, bigger cluster. This parameter determines the number of clusters, so for convenience you can instead specify the number of clusters you want. Decreasing the prominence threshold defines a hierarchy of clusters: if two points are in separate clusters when we have k clusters, they are still in separate clusters when we have k+1 clusters.
+
+As a by-product, we produce the persistence diagram of the merge tree of the initial clusters. This is a convenient graphical tool to help decide how many clusters we want.
+
+.. plot::
+ :context:
+ :include-source:
+
+ import gudhi
+ f = open(gudhi.__root_source_dir__ + '/data/points/spiral_2d.csv', 'r')
+ import numpy as np
+ data = np.loadtxt(f)
+ import matplotlib.pyplot as plt
+ plt.scatter(data[:,0],data[:,1],marker='.',s=1)
+ plt.show()
+
+.. plot::
+ :context: close-figs
+ :include-source:
+
+ from gudhi.clustering.tomato import Tomato
+ t = Tomato()
+ t.fit(data)
+ t.plot_diagram()
+
+As one can see in `t.n_clusters_`, the algorithm found 6316 initial clusters. The diagram shows their prominence as their distance to the diagonal. There is always one point infinitely far from the diagonal: there is at least one cluster. Among the finite points, one seems significantly farther from the diagonal than the rest, which indicates that splitting the points into 2 clusters may be a sensible idea.
+
+.. plot::
+ :context: close-figs
+ :include-source:
+
+ t.n_clusters_=2
+ plt.scatter(data[:,0],data[:,1],marker='.',s=1,c=t.labels_)
+ plt.show()
+
+Of course this is just the result for one set of parameters. We can ask for a different density estimator and a different neighborhood graph computed with different parameters.
+
+.. plot::
+ :context: close-figs
+ :include-source:
+
+ t = Tomato(density_type='DTM', k=100)
+ t.fit(data)
+ t.plot_diagram()
+
+This makes the number of clusters clearer, and changes the shape of the clusters a bit.
+
+A quick look at the corresponding density estimate:
+
+.. plot::
+ :context: close-figs
+ :include-source:
+
+ plt.scatter(data[:,0],data[:,1],marker='.',s=1,c=t.weights_)
+ plt.show()
+
+The code provides a few density estimators and graph constructions for convenience when first experimenting, but it is actually expected that advanced users provide their own graph and density estimates instead of point coordinates.
+
+Since the algorithm essentially computes basins of attraction, it is also encouraged to use it on functions that do not represent densities at all.
+
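+A minimal sketch of this manual mode (the neighbor lists and "density" values below are made up for illustration):
+
+.. code-block:: python
+
+    from gudhi.clustering.tomato import Tomato
+    # One list of neighbors per point; any function values can play the role of a density.
+    graph = [[1], [0, 2], [1, 3], [2]]
+    values = [1.0, 0.5, 0.6, 2.0]
+    t = Tomato(graph_type='manual', density_type='manual')
+    t.fit(graph, weights=values)
+    print(t.labels_)
+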
+.. autoclass:: gudhi.clustering.tomato.Tomato
+ :members:
+ :special-members: __init__
diff --git a/src/python/doc/img/spiral-color.png b/src/python/doc/img/spiral-color.png
new file mode 100644
index 00000000..21b62dfc
--- /dev/null
+++ b/src/python/doc/img/spiral-color.png
Binary files differ
diff --git a/src/python/doc/index.rst b/src/python/doc/index.rst
index 05bc18b4..040e57a4 100644
--- a/src/python/doc/index.rst
+++ b/src/python/doc/index.rst
@@ -86,3 +86,8 @@ Point cloud utilities
*********************
.. include:: point_cloud_sum.inc
+
+Clustering
+**********
+
+.. include:: clustering.inc
diff --git a/src/python/doc/installation.rst b/src/python/doc/installation.rst
index a66e910e..525ca84e 100644
--- a/src/python/doc/installation.rst
+++ b/src/python/doc/installation.rst
@@ -5,20 +5,47 @@
Installation
############
-Conda
-*****
-The easiest way to install the Python version of GUDHI is using
-`conda <https://gudhi.inria.fr/conda/>`_.
+Packages
+********
+The easiest way to install the Python version of GUDHI is using pre-built packages.
+We recommend `conda <https://gudhi.inria.fr/conda/>`_:
+
+.. code-block:: bash
+
+ conda install -c conda-forge gudhi
+
+Gudhi is also available on `PyPI <https://pypi.org/project/gudhi/>`_:
+
+.. code-block:: bash
+
+ pip install gudhi
+
+Third-party packages are also available, for instance on Debian or Ubuntu:
+
+.. code-block:: bash
+
+ apt install python3-gudhi
+
+In all cases, you may still want to install some of the optional `run time dependencies`_.
Compiling
*********
+These instructions are for people who want to compile gudhi from source; they are
+unnecessary if you installed a binary package of Gudhi as above. They assume that
+you have downloaded a `release <https://github.com/GUDHI/gudhi-devel/releases>`_,
+with a name like `gudhi.3.2.0.tar.gz`, then run `tar xf gudhi.3.2.0.tar.gz`, which
+created a directory `gudhi.3.2.0`, hereinafter referred to as `/path-to-gudhi/`.
+If you are instead using a git checkout, beware that the paths are a bit
+different, and in particular the `python/` subdirectory is actually `src/python/`
+there.
+
The library uses c++14 and requires `Boost <https://www.boost.org/>`_ :math:`\geq` 1.56.0,
`CMake <https://www.cmake.org/>`_ :math:`\geq` 3.1 to generate makefiles,
`NumPy <http://numpy.org>`_, `Cython <https://www.cython.org/>`_ and
`pybind11 <https://github.com/pybind/pybind11>`_ to compile
the GUDHI Python module.
It is a multi-platform library and compiles on Linux, Mac OSX and Visual
-Studio 2017.
+Studio 2017 or later.
On `Windows <https://wiki.python.org/moin/WindowsCompilers>`_ , only Python
:math:`\geq` 3.5 is available because of the required Visual Studio version.
diff --git a/src/python/gudhi/alpha_complex.pyx b/src/python/gudhi/alpha_complex.pyx
index a356384d..ea128743 100644
--- a/src/python/gudhi/alpha_complex.pyx
+++ b/src/python/gudhi/alpha_complex.pyx
@@ -20,6 +20,7 @@ import os
from gudhi.simplex_tree cimport *
from gudhi.simplex_tree import SimplexTree
+from gudhi import read_points_from_off_file
__author__ = "Vincent Rouvreau"
__copyright__ = "Copyright (C) 2016 Inria"
@@ -27,11 +28,9 @@ __license__ = "GPL v3"
cdef extern from "Alpha_complex_interface.h" namespace "Gudhi":
cdef cppclass Alpha_complex_interface "Gudhi::alpha_complex::Alpha_complex_interface":
- Alpha_complex_interface(vector[vector[double]] points, bool fast_version) nogil except +
- # bool from_file is a workaround for cython to find the correct signature
- Alpha_complex_interface(string off_file, bool fast_version, bool from_file) nogil except +
+ Alpha_complex_interface(vector[vector[double]] points, bool fast_version, bool exact_version) nogil except +
vector[double] get_point(int vertex) nogil except +
- void create_simplex_tree(Simplex_tree_interface_full_featured* simplex_tree, double max_alpha_square, bool exact_version, bool default_filtration_value) nogil except +
+ void create_simplex_tree(Simplex_tree_interface_full_featured* simplex_tree, double max_alpha_square, bool default_filtration_value) nogil except +
# AlphaComplex python interface
cdef class AlphaComplex:
@@ -54,7 +53,6 @@ cdef class AlphaComplex:
"""
cdef Alpha_complex_interface * this_ptr
- cdef bool exact
# Fake constructor that does nothing but documenting the constructor
def __init__(self, points=None, off_file='', precision='safe'):
@@ -76,21 +74,20 @@ cdef class AlphaComplex:
def __cinit__(self, points = None, off_file = '', precision = 'safe'):
assert precision in ['fast', 'safe', 'exact'], "Alpha complex precision can only be 'fast', 'safe' or 'exact'"
cdef bool fast = precision == 'fast'
- self.exact = precision == 'exact'
+ cdef bool exact = precision == 'exact'
cdef vector[vector[double]] pts
if off_file:
if os.path.isfile(off_file):
- self.this_ptr = new Alpha_complex_interface(off_file.encode('utf-8'), fast, True)
+ points = read_points_from_off_file(off_file = off_file)
else:
print("file " + off_file + " not found.")
- else:
- if points is None:
- # Empty Alpha construction
- points=[]
- pts = points
- with nogil:
- self.this_ptr = new Alpha_complex_interface(pts, fast)
+ if points is None:
+ # Empty Alpha construction
+ points=[]
+ pts = points
+ with nogil:
+ self.this_ptr = new Alpha_complex_interface(pts, fast, exact)
def __dealloc__(self):
if self.this_ptr != NULL:
@@ -102,7 +99,7 @@ cdef class AlphaComplex:
return self.this_ptr != NULL
def get_point(self, vertex):
- """This function returns the point corresponding to a given vertex.
+ """This function returns the point corresponding to a given vertex from the :class:`~gudhi.SimplexTree`.
:param vertex: The vertex.
:type vertex: int
@@ -128,5 +125,5 @@ cdef class AlphaComplex:
cdef bool compute_filtration = default_filtration_value == True
with nogil:
self.this_ptr.create_simplex_tree(<Simplex_tree_interface_full_featured*>stree_int_ptr,
- mas, self.exact, compute_filtration)
+ mas, compute_filtration)
return stree
diff --git a/src/python/gudhi/bottleneck.cc b/src/python/gudhi/bottleneck.cc
index 838bf9eb..8a3d669a 100644
--- a/src/python/gudhi/bottleneck.cc
+++ b/src/python/gudhi/bottleneck.cc
@@ -33,7 +33,8 @@ PYBIND11_MODULE(bottleneck, m) {
py::arg("diagram_1"), py::arg("diagram_2"),
py::arg("e") = py::none(),
R"pbdoc(
- This function returns the point corresponding to a given vertex.
+ Compute the Bottleneck distance between two diagrams.
+ Points at infinity and on the diagonal are supported.
:param diagram_1: The first diagram.
:type diagram_1: numpy array of shape (m,2)
diff --git a/src/python/gudhi/clustering/__init__.py b/src/python/gudhi/clustering/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/python/gudhi/clustering/__init__.py
diff --git a/src/python/gudhi/clustering/_tomato.cc b/src/python/gudhi/clustering/_tomato.cc
new file mode 100644
index 00000000..a76a2c3a
--- /dev/null
+++ b/src/python/gudhi/clustering/_tomato.cc
@@ -0,0 +1,277 @@
+/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ * Author(s): Marc Glisse
+ *
+ * Copyright (C) 2020 Inria
+ *
+ * Modification(s):
+ * - YYYY/MM Author: Description of the modification
+ */
+
+#include <boost/container/flat_map.hpp>
+#include <boost/pending/disjoint_sets.hpp>
+#include <boost/property_map/property_map.hpp>
+#include <boost/property_map/transform_value_property_map.hpp>
+#include <boost/property_map/vector_property_map.hpp>
+#include <boost/property_map/function_property_map.hpp>
+#include <boost/iterator/counting_iterator.hpp>
+#include <boost/range/irange.hpp>
+#include <boost/range/adaptor/transformed.hpp>
+#include <vector>
+#include <unordered_map>
+#include <pybind11/pybind11.h>
+#include <pybind11/numpy.h>
+#include <iostream>
+
+namespace py = pybind11;
+
+template <class T, class = std::enable_if_t<std::is_integral<T>::value>>
+int getint(int i) {
+ return i;
+}
+// Gcc-8 has a bug that breaks this version, fixed in gcc-9
+// template<class T, class=decltype(std::declval<T>().template cast<int>())>
+// int getint(T i){return i.template cast<int>();}
+template <class T>
+auto getint(T i) -> decltype(i.template cast<int>()) {
+ return i.template cast<int>();
+}
+
+// Raw clusters are clusters obtained through single-linkage, no merging.
+
+typedef int Point_index;
+typedef int Cluster_index;
+struct Merge {
+ Cluster_index first, second;
+ double persist;
+};
+
+template <class Neighbors, class Density, class Order, class ROrder>
+auto tomato(Point_index num_points, Neighbors const& neighbors, Density const& density, Order const& order,
+ ROrder const& rorder) {
+ // point index --> index of raw cluster it belongs to
+ std::vector<Cluster_index> raw_cluster;
+ raw_cluster.reserve(num_points);
+ // cluster index --> index of top point in the cluster
+ Cluster_index n_raw_clusters = 0; // current number of raw clusters seen
+ //
+ std::vector<Merge> merges;
+ struct Data {
+ Cluster_index parent;
+ int rank;
+ Point_index max;
+ }; // information on a cluster
+ std::vector<Data> ds_base;
+ // boost::vector_property_map does resize(size+1) for every new element, don't use it
+ auto ds_data =
+ boost::make_function_property_map<std::size_t>([&ds_base](std::size_t n) -> Data& { return ds_base[n]; });
+ auto ds_parent =
+ boost::make_transform_value_property_map([](auto& p) -> Cluster_index& { return p.parent; }, ds_data);
+ auto ds_rank = boost::make_transform_value_property_map([](auto& p) -> int& { return p.rank; }, ds_data);
+ boost::disjoint_sets<decltype(ds_rank), decltype(ds_parent)> ds(
+ ds_rank, ds_parent); // on the clusters, not directly the points
+ std::vector<std::array<double, 2>> persistence; // diagram (finite points)
+ boost::container::flat_map<Cluster_index, Cluster_index>
+ adj_clusters; // first: the merged cluster, second: the raw cluster
+ // we only care about the raw cluster, we could use a vector to store the second, store first into a set, and only
+ // insert in the vector if merged is absent from the set
+
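+  // Sweep the points in decreasing order of density: a point denser than all its already-processed
+  // neighbors starts a new cluster; otherwise it joins the cluster of its densest neighbor, and the
+  // clusters adjacent to it through this point are merged, each merge recording a persistence pair.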
+ for (Point_index i = 0; i < num_points; ++i) {
+ // auto&& ngb = neighbors[order[i]];
+ // TODO: have a specialization where we don't need python types and py::cast
+ // TODO: move py::cast and getint into Neighbors
+ py::object ngb = neighbors[py::cast(order[i])]; // auto&& also seems to work
+ adj_clusters.clear();
+ Point_index j = i; // highest neighbor
+ // for(Point_index k : ngb)
+ for (auto k_py : ngb) {
+ Point_index k = rorder[getint(k_py)];
+ if (k >= i || k < 0) // ???
+ continue;
+ if (k < j) j = k;
+ Cluster_index rk = raw_cluster[k];
+ adj_clusters.emplace(ds.find_set(rk), rk);
+ // does not insert if ck=ds.find_set(rk) already seen
+ // which rk we keep from those with the same ck is arbitrary
+ }
+ assert((Point_index)raw_cluster.size() == i);
+ if (i == j) { // local maximum -> new cluster
+ Cluster_index c = n_raw_clusters++;
+ ds_base.emplace_back(); // could be integrated in ds_data, but then we would check the size for every access
+ ds.make_set(c);
+ ds_base[c].max = i; // max
+ raw_cluster.push_back(c);
+ continue;
+ } else { // add i to the cluster of j
+ assert(j < i);
+ raw_cluster.push_back(raw_cluster[j]);
+ // FIXME: we are adding point i to the raw cluster of j, but that one might not be in adj_clusters, so we may
+ // merge clusters A and B through a point of C. It is strange, but I don't know if it can really cause problems.
+ // We could just not set j at all and use arbitrarily the first element of adj_clusters.
+ }
+ // possibly merge clusters
+ // we could sort, in case there are several merges, but that doesn't seem so useful
+ Cluster_index rj = raw_cluster[j];
+ Cluster_index cj = ds.find_set(rj);
+ Cluster_index orig_cj = cj;
+ for (auto ckk : adj_clusters) {
+ Cluster_index rk = ckk.second;
+ Cluster_index ck = ckk.first;
+ if (ck == orig_cj) continue;
+ assert(ck == ds.find_set(rk));
+ Point_index j = ds_base[cj].max;
+ Point_index k = ds_base[ck].max;
+ Point_index young = std::max(j, k);
+ Point_index old = std::min(j, k);
+ auto d_young = density[order[young]];
+ auto d_i = density[order[i]];
+ assert(d_young >= d_i);
+      // Always merge (the non-hierarchical algorithm would only conditionally merge here)
+ persistence.push_back({d_young, d_i});
+ assert(ds.find_set(rj) != ds.find_set(rk));
+ ds.link(cj, ck);
+ cj = ds.find_set(cj);
+ ds_base[cj].max = old; // just one parent, no need for find_set
+ // record the raw clusters, we don't know what will have already been merged.
+ merges.push_back({rj, rk, d_young - d_i});
+ }
+ }
+ {
+ boost::counting_iterator<int> b(0), e(ds_base.size());
+ ds.compress_sets(b, e);
+ // Now we stop using find_sets and look at the parent directly
+ // rank is reused to rename clusters contiguously 0, 1, etc
+ }
+ // Maximum for each connected component
+ std::vector<double> max_cc;
+ for (Cluster_index i = 0; i < n_raw_clusters; ++i) {
+ if (ds_base[i].parent == i) max_cc.push_back(density[order[ds_base[i].max]]);
+ }
+ assert((Cluster_index)(merges.size() + max_cc.size()) == n_raw_clusters);
+
+ // TODO: create a "noise" cluster, merging all those not prominent enough?
+
+ // Replay the merges, in increasing order of prominence, to build the hierarchy
+ std::sort(merges.begin(), merges.end(), [](Merge const& a, Merge const& b) { return a.persist < b.persist; });
+ std::vector<std::array<Cluster_index, 2>> children;
+ children.reserve(merges.size());
+ {
+ struct Dat {
+ Cluster_index parent;
+ int rank;
+ Cluster_index name;
+ };
+ std::vector<Dat> ds_bas(2 * n_raw_clusters - 1);
+ Cluster_index i;
+ auto ds_dat =
+ boost::make_function_property_map<std::size_t>([&ds_bas](std::size_t n) -> Dat& { return ds_bas[n]; });
+ auto ds_par = boost::make_transform_value_property_map([](auto& p) -> Cluster_index& { return p.parent; }, ds_dat);
+ auto ds_ran = boost::make_transform_value_property_map([](auto& p) -> int& { return p.rank; }, ds_dat);
+ boost::disjoint_sets<decltype(ds_ran), decltype(ds_par)> ds(ds_ran, ds_par);
+ for (i = 0; i < n_raw_clusters; ++i) {
+ ds.make_set(i);
+ ds_bas[i].name = i;
+ }
+ for (Merge const& m : merges) {
+ Cluster_index j = ds.find_set(m.first);
+ Cluster_index k = ds.find_set(m.second);
+ assert(j != k);
+ children.push_back({ds_bas[j].name, ds_bas[k].name});
+ ds.make_set(i);
+ ds.link(i, j);
+ ds.link(i, k);
+ ds_bas[ds.find_set(i)].name = i;
+ ++i;
+ }
+ }
+
+ std::vector<Cluster_index> raw_cluster_ordered(num_points);
+ for (int i = 0; i < num_points; ++i) raw_cluster_ordered[i] = raw_cluster[rorder[i]];
+ // return raw_cluster, children, persistence
+ // TODO avoid copies: https://github.com/pybind/pybind11/issues/1042
+ return py::make_tuple(py::array(raw_cluster_ordered.size(), raw_cluster_ordered.data()),
+ py::array(children.size(), children.data()), py::array(persistence.size(), persistence.data()),
+ py::array(max_cc.size(), max_cc.data()));
+}
+
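+// Replay the `n_leaves - n_final` least prominent merges recorded in `children` (tomato() sorted
+// them by increasing prominence), then relabel the surviving clusters contiguously from 0.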
+auto merge(py::array_t<Cluster_index, py::array::c_style> children, Cluster_index n_leaves, Cluster_index n_final) {
+ if (n_final > n_leaves) {
+ std::cerr << "The number of clusters required " << n_final << " is larger than the number of mini-clusters " << n_leaves << '\n';
+ n_final = n_leaves; // or return something special and let Tomato use leaf_labels_?
+ }
+ py::buffer_info cbuf = children.request();
+ if ((cbuf.ndim != 2 || cbuf.shape[1] != 2) && (cbuf.ndim != 1 || cbuf.shape[0] != 0))
+ throw std::runtime_error("internal error: children have to be (n,2) or empty");
+ const int n_merges = cbuf.shape[0];
+ Cluster_index* d = (Cluster_index*)cbuf.ptr;
+ if (n_merges + n_final < n_leaves) {
+ std::cerr << "The number of clusters required " << n_final << " is smaller than the number of connected components " << n_leaves - n_merges << '\n';
+ n_final = n_leaves - n_merges;
+ }
+ struct Dat {
+ Cluster_index parent;
+ int rank;
+ int name;
+ };
+ std::vector<Dat> ds_bas(2 * n_leaves - 1);
+ auto ds_dat = boost::make_function_property_map<std::size_t>([&ds_bas](std::size_t n) -> Dat& { return ds_bas[n]; });
+ auto ds_par = boost::make_transform_value_property_map([](auto& p) -> Cluster_index& { return p.parent; }, ds_dat);
+ auto ds_ran = boost::make_transform_value_property_map([](auto& p) -> int& { return p.rank; }, ds_dat);
+ boost::disjoint_sets<decltype(ds_ran), decltype(ds_par)> ds(ds_ran, ds_par);
+ Cluster_index i;
+ for (i = 0; i < n_leaves; ++i) {
+ ds.make_set(i);
+ ds_bas[i].name = -1;
+ }
+ for (Cluster_index m = 0; m < n_leaves - n_final; ++m) {
+ Cluster_index j = ds.find_set(d[2 * m]);
+ Cluster_index k = ds.find_set(d[2 * m + 1]);
+ assert(j != k);
+ ds.make_set(i);
+ ds.link(i, j);
+ ds.link(i, k);
+ ++i;
+ }
+ Cluster_index next_cluster_name = 0;
+ std::vector<Cluster_index> ret;
+ ret.reserve(n_leaves);
+ for (Cluster_index j = 0; j < n_leaves; ++j) {
+ Cluster_index k = ds.find_set(j);
+ if (ds_bas[k].name == -1) ds_bas[k].name = next_cluster_name++;
+ ret.push_back(ds_bas[k].name);
+ }
+ return py::array(ret.size(), ret.data());
+}
+
+// TODO: Do a special version when ngb is a numpy array, where we can cast to int[k][n] ?
+// py::isinstance<py::array_t<std::int32_t>> (or py::isinstance<py::array> and test the dtype) and flags&c_style
+// or an overload (dropping forcecast?)
+// also do it in the case where we are not given one big array, but each list of neighbors is an array?
+auto hierarchy(py::handle ngb, py::array_t<double, py::array::c_style | py::array::forcecast> density) {
+ // used to be py::iterable ngb, but that's inconvenient if it doesn't come pre-sorted
+ // use py::handle and check if [] (aka __getitem__) works? But then we need to build an object to pass it to []
+ // (I _think_ handle is ok and we don't need object here)
+ py::buffer_info wbuf = density.request();
+ if (wbuf.ndim != 1) throw std::runtime_error("density must be 1D");
+ const int n = wbuf.shape[0];
+ double* d = (double*)wbuf.ptr;
+ // Vector { 0, 1, ..., n-1 }
+ std::vector<Point_index> order(boost::counting_iterator<Point_index>(0), boost::counting_iterator<Point_index>(n));
+ // Permutation of the indices to get points in decreasing order of density
+ std::sort(std::begin(order), std::end(order), [=](Point_index i, Point_index j) { return d[i] > d[j]; });
+ // Inverse permutation
+ std::vector<Point_index> rorder(n);
+ for (Point_index i : boost::irange(0, n)) rorder[order[i]] = i;
+ // Used as:
+ // order[i] is the index of the point with i-th largest density
+ // rorder[i] is the rank of the i-th point in order of decreasing density
+ // TODO: put a wrapper on ngb and d so we don't need to pass (r)order (there is still the issue of reordering the
+ // output)
+ return tomato(n, ngb, d, order, rorder);
+}
+
+PYBIND11_MODULE(_tomato, m) {
+ m.doc() = "Internals of tomato clustering";
+ m.def("hierarchy", &hierarchy, "does the clustering");
+ m.def("merge", &merge, "merge clusters");
+}
diff --git a/src/python/gudhi/clustering/tomato.py b/src/python/gudhi/clustering/tomato.py
new file mode 100644
index 00000000..fbba3cc8
--- /dev/null
+++ b/src/python/gudhi/clustering/tomato.py
@@ -0,0 +1,321 @@
+# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+# Author(s): Marc Glisse
+#
+# Copyright (C) 2020 Inria
+#
+# Modification(s):
+# - YYYY/MM Author: Description of the modification
+
+import numpy
+from ..point_cloud.knn import KNearestNeighbors
+from ..point_cloud.dtm import DTMDensity
+from ._tomato import *
+
+# The fit/predict interface is not so well suited...
+
+
+class Tomato:
+ """
+ This clustering algorithm needs a neighborhood graph on the points, and an estimation of the density at each point.
+ A few possible graph constructions and density estimators are provided for convenience, but it is perfectly natural
+ to provide your own.
+
+ :Requires: `SciPy <installation.html#scipy>`_, `Scikit-learn <installation.html#scikit-learn>`_ or others
+    (see :class:`~gudhi.point_cloud.knn.KNearestNeighbors`) depending on the options.
+
+ Attributes
+ ----------
+ n_clusters_: int
+ The number of clusters. Writing to it automatically adjusts `labels_`.
+ merge_threshold_: float
+ minimum prominence of a cluster so it doesn't get merged. Writing to it automatically adjusts `labels_`.
+ n_leaves_: int
+ number of leaves (unstable clusters) in the hierarchical tree
+ leaf_labels_: ndarray of shape (n_samples,)
+ cluster labels for each point, at the very bottom of the hierarchy
+ labels_: ndarray of shape (n_samples,)
+ cluster labels for each point, after merging
+ diagram_: ndarray of shape (`n_leaves_`, 2)
+ persistence diagram (only the finite points)
+ max_weight_per_cc_: ndarray of shape (n_connected_components,)
+ maximum of the density function on each connected component. This corresponds to the abscissa of infinite
+ points in the diagram
+ children_: ndarray of shape (`n_leaves_`-n_connected_components, 2)
+ The children of each non-leaf node. Values less than `n_leaves_` correspond to leaves of the tree.
+ A node i greater than or equal to `n_leaves_` is a non-leaf node and has children children_[i - `n_leaves_`].
+ Alternatively at the i-th iteration, children[i][0] and children[i][1] are merged to form node `n_leaves_` + i
+ weights_: ndarray of shape (n_samples,)
+ weights of the points, as computed by the density estimator or provided by the user
+ params_: dict
+ Parameters like metric, etc
+ """
+
+ def __init__(
+ self,
+ graph_type="knn",
+ density_type="logDTM",
+ n_clusters=None,
+ merge_threshold=None,
+ # eliminate_threshold=None,
+ # eliminate_threshold (float): minimum max weight of a cluster so it doesn't get eliminated
+ **params
+ ):
+ """
+ Args:
+ graph_type (str): 'manual', 'knn' or 'radius'. Default is 'knn'.
+ density_type (str): 'manual', 'DTM', 'logDTM', 'KDE' or 'logKDE'. When you have many points,
+ 'KDE' and 'logKDE' tend to be slower. Default is 'logDTM'.
+ metric (str|Callable): metric used when calculating the distance between instances in a feature array.
+ Defaults to Minkowski of parameter p.
+ kde_params (dict): if density_type is 'KDE' or 'logKDE', additional parameters passed directly to
+ sklearn.neighbors.KernelDensity.
+ k (int): number of neighbors for a knn graph (including the vertex itself). Defaults to 10.
+ k_DTM (int): number of neighbors for the DTM density estimation (including the vertex itself).
+ Defaults to k.
+ r (float): size of a neighborhood if graph_type is 'radius'. Also used as default bandwidth in kde_params.
+ eps (float): (1+eps) approximation factor when computing distances (ignored in many cases).
+ n_clusters (int): number of clusters requested. Defaults to None, i.e. no merging occurs and we get
+ the maximal number of clusters.
+ merge_threshold (float): minimum prominence of a cluster so it doesn't get merged.
+ symmetrize_graph (bool): whether we should add edges to make the neighborhood graph symmetric.
+ This can be useful with k-NN for small k. Defaults to false.
+ p (float): norm L^p on input points. Defaults to 2.
+ q (float): order used to compute the distance to measure. Defaults to dim.
+ Beware that when the dimension is large, this can easily cause overflows.
+ dim (float): final exponent in DTM density estimation, representing the dimension. Defaults to the
+ dimension, or 2 when the dimension cannot be read from the input (metric is "precomputed").
+ n_jobs (int): Number of jobs to schedule for parallel processing on the CPU.
+ If -1 is given all processors are used. Default: 1.
+ params: extra parameters are passed to :class:`~gudhi.point_cloud.knn.KNearestNeighbors` and
+ :class:`~gudhi.point_cloud.dtm.DTMDensity`.
+ """
+ # Should metric='precomputed' mean input_type='distance_matrix'?
+ # Should we be able to pass metric='minkowski' (what None does currently)?
+ self.graph_type_ = graph_type
+ self.density_type_ = density_type
+ self.params_ = params
+ self.__n_clusters = n_clusters
+ self.__merge_threshold = merge_threshold
+ # self.eliminate_threshold_ = eliminate_threshold
+ if n_clusters and merge_threshold:
+ raise ValueError("Cannot specify both a merge threshold and a number of clusters")
+
+ def fit(self, X, y=None, weights=None):
+ """
+ Args:
+ X ((n,d)-array of float|(n,n)-array of float|Sequence[Iterable[int]]): coordinates of the points,
+ or distance matrix (full, not just a triangle) if metric is "precomputed", or list of neighbors
+ for each point (points are represented by their index, starting from 0) if graph_type is "manual".
+ The number of points is currently limited to about 2 billion.
+ weights (ndarray of shape (n_samples)): if density_type is 'manual', a density estimate at each point
+ y: Not used, present here for API consistency with scikit-learn by convention.
+ """
+ # TODO: First detect if this is a new call with the same data (only threshold changed?)
+ # TODO: less code duplication (subroutines?), less spaghetti, but don't compute neighbors twice if not needed. Clear error message for missing or contradictory parameters.
+ if weights is not None:
+ density_type = "manual"
+ else:
+ density_type = self.density_type_
+ if density_type == "manual":
+ raise ValueError("If density_type is 'manual', you must provide weights to fit()")
+
+ if self.graph_type_ == "manual":
+ self.neighbors_ = X
+ # FIXME: uniformize "message 'option'" vs 'message "option"'
+ assert density_type == "manual", 'If graph_type is "manual", density_type must be as well'
+ else:
+ metric = self.params_.get("metric", "minkowski")
+ if metric != "precomputed":
+ self.points_ = X
+
+ # Slight complication to avoid computing knn twice.
+ need_knn = 0
+ need_knn_ngb = False
+ need_knn_dist = False
+ if self.graph_type_ == "knn":
+ k_graph = self.params_.get("k", 10)
+ # If X has fewer than k points...
+ if k_graph > len(X):
+ k_graph = len(X)
+ need_knn = k_graph
+ need_knn_ngb = True
+ if self.density_type_ in ["DTM", "logDTM"]:
+ k = self.params_.get("k", 10)
+ k_DTM = self.params_.get("k_DTM", k)
+ # If X has fewer than k points...
+ if k_DTM > len(X):
+ k_DTM = len(X)
+ need_knn = max(need_knn, k_DTM)
+ need_knn_dist = True
+ # if we ask for more neighbors for the graph than the DTM, getting the distances is a slight waste,
+ # but it looks negligible
+ if need_knn > 0:
+ knn_args = dict(self.params_)
+ knn_args["k"] = need_knn
+ knn = KNearestNeighbors(return_index=need_knn_ngb, return_distance=need_knn_dist, **knn_args).fit_transform(
+ X
+ )
+ if need_knn_ngb:
+ if need_knn_dist:
+ self.neighbors_ = knn[0][:, 0:k_graph]
+ knn_dist = knn[1]
+ else:
+ self.neighbors_ = knn
+ elif need_knn_dist:
+ knn_dist = knn
+ if self.density_type_ in ["DTM", "logDTM"]:
+ dim = self.params_.get("dim")
+ if dim is None:
+ dim = len(X[0]) if metric != "precomputed" else 2
+ q = self.params_.get("q", dim)
+ weights = DTMDensity(k=k_DTM, metric="neighbors", dim=dim, q=q).fit_transform(knn_dist)
+ if self.density_type_ == "logDTM":
+ weights = numpy.log(weights)
+
+ if self.graph_type_ == "radius":
+ if metric in ["minkowski", "euclidean", "manhattan", "chebyshev"]:
+ from scipy.spatial import cKDTree
+
+ tree = cKDTree(X)
+ # TODO: handle "l1" and "l2" aliases?
+ p = self.params_.get("p")
+ if metric == "euclidean":
+ assert p is None or p == 2, "p=" + str(p) + " is not consistent with metric='euclidean'"
+ p = 2
+ elif metric == "manhattan":
+ assert p is None or p == 1, "p=" + str(p) + " is not consistent with metric='manhattan'"
+ p = 1
+ elif metric == "chebyshev":
+ assert p is None or p == numpy.inf, "p=" + str(p) + " is not consistent with metric='chebyshev'"
+ p = numpy.inf
+ elif p is None:
+ p = 2 # the default
+ eps = self.params_.get("eps", 0)
+ self.neighbors_ = tree.query_ball_tree(tree, r=self.params_["r"], p=p, eps=eps)
+
+ # TODO: sklearn's NearestNeighbors.radius_neighbors can handle more metrics efficiently via its BallTree
+ # (don't bother with the _graph variant, it just calls radius_neighbors).
+ elif metric != "precomputed":
+ from sklearn.metrics import pairwise_distances
+
+ X = pairwise_distances(X, metric=metric, n_jobs=self.params_.get("n_jobs"))
+ metric = "precomputed"
+
+ if metric == "precomputed":
+ # TODO: parallelize? May not be worth it.
+ X = numpy.asarray(X)
+ r = self.params_["r"]
+ self.neighbors_ = [numpy.flatnonzero(l <= r) for l in X]
+
+ if self.density_type_ in {"KDE", "logKDE"}:
+ # Slow...
+ assert (
+ self.graph_type_ != "manual" and metric != "precomputed"
+ ), "Scikit-learn's KernelDensity requires point coordinates"
+ kde_params = dict(self.params_.get("kde_params", dict()))
+ kde_params.setdefault("metric", metric)
+ r = self.params_.get("r")
+ if r is not None:
+ kde_params.setdefault("bandwidth", r)
+ # Should we default rtol to eps?
+ from sklearn.neighbors import KernelDensity
+
+ weights = KernelDensity(**kde_params).fit(self.points_).score_samples(self.points_)
+ if self.density_type_ == "KDE":
+ weights = numpy.exp(weights)
+
+ # TODO: do it at the C++ level and/or in parallel if this is too slow?
+ if self.params_.get("symmetrize_graph"):
+ self.neighbors_ = [set(line) for line in self.neighbors_]
+ for i, line in enumerate(self.neighbors_):
+ line.discard(i)
+ for j in line:
+ self.neighbors_[j].add(i)
+
+ self.weights_ = weights
+ # This is where the main computation happens
+ self.leaf_labels_, self.children_, self.diagram_, self.max_weight_per_cc_ = hierarchy(self.neighbors_, weights)
+ self.n_leaves_ = len(self.max_weight_per_cc_) + len(self.children_)
+ assert self.leaf_labels_.max() + 1 == len(self.max_weight_per_cc_) + len(self.children_)
+ # TODO: deduplicate this code with the setters below
+ if self.__merge_threshold:
+ assert not self.__n_clusters
+ self.__n_clusters = numpy.count_nonzero(
+ self.diagram_[:, 0] - self.diagram_[:, 1] > self.__merge_threshold
+ ) + len(self.max_weight_per_cc_)
+ if self.__n_clusters:
+ # TODO: set corresponding merge_threshold?
+ renaming = merge(self.children_, self.n_leaves_, self.__n_clusters)
+ self.labels_ = renaming[self.leaf_labels_]
+ # In case the user asked for something impossible.
+ # TODO: check for impossible situations before calling merge.
+ self.__n_clusters = self.labels_.max() + 1
+ else:
+ self.labels_ = self.leaf_labels_
+ self.__n_clusters = self.n_leaves_
+ return self
+
+ def fit_predict(self, X, y=None, weights=None):
+ """
+ Equivalent to fit(), and returns the `labels_`.
+ """
+ return self.fit(X, y, weights).labels_
+
+ # TODO: add argument k or threshold? Have a version where you can click and it shows the line and the corresponding k?
+ def plot_diagram(self):
+ """Plots the persistence diagram used to select the number of clusters: one red point
+ per merge in the hierarchy, the diagonal, and one green point per connected component (below the diagonal)."""
+ import matplotlib.pyplot as plt
+
+ l = self.max_weight_per_cc_.min()
+ r = self.max_weight_per_cc_.max()
+ if self.diagram_.size > 0:
+ plt.plot(self.diagram_[:, 0], self.diagram_[:, 1], "ro")
+ l = min(l, self.diagram_[:, 1].min())
+ r = max(r, self.diagram_[:, 0].max())
+ if l == r:
+ if l > 0:
+ l, r = 0.9 * l, 1.1 * r
+ elif l < 0:
+ l, r = 1.1 * l, 0.9 * r
+ else:
+ l, r = -1.0, 1.0
+ plt.plot([l, r], [l, r])
+ plt.plot(
+ self.max_weight_per_cc_, numpy.full(self.max_weight_per_cc_.shape, 1.1 * l - 0.1 * r), "go"
+ )
+ plt.show()
+
+ # Use set_params instead?
+ @property
+ def n_clusters_(self):
+ return self.__n_clusters
+
+ @n_clusters_.setter
+ def n_clusters_(self, n_clusters):
+ if n_clusters == self.__n_clusters:
+ return
+ self.__n_clusters = n_clusters
+ self.__merge_threshold = None
+ if hasattr(self, "leaf_labels_"):
+ renaming = merge(self.children_, self.n_leaves_, self.__n_clusters)
+ self.labels_ = renaming[self.leaf_labels_]
+ # In case the user asked for something impossible
+ self.__n_clusters = self.labels_.max() + 1
+
+ @property
+ def merge_threshold_(self):
+ return self.__merge_threshold
+
+ @merge_threshold_.setter
+ def merge_threshold_(self, merge_threshold):
+ if merge_threshold == self.__merge_threshold:
+ return
+ if hasattr(self, "leaf_labels_"):
+ self.n_clusters_ = numpy.count_nonzero(self.diagram_[:, 0] - self.diagram_[:, 1] > merge_threshold) + len(
+ self.max_weight_per_cc_
+ )
+ else:
+ self.__n_clusters = None
+ self.__merge_threshold = merge_threshold
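A minimal usage sketch of the estimator above (hypothetical data; the constructor parameters `density_type` and `k` are the ones exercised in test_tomato.py further down):

    import numpy as np
    from gudhi.clustering.tomato import Tomato

    points = np.random.rand(100, 2)
    t = Tomato(density_type="DTM", k=10)
    t.fit(points)
    print(t.n_clusters_, t.labels_[:10])
    # Re-cutting the merge tree is cheap: the merge_threshold_ / n_clusters_
    # setters above recompute labels_ from the stored hierarchy without refitting.
    t.merge_threshold_ = 0.5
    print(t.n_clusters_)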
diff --git a/src/python/gudhi/hera/__init__.py b/src/python/gudhi/hera/__init__.py
new file mode 100644
index 00000000..f70b92b9
--- /dev/null
+++ b/src/python/gudhi/hera/__init__.py
@@ -0,0 +1,7 @@
+from .wasserstein import wasserstein_distance
+from .bottleneck import bottleneck_distance
+
+
+__author__ = "Marc Glisse"
+__copyright__ = "Copyright (C) 2020 Inria"
+__license__ = "MIT"
diff --git a/src/python/gudhi/hera/bottleneck.cc b/src/python/gudhi/hera/bottleneck.cc
new file mode 100644
index 00000000..0cb562ce
--- /dev/null
+++ b/src/python/gudhi/hera/bottleneck.cc
@@ -0,0 +1,54 @@
+/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ * Author(s): Marc Glisse
+ *
+ * Copyright (C) 2020 Inria
+ *
+ * Modification(s):
+ * - YYYY/MM Author: Description of the modification
+ */
+
+#include <pybind11_diagram_utils.h>
+
+#ifdef _MSC_VER
+// https://github.com/grey-narn/hera/issues/3
+// ssize_t is a non-standard type (well, posix)
+using py::ssize_t;
+#endif
+
+#include <bottleneck.h> // Hera
+
+double bottleneck_distance(Dgm d1, Dgm d2, double delta)
+{
+ // I *think* the call to request() has to be before releasing the GIL.
+ auto diag1 = numpy_to_range_of_pairs(d1);
+ auto diag2 = numpy_to_range_of_pairs(d2);
+
+ py::gil_scoped_release release;
+
+ if (delta == 0)
+ return hera::bottleneckDistExact(diag1, diag2);
+ else
+ return hera::bottleneckDistApprox(diag1, diag2, delta);
+}
+
+PYBIND11_MODULE(bottleneck, m) {
+ m.def("bottleneck_distance", &bottleneck_distance,
+ py::arg("X"), py::arg("Y"),
+ py::arg("delta") = .01,
+ R"pbdoc(
+ Compute the Bottleneck distance between two diagrams.
+ Points at infinity are supported.
+
+ .. note::
+ Points on the diagonal are not supported and must be filtered out before calling this function.
+
+ Parameters:
+ X (n x 2 numpy array): First diagram
+ Y (n x 2 numpy array): Second diagram
+ delta (float): relative error; the result is within a factor 1+delta of the exact distance (delta=0 computes it exactly)
+
+ Returns:
+ float: (approximate) bottleneck distance d_B(X,Y)
+ )pbdoc");
+}
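Once built, the binding can be exercised from Python along these lines (a sketch; the values mirror the test file further down):

    import numpy as np
    import gudhi.hera

    X = np.array([[2.7, 3.7], [9.6, 14.0], [3.0, np.inf]])
    Y = np.array([[2.8, 4.45], [9.5, 14.1], [3.2, np.inf]])
    print(gudhi.hera.bottleneck_distance(X, Y))     # approximate, default delta=0.01
    print(gudhi.hera.bottleneck_distance(X, Y, 0))  # delta=0 selects the exact algorithm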
diff --git a/src/python/gudhi/hera.cc b/src/python/gudhi/hera/wasserstein.cc
index ea80a9a8..1a21f02f 100644
--- a/src/python/gudhi/hera.cc
+++ b/src/python/gudhi/hera/wasserstein.cc
@@ -33,7 +33,7 @@ double wasserstein_distance(
return hera::wasserstein_dist(diag1, diag2, params);
}
-PYBIND11_MODULE(hera, m) {
+PYBIND11_MODULE(wasserstein, m) {
m.def("wasserstein_distance", &wasserstein_distance,
py::arg("X"), py::arg("Y"),
py::arg("order") = 1,
diff --git a/src/python/gudhi/persistence_graphical_tools.py b/src/python/gudhi/persistence_graphical_tools.py
index 6a74a6ca..c6766c70 100644
--- a/src/python/gudhi/persistence_graphical_tools.py
+++ b/src/python/gudhi/persistence_graphical_tools.py
@@ -11,6 +11,7 @@
from os import path
from math import isfinite
import numpy as np
+from functools import lru_cache
from gudhi.reader_utils import read_persistence_intervals_in_dimension
from gudhi.reader_utils import read_persistence_intervals_grouped_by_dimension
@@ -56,6 +57,18 @@ def _array_handler(a):
else:
return a
+@lru_cache(maxsize=1)
+def _matplotlib_can_use_tex():
+ """This function returns True if matplotlib can deal with LaTeX, False otherwise.
+ The returned value is cached.
+ """
+ try:
+ from matplotlib import checkdep_usetex
+ return checkdep_usetex(True)
+ except ImportError:
+ print("This function is not available; you may be missing matplotlib.")
+ return False
+
def plot_persistence_barcode(
persistence=[],
@@ -105,9 +117,10 @@ def plot_persistence_barcode(
try:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
- from matplotlib import rc
- plt.rc('text', usetex=True)
- plt.rc('font', family='serif')
+ if _matplotlib_can_use_tex():
+ from matplotlib import rc
+ plt.rc('text', usetex=True)
+ plt.rc('font', family='serif')
if persistence_file != "":
if path.isfile(persistence_file):
@@ -250,9 +263,10 @@ def plot_persistence_diagram(
try:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
- from matplotlib import rc
- plt.rc('text', usetex=True)
- plt.rc('font', family='serif')
+ if _matplotlib_can_use_tex():
+ from matplotlib import rc
+ plt.rc('text', usetex=True)
+ plt.rc('font', family='serif')
if persistence_file != "":
if path.isfile(persistence_file):
@@ -422,9 +436,10 @@ def plot_persistence_density(
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from scipy.stats import kde
- from matplotlib import rc
- plt.rc('text', usetex=True)
- plt.rc('font', family='serif')
+ if _matplotlib_can_use_tex():
+ from matplotlib import rc
+ plt.rc('text', usetex=True)
+ plt.rc('font', family='serif')
if persistence_file != "":
if dimension is None:
diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py
index cf2e0879..142ddef1 100644
--- a/src/python/gudhi/representations/metrics.py
+++ b/src/python/gudhi/representations/metrics.py
@@ -350,23 +350,30 @@ class PersistenceFisherDistance(BaseEstimator, TransformerMixin):
"""
return _persistence_fisher_distance(diag1, diag2, bandwidth=self.bandwidth, kernel_approx=self.kernel_approx)
+
class WassersteinDistance(BaseEstimator, TransformerMixin):
"""
This is a class for computing the Wasserstein distance matrix from a list of persistence diagrams.
"""
- def __init__(self, order=2, internal_p=2, mode="pot", delta=0.01, n_jobs=None):
+
+ def __init__(self, order=1, internal_p=np.inf, mode="hera", delta=0.01, n_jobs=None):
"""
Constructor for the WassersteinDistance class.
Parameters:
- order (int): exponent for Wasserstein, default value is 2., see :func:`gudhi.wasserstein.wasserstein_distance`.
- internal_p (int): ground metric on the (upper-half) plane (i.e. norm l_p in R^2), default value is 2 (euclidean norm), see :func:`gudhi.wasserstein.wasserstein_distance`.
- mode (str): method for computing Wasserstein distance. Either "pot" or "hera".
+ order (float): exponent for Wasserstein, default value is 1., see :func:`gudhi.wasserstein.wasserstein_distance`.
+ internal_p (float): ground metric on the (upper-half) plane (i.e. norm l_p in R^2), default value is `np.inf`, see :func:`gudhi.wasserstein.wasserstein_distance`.
+ mode (str): method for computing Wasserstein distance. Either "pot" or "hera". Defaults to "hera".
delta (float): relative error 1+delta. Used only if mode == "hera".
n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_distances` for details.
"""
self.order, self.internal_p, self.mode = order, internal_p, mode
- self.metric = "pot_wasserstein" if mode == "pot" else "hera_wasserstein"
+ if mode == "pot":
+ self.metric = "pot_wasserstein"
+ elif mode == "hera":
+ self.metric = "hera_wasserstein"
+ else:
+ raise ValueError("Unknown mode. Available values for mode are 'hera' and 'pot'")
self.delta = delta
self.n_jobs = n_jobs
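A short sketch of the class with its new defaults (toy diagrams; order=1, internal_p=np.inf and mode="hera" are now implied):

    import numpy as np
    from gudhi.representations.metrics import WassersteinDistance

    diagrams = [np.array([[0.0, 1.0], [2.0, 4.0]]), np.array([[0.0, 3.0]])]
    wd = WassersteinDistance(delta=0.01)  # hera backend, order=1, internal_p=inf
    D = wd.fit_transform(diagrams)        # 2 x 2 matrix of pairwise distances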
diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx
index 9cb24221..20e66d9f 100644
--- a/src/python/gudhi/simplex_tree.pyx
+++ b/src/python/gudhi/simplex_tree.pyx
@@ -225,13 +225,12 @@ cdef class SimplexTree:
preincrement(it)
def get_skeleton(self, dimension):
- """This function returns the (simplices of the) skeleton of a maximum
- given dimension.
+ """This function returns a generator with the (simplices of the) skeleton of a maximum given dimension.
:param dimension: The skeleton dimension value.
:type dimension: int.
:returns: The (simplices of the) skeleton of a maximum dimension.
- :rtype: list of tuples(simplex, filtration)
+ :rtype: generator of tuples(simplex, filtration)
"""
cdef Simplex_tree_skeleton_iterator it = self.get_ptr().get_skeleton_iterator_begin(dimension)
cdef Simplex_tree_skeleton_iterator end = self.get_ptr().get_skeleton_iterator_end(dimension)
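The change is visible from Python: get_skeleton now yields its pairs lazily, so callers that relied on a list should wrap it. A sketch:

    from gudhi import SimplexTree

    st = SimplexTree()
    st.insert([0, 1, 2], filtration=1.0)
    for simplex, filtration in st.get_skeleton(1):  # generator, not a list
        print(simplex, filtration)
    skeleton = list(st.get_skeleton(1))             # materialize if needed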
diff --git a/src/python/gudhi/wasserstein/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py
index 89ecab1c..b37d30bb 100644
--- a/src/python/gudhi/wasserstein/wasserstein.py
+++ b/src/python/gudhi/wasserstein/wasserstein.py
@@ -73,8 +73,8 @@ def _perstot_autodiff(X, order, internal_p):
def _perstot(X, order, internal_p, enable_autodiff):
'''
:param X: (n x 2) numpy.array (points of a given diagram).
- :param order: exponent for Wasserstein. Default value is 2.
- :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm).
+ :param order: exponent for Wasserstein.
+ :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2).
:param enable_autodiff: If X is torch.tensor, tensorflow.Tensor or jax.numpy.ndarray, make the computation
transparent to automatic differentiation.
:type enable_autodiff: bool
@@ -88,7 +88,7 @@ def _perstot(X, order, internal_p, enable_autodiff):
return np.linalg.norm(_dist_to_diag(X, internal_p), ord=order)
-def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2., enable_autodiff=False):
+def wasserstein_distance(X, Y, matching=False, order=1., internal_p=np.inf, enable_autodiff=False):
'''
:param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points
(i.e. with infinite coordinate).
@@ -96,9 +96,9 @@ def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2., enable_a
:param matching: if True, computes and returns the optimal matching between X and Y, encoded as
a (n x 2) np.array [...[i,j]...], meaning the i-th point in X is matched to
the j-th point in Y, with the convention (-1) represents the diagonal.
- :param order: exponent for Wasserstein; Default value is 2.
+ :param order: exponent for Wasserstein; Default value is 1.
:param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2);
- Default value is 2 (Euclidean norm).
+ Default value is `np.inf`.
:param enable_autodiff: If X and Y are torch.tensor, tensorflow.Tensor or jax.numpy.ndarray, make the computation
transparent to automatic differentiation. This requires the package EagerPy and is currently incompatible
with `matching=True`.
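A sketch of the new defaults against the old ones (toy diagrams):

    import numpy as np
    from gudhi.wasserstein import wasserstein_distance

    X = np.array([[0.0, 1.0], [3.0, 5.0]])
    Y = np.array([[0.0, 1.2]])
    print(wasserstein_distance(X, Y))                           # order=1, internal_p=inf
    print(wasserstein_distance(X, Y, order=2., internal_p=2.))  # previous defaults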
diff --git a/src/python/include/Alpha_complex_factory.h b/src/python/include/Alpha_complex_factory.h
new file mode 100644
index 00000000..d699ad9b
--- /dev/null
+++ b/src/python/include/Alpha_complex_factory.h
@@ -0,0 +1,141 @@
+/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ * Author(s): Vincent Rouvreau
+ *
+ * Copyright (C) 2020 Inria
+ *
+ * Modification(s):
+ * - YYYY/MM Author: Description of the modification
+ */
+
+#ifndef INCLUDE_ALPHA_COMPLEX_FACTORY_H_
+#define INCLUDE_ALPHA_COMPLEX_FACTORY_H_
+
+#include <gudhi/Simplex_tree.h>
+#include <gudhi/Alpha_complex.h>
+#include <gudhi/Alpha_complex_3d.h>
+#include <gudhi/Alpha_complex_options.h>
+#include <CGAL/Epeck_d.h>
+#include <CGAL/Epick_d.h>
+
+#include <boost/range/adaptor/transformed.hpp>
+
+#include "Simplex_tree_interface.h"
+
+#include <iostream>
+#include <vector>
+#include <string>
+#include <memory> // for std::unique_ptr
+
+namespace Gudhi {
+
+namespace alpha_complex {
+
+template <typename CgalPointType>
+std::vector<double> pt_cgal_to_cython(CgalPointType const& point) {
+ std::vector<double> vd;
+ vd.reserve(point.dimension());
+ for (auto coord = point.cartesian_begin(); coord != point.cartesian_end(); coord++)
+ vd.push_back(CGAL::to_double(*coord));
+ return vd;
+}
+
+template <typename CgalPointType>
+static CgalPointType pt_cython_to_cgal(std::vector<double> const& vec) {
+ return CgalPointType(vec.size(), vec.begin(), vec.end());
+}
+
+class Abstract_alpha_complex {
+ public:
+ virtual std::vector<double> get_point(int vh) = 0;
+ virtual bool create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square,
+ bool default_filtration_value) = 0;
+ // Instances are deleted through a pointer to this base class, so a virtual destructor is required.
+ virtual ~Abstract_alpha_complex() = default;
+};
+
+class Exact_Alphacomplex_dD : public Abstract_alpha_complex {
+ private:
+ using Kernel = CGAL::Epeck_d<CGAL::Dynamic_dimension_tag>;
+ using Point = typename Kernel::Point_d;
+
+ public:
+ Exact_Alphacomplex_dD(const std::vector<std::vector<double>>& points, bool exact_version)
+ : exact_version_(exact_version),
+ alpha_complex_(boost::adaptors::transform(points, pt_cython_to_cgal<Point>)) {
+ }
+
+ virtual std::vector<double> get_point(int vh) override {
+ Point const& point = alpha_complex_.get_point(vh);
+ return pt_cgal_to_cython(point);
+ }
+
+ virtual bool create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square,
+ bool default_filtration_value) override {
+ return alpha_complex_.create_complex(*simplex_tree, max_alpha_square, exact_version_, default_filtration_value);
+ }
+
+ private:
+ bool exact_version_;
+ Alpha_complex<Kernel> alpha_complex_;
+};
+
+class Inexact_Alphacomplex_dD : public Abstract_alpha_complex {
+ private:
+ using Kernel = CGAL::Epick_d<CGAL::Dynamic_dimension_tag>;
+ using Point = typename Kernel::Point_d;
+
+ public:
+ Inexact_Alphacomplex_dD(const std::vector<std::vector<double>>& points, bool exact_version)
+ : exact_version_(exact_version),
+ alpha_complex_(boost::adaptors::transform(points, pt_cython_to_cgal<Point>)) {
+ }
+
+ virtual std::vector<double> get_point(int vh) override {
+ Point const& point = alpha_complex_.get_point(vh);
+ return pt_cgal_to_cython(point);
+ }
+ virtual bool create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square,
+ bool default_filtration_value) override {
+ return alpha_complex_.create_complex(*simplex_tree, max_alpha_square, exact_version_, default_filtration_value);
+ }
+
+ private:
+ bool exact_version_;
+ Alpha_complex<Kernel> alpha_complex_;
+};
+
+template <complexity Complexity>
+class Alphacomplex_3D : public Abstract_alpha_complex {
+ private:
+ using Point = typename Alpha_complex_3d<Complexity, false, false>::Bare_point_3;
+
+ static Point pt_cython_to_cgal_3(std::vector<double> const& vec) {
+ return Point(vec[0], vec[1], vec[2]);
+ }
+
+ public:
+ Alphacomplex_3D(const std::vector<std::vector<double>>& points)
+ : alpha_complex_(boost::adaptors::transform(points, pt_cython_to_cgal_3)) {
+ }
+
+ virtual std::vector<double> get_point(int vh) override {
+ Point const& point = alpha_complex_.get_point(vh);
+ return pt_cgal_to_cython(point);
+ }
+
+ virtual bool create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square,
+ bool default_filtration_value) override {
+ return alpha_complex_.create_complex(*simplex_tree, max_alpha_square);
+ }
+
+ private:
+ Alpha_complex_3d<Complexity, false, false> alpha_complex_;
+};
+
+
+} // namespace alpha_complex
+
+} // namespace Gudhi
+
+#endif // INCLUDE_ALPHA_COMPLEX_FACTORY_H_
diff --git a/src/python/include/Alpha_complex_interface.h b/src/python/include/Alpha_complex_interface.h
index 3ac5db1f..23be194d 100644
--- a/src/python/include/Alpha_complex_interface.h
+++ b/src/python/include/Alpha_complex_interface.h
@@ -11,12 +11,8 @@
#ifndef INCLUDE_ALPHA_COMPLEX_INTERFACE_H_
#define INCLUDE_ALPHA_COMPLEX_INTERFACE_H_
-#include <gudhi/Simplex_tree.h>
-#include <gudhi/Alpha_complex.h>
-#include <CGAL/Epeck_d.h>
-#include <CGAL/Epick_d.h>
-
-#include <boost/range/adaptor/transformed.hpp>
+#include "Alpha_complex_factory.h"
+#include <gudhi/Alpha_complex_options.h>
#include "Simplex_tree_interface.h"
@@ -30,67 +26,51 @@ namespace Gudhi {
namespace alpha_complex {
class Alpha_complex_interface {
- private:
- using Exact_kernel = CGAL::Epeck_d<CGAL::Dynamic_dimension_tag>;
- using Inexact_kernel = CGAL::Epick_d<CGAL::Dynamic_dimension_tag>;
- using Point_exact_kernel = typename Exact_kernel::Point_d;
- using Point_inexact_kernel = typename Inexact_kernel::Point_d;
-
- template <typename CgalPointType>
- std::vector<double> pt_cgal_to_cython(CgalPointType& point) {
- std::vector<double> vd;
- for (auto coord = point.cartesian_begin(); coord != point.cartesian_end(); coord++)
- vd.push_back(CGAL::to_double(*coord));
- return vd;
- }
-
- template <typename CgalPointType>
- static CgalPointType pt_cython_to_cgal(std::vector<double> const& vec) {
- return CgalPointType(vec.size(), vec.begin(), vec.end());
- }
-
public:
- Alpha_complex_interface(const std::vector<std::vector<double>>& points, bool fast_version)
- : fast_version_(fast_version) {
- if (fast_version_) {
- ac_inexact_ptr_ = std::make_unique<Alpha_complex<Inexact_kernel>>(
- boost::adaptors::transform(points, pt_cython_to_cgal<Point_inexact_kernel>));
- } else {
- ac_exact_ptr_ = std::make_unique<Alpha_complex<Exact_kernel>>(
- boost::adaptors::transform(points, pt_cython_to_cgal<Point_exact_kernel>));
- }
- }
-
- Alpha_complex_interface(const std::string& off_file_name, bool fast_version, bool from_file = true)
- : fast_version_(fast_version) {
- if (fast_version_)
- ac_inexact_ptr_ = std::make_unique<Alpha_complex<Inexact_kernel>>(off_file_name);
- else
- ac_exact_ptr_ = std::make_unique<Alpha_complex<Exact_kernel>>(off_file_name);
+ Alpha_complex_interface(const std::vector<std::vector<double>>& points, bool fast_version, bool exact_version)
+ : points_(points),
+ fast_version_(fast_version),
+ exact_version_(exact_version) {
}
std::vector<double> get_point(int vh) {
- if (fast_version_) {
- Point_inexact_kernel const& point = ac_inexact_ptr_->get_point(vh);
- return pt_cgal_to_cython(point);
- } else {
- Point_exact_kernel const& point = ac_exact_ptr_->get_point(vh);
- return pt_cgal_to_cython(point);
- }
+ return alpha_ptr_->get_point(vh);
}
- void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square, bool exact_version,
+ void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square,
bool default_filtration_value) {
- if (fast_version_)
- ac_inexact_ptr_->create_complex(*simplex_tree, max_alpha_square, exact_version, default_filtration_value);
- else
- ac_exact_ptr_->create_complex(*simplex_tree, max_alpha_square, exact_version, default_filtration_value);
+ if (points_.size() > 0) {
+ std::size_t dimension = points_[0].size();
+ if (dimension == 3 && !default_filtration_value) {
+ if (fast_version_)
+ alpha_ptr_ = std::make_unique<Alphacomplex_3D<Gudhi::alpha_complex::complexity::FAST>>(points_);
+ else if (exact_version_)
+ alpha_ptr_ = std::make_unique<Alphacomplex_3D<Gudhi::alpha_complex::complexity::EXACT>>(points_);
+ else
+ alpha_ptr_ = std::make_unique<Alphacomplex_3D<Gudhi::alpha_complex::complexity::SAFE>>(points_);
+ if (!alpha_ptr_->create_simplex_tree(simplex_tree, max_alpha_square, default_filtration_value)) {
+ // create_simplex_tree fails when all the points lie on a plane; fall back to the dD version by lowering dimension to 2
+ dimension--;
+ alpha_ptr_.reset();
+ }
+ }
+ // Not an "else": the 3d attempt above may have failed, in which case dimension was lowered
+ if (dimension != 3 || default_filtration_value) {
+ if (fast_version_) {
+ alpha_ptr_ = std::make_unique<Inexact_Alphacomplex_dD>(points_, exact_version_);
+ } else {
+ alpha_ptr_ = std::make_unique<Exact_Alphacomplex_dD>(points_, exact_version_);
+ }
+ alpha_ptr_->create_simplex_tree(simplex_tree, max_alpha_square, default_filtration_value);
+ }
+ }
}
private:
+ std::unique_ptr<Abstract_alpha_complex> alpha_ptr_;
+ std::vector<std::vector<double>> points_;
bool fast_version_;
- std::unique_ptr<Alpha_complex<Exact_kernel>> ac_exact_ptr_;
- std::unique_ptr<Alpha_complex<Inexact_kernel>> ac_inexact_ptr_;
+ bool exact_version_;
};
} // namespace alpha_complex
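From Python the dispatch is transparent; a sketch of input that now exercises the 3d specialization (coplanar 3d input instead takes the dD fallback described above):

    import gudhi as gd

    # A genuinely 3-dimensional point set: routed to Alpha_complex_3d.
    ac = gd.AlphaComplex(points=[[0., 0., 0.], [1., 0., 0.], [0., 1., 0.], [0., 0., 1.]],
                         precision='safe')
    st = ac.create_simplex_tree()
    print(st.num_simplices())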
diff --git a/src/python/include/pybind11_diagram_utils.h b/src/python/include/pybind11_diagram_utils.h
index d9627258..2d5194f4 100644
--- a/src/python/include/pybind11_diagram_utils.h
+++ b/src/python/include/pybind11_diagram_utils.h
@@ -18,8 +18,8 @@ namespace py = pybind11;
typedef py::array_t<double> Dgm;
// Get m[i,0] and m[i,1] as a pair
-static auto pairify(void* p, ssize_t h, ssize_t w) {
- return [=](ssize_t i){
+static auto pairify(void* p, py::ssize_t h, py::ssize_t w) {
+ return [=](py::ssize_t i){
char* birth = (char*)p + i * h;
char* death = birth + w;
return std::make_pair(*(double*)birth, *(double*)death);
@@ -32,8 +32,8 @@ inline auto numpy_to_range_of_pairs(py::array_t<double> dgm) {
if((buf.ndim!=2 || buf.shape[1]!=2) && (buf.ndim!=1 || buf.shape[0]!=0))
throw std::runtime_error("Diagram must be an array of size n x 2");
// In the case of shape (0), avoid reading non-existing strides[1] even if we won't use it.
- ssize_t stride1 = buf.ndim == 2 ? buf.strides[1] : 0;
- auto cnt = boost::counting_range<ssize_t>(0, buf.shape[0]);
+ py::ssize_t stride1 = buf.ndim == 2 ? buf.strides[1] : 0;
+ auto cnt = boost::counting_range<py::ssize_t>(0, buf.shape[0]);
return boost::adaptors::transform(cnt, pairify(buf.ptr, buf.strides[0], stride1));
// Be careful that the returned range cannot contain references to dead temporaries.
}
diff --git a/src/python/introduction.rst b/src/python/introduction.rst
new file mode 100644
index 00000000..11c06ac5
--- /dev/null
+++ b/src/python/introduction.rst
@@ -0,0 +1,24 @@
+The Gudhi library is an open source library for Computational Topology and
+Topological Data Analysis (TDA). It offers state-of-the-art algorithms
+to construct various types of simplicial complexes, data structures to
+represent them, and algorithms to compute geometric approximations of shapes
+and persistent homology.
+
+The GUDHI library offers the following interoperable modules:
+
+* Complexes:
+ * Cubical
+ * Simplicial: Rips, Witness, Alpha and Čech complexes
+ * Cover: Nerve and Graph induced complexes
+* Data structures and basic operations:
+ * Simplex tree, Skeleton blockers and Toplex map
+ * Construction, update, filtration and simplification
+* Topological descriptors computation
+* Manifold reconstruction
+* Topological descriptors tools:
+ * Bottleneck distance
+ * Statistical tools
+ * Persistence diagram and barcode
+
+For more information about Topological Data Analysis and its workflow, please
+refer to the `Wikipedia TDA dedicated page <https://en.wikipedia.org/wiki/Topological_data_analysis>`_.
diff --git a/src/python/setup.py.in b/src/python/setup.py.in
index b9f4e3f0..98d058fc 100644
--- a/src/python/setup.py.in
+++ b/src/python/setup.py.in
@@ -48,10 +48,12 @@ ext_modules = cythonize(ext_modules)
for module in pybind11_modules:
my_include_dirs = include_dirs + [pybind11.get_include(False), pybind11.get_include(True)]
- if module == 'hera':
+ if module == 'hera/wasserstein':
my_include_dirs = ['@HERA_WASSERSTEIN_INCLUDE_DIR@'] + my_include_dirs
+ elif module == 'hera/bottleneck':
+ my_include_dirs = ['@HERA_BOTTLENECK_INCLUDE_DIR@'] + my_include_dirs
ext_modules.append(Extension(
- 'gudhi.' + module,
+ 'gudhi.' + module.replace('/', '.'),
sources = [source_dir + module + '.cc'],
language = 'c++',
include_dirs = my_include_dirs,
@@ -62,14 +64,29 @@ for module in pybind11_modules:
runtime_library_dirs=runtime_library_dirs,
))
+# read the contents of introduction.rst
+with open("introduction.rst", "r") as fh:
+ long_description = fh.read()
+
setup(
name = 'gudhi',
packages=find_packages(), # find_namespace_packages(include=["gudhi*"])
author='GUDHI Editorial Board',
author_email='gudhi-contact@lists.gforge.inria.fr',
version='@GUDHI_VERSION@',
- url='http://gudhi.gforge.inria.fr/',
+ url='https://gudhi.inria.fr/',
+ project_urls={
+ 'Bug Tracker': 'https://github.com/GUDHI/gudhi-devel/issues',
+ 'Documentation': 'https://gudhi.inria.fr/python/latest/',
+ 'Source Code': 'https://github.com/GUDHI/gudhi-devel',
+ 'License': 'https://gudhi.inria.fr/licensing/'
+ },
+ description='The Gudhi library is an open source library for ' \
+ 'Computational Topology and Topological Data Analysis (TDA).',
+ long_description_content_type='text/x-rst',
+ long_description=long_description,
ext_modules = ext_modules,
- install_requires = ['cython','numpy >= 1.9',],
- setup_requires = ['numpy >= 1.9','pybind11',],
+ install_requires = ['numpy >= 1.9',],
+ setup_requires = ['cython','numpy >= 1.9','pybind11',],
+ package_data={"": ["*.dll"], },
)
diff --git a/src/python/test/test_alpha_complex.py b/src/python/test/test_alpha_complex.py
index 943ad2c4..a4ee260b 100755
--- a/src/python/test/test_alpha_complex.py
+++ b/src/python/test/test_alpha_complex.py
@@ -8,7 +8,7 @@
- YYYY/MM Author: Description of the modification
"""
-from gudhi import AlphaComplex, SimplexTree
+import gudhi as gd
import math
import numpy as np
import pytest
@@ -25,17 +25,16 @@ __license__ = "MIT"
def _empty_alpha(precision):
- alpha_complex = AlphaComplex(points=[[0, 0]], precision = precision)
+ alpha_complex = gd.AlphaComplex(points=[[0, 0]], precision = precision)
assert alpha_complex.__is_defined() == True
def test_empty_alpha():
- _empty_alpha('fast')
- _empty_alpha('safe')
- _empty_alpha('exact')
+ for precision in ['fast', 'safe', 'exact']:
+ _empty_alpha(precision)
def _infinite_alpha(precision):
point_list = [[0, 0], [1, 0], [0, 1], [1, 1]]
- alpha_complex = AlphaComplex(points=point_list, precision = precision)
+ alpha_complex = gd.AlphaComplex(points=point_list, precision = precision)
assert alpha_complex.__is_defined() == True
simplex_tree = alpha_complex.create_simplex_tree()
@@ -84,13 +83,12 @@ def _infinite_alpha(precision):
assert False
def test_infinite_alpha():
- _infinite_alpha('fast')
- _infinite_alpha('safe')
- _infinite_alpha('exact')
+ for precision in ['fast', 'safe', 'exact']:
+ _infinite_alpha(precision)
def _filtered_alpha(precision):
point_list = [[0, 0], [1, 0], [0, 1], [1, 1]]
- filtered_alpha = AlphaComplex(points=point_list, precision = precision)
+ filtered_alpha = gd.AlphaComplex(points=point_list, precision = precision)
simplex_tree = filtered_alpha.create_simplex_tree(max_alpha_square=0.25)
@@ -128,9 +126,8 @@ def _filtered_alpha(precision):
assert simplex_tree.get_cofaces([0], 1) == [([0, 1], 0.25), ([0, 2], 0.25)]
def test_filtered_alpha():
- _filtered_alpha('fast')
- _filtered_alpha('safe')
- _filtered_alpha('exact')
+ for precision in ['fast', 'safe', 'exact']:
+ _filtered_alpha(precision)
def _safe_alpha_persistence_comparison(precision):
#generate periodic signal
@@ -144,10 +141,10 @@ def _safe_alpha_persistence_comparison(precision):
embedding2 = [[signal[i], delayed[i]] for i in range(len(time))]
#build alpha complex and simplex tree
- alpha_complex1 = AlphaComplex(points=embedding1, precision = precision)
+ alpha_complex1 = gd.AlphaComplex(points=embedding1, precision = precision)
simplex_tree1 = alpha_complex1.create_simplex_tree()
- alpha_complex2 = AlphaComplex(points=embedding2, precision = precision)
+ alpha_complex2 = gd.AlphaComplex(points=embedding2, precision = precision)
simplex_tree2 = alpha_complex2.create_simplex_tree()
diag1 = simplex_tree1.persistence()
@@ -165,7 +162,7 @@ def test_safe_alpha_persistence_comparison():
def _delaunay_complex(precision):
point_list = [[0, 0], [1, 0], [0, 1], [1, 1]]
- filtered_alpha = AlphaComplex(points=point_list, precision = precision)
+ filtered_alpha = gd.AlphaComplex(points=point_list, precision = precision)
simplex_tree = filtered_alpha.create_simplex_tree(default_filtration_value = True)
@@ -197,6 +194,54 @@ def _delaunay_complex(precision):
assert math.isnan(filtered_value[1])
def test_delaunay_complex():
- _delaunay_complex('fast')
- _delaunay_complex('safe')
- _delaunay_complex('exact')
+ for precision in ['fast', 'safe', 'exact']:
+ _delaunay_complex(precision)
+
+def _3d_points_on_a_plane(precision, default_filtration_value):
+ alpha = gd.AlphaComplex(off_file=gd.__root_source_dir__ + '/data/points/alphacomplexdoc.off',
+ precision = precision)
+
+ simplex_tree = alpha.create_simplex_tree(default_filtration_value = default_filtration_value)
+ assert simplex_tree.dimension() == 2
+ assert simplex_tree.num_vertices() == 7
+ assert simplex_tree.num_simplices() == 25
+
+def test_3d_points_on_a_plane():
+ for default_filtration_value in [True, False]:
+ for precision in ['fast', 'safe', 'exact']:
+ _3d_points_on_a_plane(precision, default_filtration_value)
+
+def _3d_tetrahedrons(precision):
+ points = 10*np.random.rand(10, 3)
+ alpha = gd.AlphaComplex(points=points, precision = precision)
+ st_alpha = alpha.create_simplex_tree(default_filtration_value = False)
+ # A separate AlphaComplex instance is needed for get_point to work
+ delaunay = gd.AlphaComplex(points=points, precision = precision)
+ st_delaunay = delaunay.create_simplex_tree(default_filtration_value = True)
+
+ delaunay_tetra = []
+ for sk in st_delaunay.get_skeleton(4):
+ if len(sk[0]) == 4:
+ tetra = [delaunay.get_point(sk[0][0]),
+ delaunay.get_point(sk[0][1]),
+ delaunay.get_point(sk[0][2]),
+ delaunay.get_point(sk[0][3]) ]
+ delaunay_tetra.append(sorted(tetra, key=lambda tup: tup[0]))
+
+ alpha_tetra = []
+ for sk in st_alpha.get_skeleton(4):
+ if len(sk[0]) == 4:
+ tetra = [alpha.get_point(sk[0][0]),
+ alpha.get_point(sk[0][1]),
+ alpha.get_point(sk[0][2]),
+ alpha.get_point(sk[0][3]) ]
+ alpha_tetra.append(sorted(tetra, key=lambda tup: tup[0]))
+
+ # Check the tetrahedrons from one list are in the second one
+ assert len(alpha_tetra) == len(delaunay_tetra)
+ for tetra_from_del in delaunay_tetra:
+ assert tetra_from_del in alpha_tetra
+
+def test_3d_tetrahedrons():
+ for precision in ['fast', 'safe', 'exact']:
+ _3d_tetrahedrons(precision)
diff --git a/src/python/test/test_bottleneck_distance.py b/src/python/test/test_bottleneck_distance.py
index 70b2abad..6915bea8 100755
--- a/src/python/test/test_bottleneck_distance.py
+++ b/src/python/test/test_bottleneck_distance.py
@@ -9,6 +9,8 @@
"""
import gudhi
+import gudhi.hera
+import pytest
__author__ = "Vincent Rouvreau"
__copyright__ = "Copyright (C) 2016 Inria"
@@ -19,5 +21,7 @@ def test_basic_bottleneck():
diag1 = [[2.7, 3.7], [9.6, 14.0], [34.2, 34.974], [3.0, float("Inf")]]
diag2 = [[2.8, 4.45], [9.5, 14.1], [3.2, float("Inf")]]
- assert gudhi.bottleneck_distance(diag1, diag2, 0.1) == 0.8081763781405569
assert gudhi.bottleneck_distance(diag1, diag2) == 0.75
+ assert gudhi.bottleneck_distance(diag1, diag2, 0.1) == pytest.approx(0.75, abs=0.1)
+ assert gudhi.hera.bottleneck_distance(diag1, diag2, 0) == 0.75
+ assert gudhi.hera.bottleneck_distance(diag1, diag2, 0.1) == pytest.approx(0.75, rel=0.1)
diff --git a/src/python/test/test_tomato.py b/src/python/test/test_tomato.py
new file mode 100755
index 00000000..ecab03c4
--- /dev/null
+++ b/src/python/test/test_tomato.py
@@ -0,0 +1,65 @@
+""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ Author(s): Marc Glisse
+
+ Copyright (C) 2020 Inria
+
+ Modification(s):
+ - YYYY/MM Author: Description of the modification
+"""
+
+from gudhi.clustering.tomato import Tomato
+import numpy as np
+import pytest
+import matplotlib.pyplot as plt
+
+# Disable graphics for testing purposes
+plt.show = lambda: None
+
+
+def test_tomato_1():
+ a = [(1, 2), (1.1, 1.9), (0.9, 1.8), (10, 0), (10.1, 0.05), (10.2, -0.1), (5.4, 0)]
+ t = Tomato(metric="euclidean", n_clusters=2, k=4, n_jobs=-1, eps=0.05)
+ assert np.array_equal(t.fit_predict(a), [1, 1, 1, 0, 0, 0, 0]) # or with swapped 0 and 1
+ assert np.array_equal(t.children_, [[0, 1]])
+
+ t = Tomato(density_type="KDE", r=1, k=4)
+ t.fit(a)
+ assert np.array_equal(t.leaf_labels_, [1, 1, 1, 0, 0, 0, 0]) # or with swapped 0 and 1
+ assert t.n_clusters_ == 2
+ t.merge_threshold_ = 10
+ assert t.n_clusters_ == 1
+ assert (t.labels_ == 0).all()
+
+ t = Tomato(graph_type="radius", r=0.1, metric="cosine", k=3)
+ assert np.array_equal(t.fit_predict(a), [1, 1, 1, 0, 0, 0, 0]) # or with swapped 0 and 1
+
+ t = Tomato(metric="euclidean", graph_type="radius", r=4.7, k=4)
+ t.fit(a)
+ assert t.max_weight_per_cc_.size == 2
+ assert np.array_equal(t.neighbors_, [[0, 1, 2], [0, 1, 2], [0, 1, 2], [3, 4, 5, 6], [3, 4, 5], [3, 4, 5], [3, 6]])
+ t.plot_diagram()
+
+ t = Tomato(graph_type="radius", r=4.7, k=4, symmetrize_graph=True)
+ t.fit(a)
+ assert t.max_weight_per_cc_.size == 2
+ assert [set(i) for i in t.neighbors_] == [{1, 2}, {0, 2}, {0, 1}, {4, 5, 6}, {3, 5}, {3, 4}, {3}]
+
+ t = Tomato(n_clusters=2, k=4, symmetrize_graph=True)
+ t.fit(a)
+ assert [set(i) for i in t.neighbors_] == [
+ {1, 2, 6},
+ {0, 2, 6},
+ {0, 1, 6},
+ {4, 5, 6},
+ {3, 5, 6},
+ {3, 4, 6},
+ {0, 1, 2, 3, 4, 5},
+ ]
+ t.plot_diagram()
+
+ t = Tomato(k=6, metric="manhattan")
+ t.fit(a)
+ assert t.diagram_.size == 0
+ assert t.max_weight_per_cc_.size == 1
+ t.plot_diagram()