diff options
Diffstat (limited to 'src')
74 files changed, 1710 insertions, 517 deletions
diff --git a/src/Alpha_complex/concept/SimplicialComplexForAlpha.h b/src/Alpha_complex/concept/SimplicialComplexForAlpha.h index 1c6c3b0c..c20c3201 100644 --- a/src/Alpha_complex/concept/SimplicialComplexForAlpha.h +++ b/src/Alpha_complex/concept/SimplicialComplexForAlpha.h @@ -72,6 +72,24 @@ struct SimplicialComplexForAlpha { /** \brief Return type of an insertion of a simplex */ typedef unspecified Insertion_result_type; + + /** \name Map interface + * Conceptually a `std::unordered_map<Simplex_handle,std::size_t>`. + * @{ */ + /** \brief Data stored for each simplex. + * + * Must be an integer type. */ + typedef unspecified Simplex_key; + /** \brief Returns a constant dummy number that is either negative, + * or at least as large as the number of simplices. Suggested value: -1. */ + Simplex_key null_key (); + /** \brief Returns the number stored for a simplex by `assign_key()`. + * + * If `assign_key()` has not been called, it must return `null_key()`. */ + Simplex_key key ( Simplex_handle sh ); + /** \brief Store a number for a simplex, which can later be retrieved with `key()`. */ + void assign_key(Simplex_handle sh, Simplex_key n); + /** @} */ }; } // namespace alpha_complex diff --git a/src/Alpha_complex/include/gudhi/Alpha_complex.h b/src/Alpha_complex/include/gudhi/Alpha_complex.h index 1b5d6997..ba91998d 100644 --- a/src/Alpha_complex/include/gudhi/Alpha_complex.h +++ b/src/Alpha_complex/include/gudhi/Alpha_complex.h @@ -112,9 +112,6 @@ class Alpha_complex { typedef typename Kernel::Side_of_bounded_sphere_d Is_Gabriel; typedef typename Kernel::Point_dimension_d Point_Dimension; - // Type required to compute squared radius, or side of bounded sphere on a vector of points. - typedef typename std::vector<Point_d> Vector_of_CGAL_points; - // Vertex_iterator type from CGAL. typedef typename Delaunay_triangulation::Vertex_iterator CGAL_vertex_iterator; @@ -124,6 +121,9 @@ class Alpha_complex { // Structure to switch from simplex tree vertex handle to CGAL vertex iterator. typedef typename std::vector< CGAL_vertex_iterator > Vector_vertex_iterator; + // Numeric type of coordinates in the kernel + typedef typename Kernel::FT FT; + private: /** \brief Vertex iterator vector to switch from simplex tree vertex handle to CGAL vertex iterator. * Vertex handles are inserted sequentially, starting at 0.*/ @@ -132,6 +132,8 @@ class Alpha_complex { Delaunay_triangulation* triangulation_; /** \brief Kernel for triangulation_ functions access.*/ Kernel kernel_; + /** \brief Cache for geometric constructions: circumcenter and squared radius of a simplex.*/ + std::vector<std::pair<Point_d, FT>> cache_, old_cache_; public: /** \brief Alpha_complex constructor from an OFF file name. @@ -246,6 +248,49 @@ class Alpha_complex { } } + /** \brief get_point_ returns the point corresponding to the vertex given as parameter. + * Only for internal use for faster access. + * + * @param[in] vertex Vertex handle of the point to retrieve. + * @return The point found. + */ + const Point_d& get_point_(std::size_t vertex) const { + return vertex_handle_to_iterator_[vertex]->point(); + } + + /// Return a reference to the circumcenter and circumradius, writing them in the cache if necessary. + template<class SimplicialComplexForAlpha> + auto& get_cache(SimplicialComplexForAlpha& cplx, typename SimplicialComplexForAlpha::Simplex_handle s) { + auto k = cplx.key(s); + if(k==cplx.null_key()){ + k = cache_.size(); + cplx.assign_key(s, k); + // Using a transform_range is slower, currently. + thread_local std::vector<Point_d> v; + v.clear(); + for (auto vertex : cplx.simplex_vertex_range(s)) + v.push_back(get_point_(vertex)); + Point_d c = kernel_.construct_circumcenter_d_object()(v.cbegin(), v.cend()); + FT r = kernel_.squared_distance_d_object()(c, v[0]); + cache_.emplace_back(std::move(c), std::move(r)); + } + return cache_[k]; + } + + /// Return the circumradius, either from the old cache or computed, without writing to the cache. + template<class SimplicialComplexForAlpha> + auto radius(SimplicialComplexForAlpha& cplx, typename SimplicialComplexForAlpha::Simplex_handle s) { + auto k = cplx.key(s); + if(k!=cplx.null_key()) + return old_cache_[k].second; + // Using a transform_range is slower, currently. + thread_local std::vector<Point_d> v; + v.clear(); + for (auto vertex : cplx.simplex_vertex_range(s)) + v.push_back(get_point_(vertex)); + return kernel_.compute_squared_radius_d_object()(v.cbegin(), v.cend()); + } + public: /** \brief Inserts all Delaunay triangulation into the simplicial complex. * It also computes the filtration values accordingly to the \ref createcomplexalgorithm if default_filtration_value @@ -324,52 +369,36 @@ class Alpha_complex { if (!default_filtration_value) { // -------------------------------------------------------------------------------------------- - // Will be re-used many times - Vector_of_CGAL_points pointVector; // ### For i : d -> 0 for (int decr_dim = triangulation_->maximal_dimension(); decr_dim >= 0; decr_dim--) { // ### Foreach Sigma of dim i for (Simplex_handle f_simplex : complex.skeleton_simplex_range(decr_dim)) { int f_simplex_dim = complex.dimension(f_simplex); if (decr_dim == f_simplex_dim) { - pointVector.clear(); - #ifdef DEBUG_TRACES - std::clog << "Sigma of dim " << decr_dim << " is"; - #endif // DEBUG_TRACES - for (auto vertex : complex.simplex_vertex_range(f_simplex)) { - pointVector.push_back(get_point(vertex)); - #ifdef DEBUG_TRACES - std::clog << " " << vertex; - #endif // DEBUG_TRACES - } - #ifdef DEBUG_TRACES - std::clog << std::endl; - #endif // DEBUG_TRACES // ### If filt(Sigma) is NaN : filt(Sigma) = alpha(Sigma) if (std::isnan(complex.filtration(f_simplex))) { Filtration_value alpha_complex_filtration = 0.0; // No need to compute squared_radius on a single point - alpha is 0.0 if (f_simplex_dim > 0) { - // squared_radius function initialization - Squared_Radius squared_radius = kernel_.compute_squared_radius_d_object(); - - CGAL::NT_converter<typename Geom_traits::FT, Filtration_value> cv; - auto sqrad = squared_radius(pointVector.begin(), pointVector.end()); - #if CGAL_VERSION_NR >= 1050000000 + auto const& sqrad = radius(complex, f_simplex); +#if CGAL_VERSION_NR >= 1050000000 if(exact) CGAL::exact(sqrad); - #endif +#endif + CGAL::NT_converter<FT, Filtration_value> cv; alpha_complex_filtration = cv(sqrad); } complex.assign_filtration(f_simplex, alpha_complex_filtration); - #ifdef DEBUG_TRACES +#ifdef DEBUG_TRACES std::clog << "filt(Sigma) is NaN : filt(Sigma) =" << complex.filtration(f_simplex) << std::endl; - #endif // DEBUG_TRACES +#endif // DEBUG_TRACES } // No need to propagate further, unweighted points all have value 0 if (decr_dim > 1) propagate_alpha_filtration(complex, f_simplex); } } + old_cache_ = std::move(cache_); + cache_.clear(); } // -------------------------------------------------------------------------------------------- @@ -388,9 +417,7 @@ class Alpha_complex { void propagate_alpha_filtration(SimplicialComplexForAlpha& complex, Simplex_handle f_simplex) { // From SimplicialComplexForAlpha type required to assign filtration values. typedef typename SimplicialComplexForAlpha::Filtration_value Filtration_value; -#ifdef DEBUG_TRACES typedef typename SimplicialComplexForAlpha::Vertex_handle Vertex_handle; -#endif // DEBUG_TRACES // ### Foreach Tau face of Sigma for (auto f_boundary : complex.boundary_simplex_range(f_simplex)) { @@ -414,33 +441,18 @@ class Alpha_complex { #endif // DEBUG_TRACES // ### Else } else { - // insert the Tau points in a vector for is_gabriel function - Vector_of_CGAL_points pointVector; -#ifdef DEBUG_TRACES - Vertex_handle vertexForGabriel = Vertex_handle(); -#endif // DEBUG_TRACES - for (auto vertex : complex.simplex_vertex_range(f_boundary)) { - pointVector.push_back(get_point(vertex)); - } - // Retrieve the Sigma point that is not part of Tau - parameter for is_gabriel function - Point_d point_for_gabriel; - for (auto vertex : complex.simplex_vertex_range(f_simplex)) { - point_for_gabriel = get_point(vertex); - if (std::find(pointVector.begin(), pointVector.end(), point_for_gabriel) == pointVector.end()) { -#ifdef DEBUG_TRACES - // vertex is not found in Tau - vertexForGabriel = vertex; -#endif // DEBUG_TRACES - // No need to continue loop - break; - } - } - // is_gabriel function initialization - Is_Gabriel is_gabriel = kernel_.side_of_bounded_sphere_d_object(); - bool is_gab = is_gabriel(pointVector.begin(), pointVector.end(), point_for_gabriel) - != CGAL::ON_BOUNDED_SIDE; + // Find which vertex of f_simplex is missing in f_boundary. We could actually write a variant of boundary_simplex_range that gives pairs (f_boundary, vertex). We rely on the fact that simplex_vertex_range is sorted. + auto longlist = complex.simplex_vertex_range(f_simplex); + auto shortlist = complex.simplex_vertex_range(f_boundary); + auto longiter = std::begin(longlist); + auto shortiter = std::begin(shortlist); + auto enditer = std::end(shortlist); + while(shortiter != enditer && *longiter == *shortiter) { ++longiter; ++shortiter; } + Vertex_handle extra = *longiter; + auto const& cache=get_cache(complex, f_boundary); + bool is_gab = kernel_.squared_distance_d_object()(cache.first, get_point_(extra)) >= cache.second; #ifdef DEBUG_TRACES - std::clog << " | Tau is_gabriel(Sigma)=" << is_gab << " - vertexForGabriel=" << vertexForGabriel << std::endl; + std::clog << " | Tau is_gabriel(Sigma)=" << is_gab << " - vertexForGabriel=" << extra << std::endl; #endif // DEBUG_TRACES // ### If Tau is not Gabriel of Sigma if (false == is_gab) { diff --git a/src/Alpha_complex/test/Alpha_complex_unit_test.cpp b/src/Alpha_complex/test/Alpha_complex_unit_test.cpp index da1d8004..4b37e4bd 100644 --- a/src/Alpha_complex/test/Alpha_complex_unit_test.cpp +++ b/src/Alpha_complex/test/Alpha_complex_unit_test.cpp @@ -188,9 +188,6 @@ BOOST_AUTO_TEST_CASE(Alpha_complex_from_points) { // Test after prune_above_filtration bool modified = simplex_tree.prune_above_filtration(0.6); - if (modified) { - simplex_tree.initialize_filtration(); - } BOOST_CHECK(modified); // Another way to check num_simplices diff --git a/src/Alpha_complex/utilities/alpha_complex_3d_persistence.cpp b/src/Alpha_complex/utilities/alpha_complex_3d_persistence.cpp index e93c412e..91899040 100644 --- a/src/Alpha_complex/utilities/alpha_complex_3d_persistence.cpp +++ b/src/Alpha_complex/utilities/alpha_complex_3d_persistence.cpp @@ -222,9 +222,6 @@ int main(int argc, char **argv) { break; } - // Sort the simplices in the order of the filtration - simplex_tree.initialize_filtration(); - std::clog << "Simplex_tree dim: " << simplex_tree.dimension() << std::endl; // Compute the persistence diagram of the complex Persistent_cohomology pcoh(simplex_tree, true); diff --git a/src/Alpha_complex/utilities/alpha_complex_persistence.cpp b/src/Alpha_complex/utilities/alpha_complex_persistence.cpp index be60ff78..7c898dfd 100644 --- a/src/Alpha_complex/utilities/alpha_complex_persistence.cpp +++ b/src/Alpha_complex/utilities/alpha_complex_persistence.cpp @@ -75,9 +75,6 @@ int main(int argc, char **argv) { std::clog << "Simplicial complex is of dimension " << simplex.dimension() << " - " << simplex.num_simplices() << " simplices - " << simplex.num_vertices() << " vertices." << std::endl; - // Sort the simplices in the order of the filtration - simplex.initialize_filtration(); - std::clog << "Simplex_tree dim: " << simplex.dimension() << std::endl; // Compute the persistence diagram of the complex Gudhi::persistent_cohomology::Persistent_cohomology<Simplex_tree, Gudhi::persistent_cohomology::Field_Zp> pcoh( diff --git a/src/Bottleneck_distance/example/alpha_rips_persistence_bottleneck_distance.cpp b/src/Bottleneck_distance/example/alpha_rips_persistence_bottleneck_distance.cpp index 4769eca3..ceb9e226 100644 --- a/src/Bottleneck_distance/example/alpha_rips_persistence_bottleneck_distance.cpp +++ b/src/Bottleneck_distance/example/alpha_rips_persistence_bottleneck_distance.cpp @@ -71,9 +71,6 @@ int main(int argc, char * argv[]) { std::clog << "The Rips complex contains " << rips_stree.num_simplices() << " simplices and has dimension " << rips_stree.dimension() << " \n"; - // Sort the simplices in the order of the filtration - rips_stree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology rips_pcoh(rips_stree); // initializes the coefficient field for homology @@ -92,9 +89,6 @@ int main(int argc, char * argv[]) { std::clog << "The Alpha complex contains " << alpha_stree.num_simplices() << " simplices and has dimension " << alpha_stree.dimension() << " \n"; - // Sort the simplices in the order of the filtration - alpha_stree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology alpha_pcoh(alpha_stree); // initializes the coefficient field for homology diff --git a/src/Persistent_cohomology/example/custom_persistence_sort.cpp b/src/Persistent_cohomology/example/custom_persistence_sort.cpp index 87e9c207..410cd987 100644 --- a/src/Persistent_cohomology/example/custom_persistence_sort.cpp +++ b/src/Persistent_cohomology/example/custom_persistence_sort.cpp @@ -86,9 +86,6 @@ int main(int argc, char **argv) { " - " << simplex.num_simplices() << " simplices - " << simplex.num_vertices() << " vertices." << std::endl; - // Sort the simplices in the order of the filtration - simplex.initialize_filtration(); - std::clog << "Simplex_tree dim: " << simplex.dimension() << std::endl; Persistent_cohomology pcoh(simplex); diff --git a/src/Persistent_cohomology/example/persistence_from_file.cpp b/src/Persistent_cohomology/example/persistence_from_file.cpp index 79108730..38c44514 100644 --- a/src/Persistent_cohomology/example/persistence_from_file.cpp +++ b/src/Persistent_cohomology/example/persistence_from_file.cpp @@ -59,9 +59,6 @@ int main(int argc, char * argv[]) { std::clog << std::endl; }*/ - // Sort the simplices in the order of the filtration - simplex_tree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology< Simplex_tree<>, Field_Zp > pcoh(simplex_tree); // initializes the coefficient field for homology diff --git a/src/Persistent_cohomology/example/plain_homology.cpp b/src/Persistent_cohomology/example/plain_homology.cpp index 4d329020..236b67de 100644 --- a/src/Persistent_cohomology/example/plain_homology.cpp +++ b/src/Persistent_cohomology/example/plain_homology.cpp @@ -59,9 +59,6 @@ int main() { st.insert_simplex_and_subfaces(edge35); st.insert_simplex(vertex4); - // Sort the simplices in the order of the filtration - st.initialize_filtration(); - // Class for homology computation // By default, since the complex has dimension 1, only 0-dimensional homology would be computed. // Here we also want persistent homology to be computed for the maximal dimension in the complex (persistence_dim_max = true) diff --git a/src/Persistent_cohomology/example/rips_multifield_persistence.cpp b/src/Persistent_cohomology/example/rips_multifield_persistence.cpp index e2e2c0a5..2edf5bc4 100644 --- a/src/Persistent_cohomology/example/rips_multifield_persistence.cpp +++ b/src/Persistent_cohomology/example/rips_multifield_persistence.cpp @@ -59,9 +59,6 @@ int main(int argc, char * argv[]) { std::clog << "The complex contains " << simplex_tree.num_simplices() << " simplices \n"; std::clog << " and has dimension " << simplex_tree.dimension() << " \n"; - // Sort the simplices in the order of the filtration - simplex_tree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology pcoh(simplex_tree); // initializes the coefficient field for homology diff --git a/src/Persistent_cohomology/example/rips_persistence_step_by_step.cpp b/src/Persistent_cohomology/example/rips_persistence_step_by_step.cpp index 7da9f15d..a503d983 100644 --- a/src/Persistent_cohomology/example/rips_persistence_step_by_step.cpp +++ b/src/Persistent_cohomology/example/rips_persistence_step_by_step.cpp @@ -76,9 +76,6 @@ int main(int argc, char * argv[]) { std::clog << "The complex contains " << st.num_simplices() << " simplices \n"; std::clog << " and has dimension " << st.dimension() << " \n"; - // Sort the simplices in the order of the filtration - st.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology pcoh(st); // initializes the coefficient field for homology diff --git a/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h b/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h index ca4bc10d..d34ee07d 100644 --- a/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h +++ b/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h @@ -561,7 +561,6 @@ class Persistent_cohomology { void output_diagram(std::ostream& ostream = std::cout) { cmp_intervals_by_length cmp(cpx_); std::sort(std::begin(persistent_pairs_), std::end(persistent_pairs_), cmp); - bool has_infinity = std::numeric_limits<Filtration_value>::has_infinity; for (auto pair : persistent_pairs_) { ostream << get<2>(pair) << " " << cpx_->dimension(get<0>(pair)) << " " << cpx_->filtration(get<0>(pair)) << " " @@ -571,9 +570,9 @@ class Persistent_cohomology { void write_output_diagram(std::string diagram_name) { std::ofstream diagram_out(diagram_name.c_str()); + diagram_out.exceptions(diagram_out.failbit); cmp_intervals_by_length cmp(cpx_); std::sort(std::begin(persistent_pairs_), std::end(persistent_pairs_), cmp); - bool has_infinity = std::numeric_limits<Filtration_value>::has_infinity; for (auto pair : persistent_pairs_) { diagram_out << cpx_->dimension(get<0>(pair)) << " " << cpx_->filtration(get<0>(pair)) << " " diff --git a/src/Rips_complex/utilities/rips_correlation_matrix_persistence.cpp b/src/Rips_complex/utilities/rips_correlation_matrix_persistence.cpp index 67f921a6..b473738e 100644 --- a/src/Rips_complex/utilities/rips_correlation_matrix_persistence.cpp +++ b/src/Rips_complex/utilities/rips_correlation_matrix_persistence.cpp @@ -71,9 +71,6 @@ int main(int argc, char* argv[]) { std::clog << "The complex contains " << simplex_tree.num_simplices() << " simplices \n"; std::clog << " and has dimension " << simplex_tree.dimension() << " \n"; - // Sort the simplices in the order of the filtration - simplex_tree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology pcoh(simplex_tree); // initializes the coefficient field for homology diff --git a/src/Rips_complex/utilities/rips_distance_matrix_persistence.cpp b/src/Rips_complex/utilities/rips_distance_matrix_persistence.cpp index 4ad19675..6306755d 100644 --- a/src/Rips_complex/utilities/rips_distance_matrix_persistence.cpp +++ b/src/Rips_complex/utilities/rips_distance_matrix_persistence.cpp @@ -50,9 +50,6 @@ int main(int argc, char* argv[]) { std::clog << "The complex contains " << simplex_tree.num_simplices() << " simplices \n"; std::clog << " and has dimension " << simplex_tree.dimension() << " \n"; - // Sort the simplices in the order of the filtration - simplex_tree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology pcoh(simplex_tree); // initializes the coefficient field for homology diff --git a/src/Rips_complex/utilities/rips_persistence.cpp b/src/Rips_complex/utilities/rips_persistence.cpp index 4cc63d3c..9d7490b3 100644 --- a/src/Rips_complex/utilities/rips_persistence.cpp +++ b/src/Rips_complex/utilities/rips_persistence.cpp @@ -52,9 +52,6 @@ int main(int argc, char* argv[]) { std::clog << "The complex contains " << simplex_tree.num_simplices() << " simplices \n"; std::clog << " and has dimension " << simplex_tree.dimension() << " \n"; - // Sort the simplices in the order of the filtration - simplex_tree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology pcoh(simplex_tree); // initializes the coefficient field for homology diff --git a/src/Rips_complex/utilities/sparse_rips_persistence.cpp b/src/Rips_complex/utilities/sparse_rips_persistence.cpp index 40606158..ac935b41 100644 --- a/src/Rips_complex/utilities/sparse_rips_persistence.cpp +++ b/src/Rips_complex/utilities/sparse_rips_persistence.cpp @@ -54,9 +54,6 @@ int main(int argc, char* argv[]) { std::clog << "The complex contains " << simplex_tree.num_simplices() << " simplices \n"; std::clog << " and has dimension " << simplex_tree.dimension() << " \n"; - // Sort the simplices in the order of the filtration - simplex_tree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology pcoh(simplex_tree); // initializes the coefficient field for homology diff --git a/src/Simplex_tree/include/gudhi/Simplex_tree.h b/src/Simplex_tree/include/gudhi/Simplex_tree.h index 430d1ac4..889dbd00 100644 --- a/src/Simplex_tree/include/gudhi/Simplex_tree.h +++ b/src/Simplex_tree/include/gudhi/Simplex_tree.h @@ -42,6 +42,20 @@ namespace Gudhi { +/** + * \class Extended_simplex_type Simplex_tree.h gudhi/Simplex_tree.h + * \brief Extended simplex type data structure for representing the type of simplices in an extended filtration. + * + * \details The extended simplex type can be either UP (which means + * that the simplex was present originally, and is thus part of the ascending extended filtration), DOWN (which means + * that the simplex is the cone of an original simplex, and is thus part of the descending extended filtration) or + * EXTRA (which means the simplex is the cone point). + * + * Details may be found in \cite Cohen-Steiner2009 and section 2.2 in \cite Carriere16. + * + */ +enum class Extended_simplex_type {UP, DOWN, EXTRA}; + struct Simplex_tree_options_full_featured; /** @@ -87,6 +101,8 @@ class Simplex_tree { /* \brief Set of nodes sharing a same parent in the simplex tree. */ typedef Simplex_tree_siblings<Simplex_tree, Dictionary> Siblings; + + struct Key_simplex_base_real { Key_simplex_base_real() : key_(-1) {} void assign_key(Simplex_key k) { key_ = k; } @@ -100,6 +116,12 @@ class Simplex_tree { void assign_key(Simplex_key); Simplex_key key() const; }; + struct Extended_filtration_data { + Filtration_value minval; + Filtration_value maxval; + Extended_filtration_data(){} + Extended_filtration_data(Filtration_value vmin, Filtration_value vmax): minval(vmin), maxval(vmax) {} + }; typedef typename std::conditional<Options::store_key, Key_simplex_base_real, Key_simplex_base_dummy>::type Key_simplex_base; @@ -120,7 +142,10 @@ class Simplex_tree { public: /** \brief Handle type to a simplex contained in the simplicial complex represented - * by the simplex tree. */ + * by the simplex tree. + * + * They are essentially pointers into internal vectors, and any insertion or removal + * of a simplex may invalidate any other Simplex_handle in the complex. */ typedef typename Dictionary::iterator Simplex_handle; private: @@ -233,11 +258,9 @@ class Simplex_tree { * * The filtration must be valid. If the filtration has not been initialized yet, the * method initializes it (i.e. order the simplices). If the complex has changed since the last time the filtration - * was initialized, please call `initialize_filtration()` to recompute it. */ + * was initialized, please call `clear_filtration()` or `initialize_filtration()` to recompute it. */ Filtration_simplex_range const& filtration_simplex_range(Indexing_tag = Indexing_tag()) { - if (filtration_vect_.empty()) { - initialize_filtration(); - } + maybe_initialize_filtration(); return filtration_vect_; } @@ -463,7 +486,7 @@ class Simplex_tree { public: /** \brief Returns the key associated to a simplex. * - * The filtration must be initialized. + * If no key has been assigned, returns `null_key()`. * \pre SimplexTreeOptions::store_key */ static Simplex_key key(Simplex_handle sh) { @@ -473,7 +496,6 @@ class Simplex_tree { /** \brief Returns the simplex that has index idx in the filtration. * * The filtration must be initialized. - * \pre SimplexTreeOptions::store_key */ Simplex_handle simplex(Simplex_key idx) const { return filtration_vect_[idx]; @@ -509,8 +531,7 @@ class Simplex_tree { return Dictionary_it(nullptr); } - /** \brief Returns a key different for all keys associated to the - * simplices of the simplicial complex. */ + /** \brief Returns a fixed number not in the interval [0, `num_simplices()`). */ static Simplex_key null_key() { return -1; } @@ -855,15 +876,13 @@ class Simplex_tree { } public: - /** \brief Initializes the filtrations, i.e. sort the - * simplices according to their order in the filtration and initializes all Simplex_keys. + /** \brief Initializes the filtration cache, i.e. sorts the + * simplices according to their order in the filtration. * - * After calling this method, filtration_simplex_range() becomes valid, and each simplex is - * assigned a Simplex_key corresponding to its order in the filtration (from 0 to m-1 for a - * simplicial complex with m simplices). + * It always recomputes the cache, even if one already exists. * - * Will be automatically called when calling filtration_simplex_range() - * if the filtration has never been initialized yet. */ + * Any insertion, deletion or change of filtration value invalidates this cache, + * which can be cleared with clear_filtration(). */ void initialize_filtration() { filtration_vect_.clear(); filtration_vect_.reserve(num_simplices()); @@ -885,6 +904,21 @@ class Simplex_tree { std::stable_sort(filtration_vect_.begin(), filtration_vect_.end(), is_before_in_filtration(this)); #endif } + /** \brief Initializes the filtration cache if it isn't initialized yet. + * + * Automatically called by filtration_simplex_range(). */ + void maybe_initialize_filtration() { + if (filtration_vect_.empty()) { + initialize_filtration(); + } + } + /** \brief Clears the filtration cache produced by initialize_filtration(). + * + * Useful when initialize_filtration() has already been called and we perform an operation + * (say an insertion) that invalidates the cache. */ + void clear_filtration() { + filtration_vect_.clear(); + } private: /** Recursive search of cofaces @@ -1106,6 +1140,7 @@ class Simplex_tree { * 1 when calling the method. */ void expansion(int max_dim) { if (max_dim <= 1) return; + clear_filtration(); // Drop the cache. dimension_ = max_dim; for (Dictionary_it root_it = root_.members_.begin(); root_it != root_.members_.end(); ++root_it) { @@ -1316,9 +1351,6 @@ class Simplex_tree { /** \brief This function ensures that each simplex has a higher filtration value than its faces by increasing the * filtration values. * @return True if any filtration value was modified, false if the filtration was already non-decreasing. - * \post Some simplex tree functions require the filtration to be valid. `make_filtration_non_decreasing()` - * function is not launching `initialize_filtration()` but returns the filtration modification information. If the - * complex has changed , please call `initialize_filtration()` to recompute it. * * If a simplex has a `NaN` filtration value, it is considered lower than any other defined filtration value. */ @@ -1330,6 +1362,8 @@ class Simplex_tree { modified |= rec_make_filtration_non_decreasing(simplex.second.children()); } } + if(modified) + clear_filtration(); // Drop the cache. return modified; } @@ -1369,16 +1403,16 @@ class Simplex_tree { public: /** \brief Prune above filtration value given as parameter. * @param[in] filtration Maximum threshold value. - * @return The filtration modification information. - * \post Some simplex tree functions require the filtration to be valid. `prune_above_filtration()` - * function is not launching `initialize_filtration()` but returns the filtration modification information. If the - * complex has changed , please call `initialize_filtration()` to recompute it. + * @return True if any simplex was removed, false if all simplices already had a value below the threshold. * \post Note that the dimension of the simplicial complex may be lower after calling `prune_above_filtration()` * than it was before. However, `upper_bound_dimension()` will return the old value, which remains a valid upper * bound. If you care, you can call `dimension()` to recompute the exact dimension. */ bool prune_above_filtration(Filtration_value filtration) { - return rec_prune_above_filtration(root(), filtration); + bool modified = rec_prune_above_filtration(root(), filtration); + if(modified) + clear_filtration(); // Drop the cache. + return modified; } private: @@ -1445,7 +1479,6 @@ class Simplex_tree { * @param[in] sh Simplex handle on the maximal simplex to remove. * \pre Please check the simplex has no coface before removing it. * \exception std::invalid_argument In debug mode, if sh has children. - * \post Be aware that removing is shifting data in a flat_map (initialize_filtration to be done). * \post Note that the dimension of the simplicial complex may be lower after calling `remove_maximal_simplex()` * than it was before. However, `upper_bound_dimension()` will return the old value, which remains a valid upper * bound. If you care, you can call `dimension()` to recompute the exact dimension. @@ -1471,6 +1504,109 @@ class Simplex_tree { } } + /** \brief Retrieve the original filtration value for a given simplex in the Simplex_tree. Since the + * computation of extended persistence requires modifying the filtration values, this function can be used + * to recover the original values. Moreover, computing extended persistence requires adding new simplices + * in the Simplex_tree. Hence, this function also outputs the type of each simplex. It can be either UP (which means + * that the simplex was present originally, and is thus part of the ascending extended filtration), DOWN (which means + * that the simplex is the cone of an original simplex, and is thus part of the descending extended filtration) or + * EXTRA (which means the simplex is the cone point). See the definition of Extended_simplex_type. Note that if the simplex type is DOWN, the original filtration value + * is set to be the original filtration value of the corresponding (not coned) original simplex. + * \pre This function should be called only if `extend_filtration()` has been called first! + * \post The output filtration value is supposed to be the same, but might be a little different, than the + * original filtration value, due to the internal transformation (scaling to [-2,-1]) that is + * performed on the original filtration values during the computation of extended persistence. + * @param[in] f Filtration value of the simplex in the extended (i.e., modified) filtration. + * @param[in] efd Structure containing the minimum and maximum values of the original filtration. This the output of `extend_filtration()`. + * @return A pair containing the original filtration value of the simplex as well as the simplex type. + */ + std::pair<Filtration_value, Extended_simplex_type> decode_extended_filtration(Filtration_value f, const Extended_filtration_data& efd){ + std::pair<Filtration_value, Extended_simplex_type> p; + Filtration_value minval = efd.minval; + Filtration_value maxval = efd.maxval; + if (f >= -2 && f <= -1){ + p.first = minval + (maxval-minval)*(f + 2); p.second = Extended_simplex_type::UP; + } + else if (f >= 1 && f <= 2){ + p.first = minval - (maxval-minval)*(f - 2); p.second = Extended_simplex_type::DOWN; + } + else{ + p.first = std::numeric_limits<Filtration_value>::quiet_NaN(); p.second = Extended_simplex_type::EXTRA; + } + return p; + }; + + /** \brief Extend filtration for computing extended persistence. + * This function only uses the filtration values at the 0-dimensional simplices, + * and computes the extended persistence diagram induced by the lower-star filtration + * computed with these values. + * \post Note that after calling this function, the filtration + * values are actually modified. The function `decode_extended_filtration()` + * retrieves the original values and outputs the extended simplex type. + * \pre Note that this code creates an extra vertex internally, so you should make sure that + * the Simplex tree does not contain a vertex with the largest Vertex_handle. + * @return A data structure containing the maximum and minimum values of the original filtration. + * It is meant to be provided as input to `decode_extended_filtration()` in order to retrieve + * the original filtration values for each simplex. + */ + Extended_filtration_data extend_filtration() { + clear_filtration(); // Drop the cache. + + // Compute maximum and minimum of filtration values + Vertex_handle maxvert = std::numeric_limits<Vertex_handle>::min(); + Filtration_value minval = std::numeric_limits<Filtration_value>::infinity(); + Filtration_value maxval = -std::numeric_limits<Filtration_value>::infinity(); + for (auto sh = root_.members().begin(); sh != root_.members().end(); ++sh){ + Filtration_value f = this->filtration(sh); + minval = std::min(minval, f); + maxval = std::max(maxval, f); + maxvert = std::max(sh->first, maxvert); + } + + GUDHI_CHECK(maxvert < std::numeric_limits<Vertex_handle>::max(), std::invalid_argument("Simplex_tree contains a vertex with the largest Vertex_handle")); + maxvert += 1; + + Simplex_tree st_copy = *this; + + // Add point for coning the simplicial complex + this->insert_simplex({maxvert}, -3); + + // For each simplex + std::vector<Vertex_handle> vr; + for (auto sh_copy : st_copy.complex_simplex_range()){ + + // Locate simplex + vr.clear(); + for (auto vh : st_copy.simplex_vertex_range(sh_copy)){ + vr.push_back(vh); + } + auto sh = this->find(vr); + + // Create cone on simplex + vr.push_back(maxvert); + if (this->dimension(sh) == 0){ + Filtration_value v = this->filtration(sh); + Filtration_value scaled_v = (v-minval)/(maxval-minval); + // Assign ascending value between -2 and -1 to vertex + this->assign_filtration(sh, -2 + scaled_v); + // Assign descending value between 1 and 2 to cone on vertex + this->insert_simplex(vr, 2 - scaled_v); + } + else{ + // Assign value -3 to simplex and cone on simplex + this->assign_filtration(sh, -3); + this->insert_simplex(vr, -3); + } + } + + // Automatically assign good values for simplices + this->make_filtration_non_decreasing(); + + // Return the filtration data + Extended_filtration_data efd(minval, maxval); + return efd; + } + /** \brief Returns a vertex of `sh` that has the same filtration value as `sh` if it exists, and `null_vertex()` otherwise. * * For a lower-star filtration built with `make_filtration_non_decreasing()`, this is a way to invert the process and find out which vertex had its filtration value propagated to `sh`. diff --git a/src/cmake/modules/GUDHI_third_party_libraries.cmake b/src/cmake/modules/GUDHI_third_party_libraries.cmake index 2d010483..0abe66b7 100644 --- a/src/cmake/modules/GUDHI_third_party_libraries.cmake +++ b/src/cmake/modules/GUDHI_third_party_libraries.cmake @@ -150,6 +150,25 @@ function( find_python_module PYTHON_MODULE_NAME ) endif() endfunction( find_python_module ) +# For modules that do not define module.__version__ +function( find_python_module_no_version PYTHON_MODULE_NAME ) + string(TOUPPER ${PYTHON_MODULE_NAME} PYTHON_MODULE_NAME_UP) + execute_process( + COMMAND ${PYTHON_EXECUTABLE} -c "import ${PYTHON_MODULE_NAME}" + RESULT_VARIABLE PYTHON_MODULE_RESULT + ERROR_VARIABLE PYTHON_MODULE_ERROR) + if(PYTHON_MODULE_RESULT EQUAL 0) + # Remove carriage return + message ("++ Python module ${PYTHON_MODULE_NAME} found") + set(${PYTHON_MODULE_NAME_UP}_FOUND TRUE PARENT_SCOPE) + else() + message ("PYTHON_MODULE_NAME = ${PYTHON_MODULE_NAME} + - PYTHON_MODULE_RESULT = ${PYTHON_MODULE_RESULT} + - PYTHON_MODULE_ERROR = ${PYTHON_MODULE_ERROR}") + set(${PYTHON_MODULE_NAME_UP}_FOUND FALSE PARENT_SCOPE) + endif() +endfunction( find_python_module_no_version ) + if( PYTHONINTERP_FOUND ) find_python_module("cython") find_python_module("pytest") @@ -160,6 +179,10 @@ if( PYTHONINTERP_FOUND ) find_python_module("sklearn") find_python_module("ot") find_python_module("pybind11") + find_python_module("torch") + find_python_module("pykeops") + find_python_module("eagerpy") + find_python_module_no_version("hnswlib") endif() if(NOT GUDHI_PYTHON_PATH) diff --git a/src/cmake/modules/GUDHI_user_version_target.cmake b/src/cmake/modules/GUDHI_user_version_target.cmake index 257d1939..9cf648e3 100644 --- a/src/cmake/modules/GUDHI_user_version_target.cmake +++ b/src/cmake/modules/GUDHI_user_version_target.cmake @@ -26,8 +26,12 @@ add_custom_command(TARGET user_version PRE_BUILD COMMAND ${CMAKE_COMMAND} -E # Generate bib files for Doxygen - cf. root CMakeLists.txt for explanation string(TIMESTAMP GUDHI_VERSION_YEAR "%Y") configure_file(${CMAKE_SOURCE_DIR}/biblio/how_to_cite_gudhi.bib.in "${CMAKE_CURRENT_BINARY_DIR}/biblio/how_to_cite_gudhi.bib" @ONLY) -file(COPY "${CMAKE_SOURCE_DIR}/biblio/how_to_cite_cgal.bib" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/biblio/") file(COPY "${CMAKE_SOURCE_DIR}/biblio/bibliography.bib" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/biblio/") + +# append cgal citation inside bibliography - sphinx cannot deal with more than one bib file +file(READ "${CMAKE_SOURCE_DIR}/biblio/how_to_cite_cgal.bib" CGAL_CITATION_CONTENT) +file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/biblio/bibliography.bib" "${CGAL_CITATION_CONTENT}") + # Copy biblio directory for user version add_custom_command(TARGET user_version PRE_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_BINARY_DIR}/biblio ${GUDHI_USER_VERSION_DIR}/biblio) diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index fb219884..4771cef9 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -78,6 +78,19 @@ if(PYTHONINTERP_FOUND) if(OT_FOUND) add_gudhi_debug_info("POT version ${OT_VERSION}") endif() + if(HNSWLIB_FOUND) + # Does not have a version number... + add_gudhi_debug_info("HNSWlib found") + endif() + if(TORCH_FOUND) + add_gudhi_debug_info("PyTorch version ${TORCH_VERSION}") + endif() + if(PYKEOPS_FOUND) + add_gudhi_debug_info("PyKeOps version ${PYKEOPS_VERSION}") + endif() + if(EAGERPY_FOUND) + add_gudhi_debug_info("EagerPy version ${EAGERPY_VERSION}") + endif() set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DBOOST_RESULT_OF_USE_DECLTYPE', ") set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DBOOST_ALL_NO_LIB', ") @@ -216,7 +229,7 @@ if(PYTHONINTERP_FOUND) # Other .py files file(COPY "gudhi/persistence_graphical_tools.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") file(COPY "gudhi/representations" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi/") - file(COPY "gudhi/wasserstein.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") + file(COPY "gudhi/wasserstein" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") file(COPY "gudhi/point_cloud" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") add_custom_command( @@ -229,6 +242,71 @@ if(PYTHONINTERP_FOUND) install(CODE "execute_process(COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/setup.py install)") + # Documentation generation is available through sphinx - requires all modules + # Make it first as sphinx test is by far the longest test which is nice when testing in parallel + if(SPHINX_PATH) + if(MATPLOTLIB_FOUND) + if(NUMPY_FOUND) + if(SCIPY_FOUND) + if(SKLEARN_FOUND) + if(OT_FOUND) + if(PYBIND11_FOUND) + if(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) + set (GUDHI_SPHINX_MESSAGE "Generating API documentation with Sphinx in ${CMAKE_CURRENT_BINARY_DIR}/sphinx/") + # User warning - Sphinx is a static pages generator, and configured to work fine with user_version + # Images and biblio warnings because not found on developper version + if (GUDHI_PYTHON_PATH STREQUAL "src/python") + set (GUDHI_SPHINX_MESSAGE "${GUDHI_SPHINX_MESSAGE} \n WARNING : Sphinx is configured for user version, you run it on developper version. Images and biblio will miss") + endif() + # sphinx target requires gudhi.so, because conf.py reads gudhi version from it + add_custom_target(sphinx + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doc + COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}" + ${SPHINX_PATH} -b html ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/sphinx + DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/gudhi.so" + COMMENT "${GUDHI_SPHINX_MESSAGE}" VERBATIM) + + add_test(NAME sphinx_py_test + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}" + ${SPHINX_PATH} -b doctest ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/doctest) + + # Set missing or not modules + set(GUDHI_MODULES ${GUDHI_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MODULES") + else(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) + message("++ Python documentation module will not be compiled because it requires a Eigen3 and CGAL version >= 4.11.0") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) + else(PYBIND11_FOUND) + message("++ Python documentation module will not be compiled because pybind11 was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(PYBIND11_FOUND) + else(OT_FOUND) + message("++ Python documentation module will not be compiled because POT was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(OT_FOUND) + else(SKLEARN_FOUND) + message("++ Python documentation module will not be compiled because scikit-learn was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(SKLEARN_FOUND) + else(SCIPY_FOUND) + message("++ Python documentation module will not be compiled because scipy was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(SCIPY_FOUND) + else(NUMPY_FOUND) + message("++ Python documentation module will not be compiled because numpy was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(NUMPY_FOUND) + else(MATPLOTLIB_FOUND) + message("++ Python documentation module will not be compiled because matplotlib was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(MATPLOTLIB_FOUND) + else(SPHINX_PATH) + message("++ Python documentation module will not be compiled because sphinx and sphinxcontrib-bibtex were not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(SPHINX_PATH) + + # Test examples if (NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) # Bottleneck and Alpha @@ -390,6 +468,7 @@ if(PYTHONINTERP_FOUND) # Wasserstein if(OT_FOUND AND PYBIND11_FOUND) add_gudhi_py_test(test_wasserstein_distance) + add_gudhi_py_test(test_wasserstein_barycenter) endif() # Representations @@ -400,69 +479,11 @@ if(PYTHONINTERP_FOUND) # Time Delay add_gudhi_py_test(test_time_delay) - # Documentation generation is available through sphinx - requires all modules - if(SPHINX_PATH) - if(MATPLOTLIB_FOUND) - if(NUMPY_FOUND) - if(SCIPY_FOUND) - if(SKLEARN_FOUND) - if(OT_FOUND) - if(PYBIND11_FOUND) - if(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) - set (GUDHI_SPHINX_MESSAGE "Generating API documentation with Sphinx in ${CMAKE_CURRENT_BINARY_DIR}/sphinx/") - # User warning - Sphinx is a static pages generator, and configured to work fine with user_version - # Images and biblio warnings because not found on developper version - if (GUDHI_PYTHON_PATH STREQUAL "src/python") - set (GUDHI_SPHINX_MESSAGE "${GUDHI_SPHINX_MESSAGE} \n WARNING : Sphinx is configured for user version, you run it on developper version. Images and biblio will miss") - endif() - # sphinx target requires gudhi.so, because conf.py reads gudhi version from it - add_custom_target(sphinx - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doc - COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}" - ${SPHINX_PATH} -b html ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/sphinx - DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/gudhi.so" - COMMENT "${GUDHI_SPHINX_MESSAGE}" VERBATIM) - - add_test(NAME sphinx_py_test - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}" - ${SPHINX_PATH} -b doctest ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/doctest) - - # Set missing or not modules - set(GUDHI_MODULES ${GUDHI_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MODULES") - else(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) - message("++ Python documentation module will not be compiled because it requires a Eigen3 and CGAL version >= 4.11.0") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) - else(PYBIND11_FOUND) - message("++ Python documentation module will not be compiled because pybind11 was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(PYBIND11_FOUND) - else(OT_FOUND) - message("++ Python documentation module will not be compiled because POT was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(OT_FOUND) - else(SKLEARN_FOUND) - message("++ Python documentation module will not be compiled because scikit-learn was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(SKLEARN_FOUND) - else(SCIPY_FOUND) - message("++ Python documentation module will not be compiled because scipy was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(SCIPY_FOUND) - else(NUMPY_FOUND) - message("++ Python documentation module will not be compiled because numpy was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(NUMPY_FOUND) - else(MATPLOTLIB_FOUND) - message("++ Python documentation module will not be compiled because matplotlib was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(MATPLOTLIB_FOUND) - else(SPHINX_PATH) - message("++ Python documentation module will not be compiled because sphinx and sphinxcontrib-bibtex were not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(SPHINX_PATH) - + # DTM + if(SCIPY_FOUND AND SKLEARN_FOUND AND TORCH_FOUND AND HNSWLIB_FOUND AND PYKEOPS_FOUND AND EAGERPY_FOUND) + add_gudhi_py_test(test_knn) + add_gudhi_py_test(test_dtm) + endif() # Set missing or not modules set(GUDHI_MODULES ${GUDHI_MODULES} "python" CACHE INTERNAL "GUDHI_MODULES") diff --git a/src/python/doc/alpha_complex_sum.inc b/src/python/doc/alpha_complex_sum.inc index b5af0d27..9e6414d0 100644 --- a/src/python/doc/alpha_complex_sum.inc +++ b/src/python/doc/alpha_complex_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ | .. figure:: | Alpha complex is a simplicial complex constructed from the finite | :Author: Vincent Rouvreau | | ../../doc/Alpha_complex/alpha_complex_representation.png | cells of a Delaunay Triangulation. | | - | :alt: Alpha complex representation | | :Introduced in: GUDHI 2.0.0 | + | :alt: Alpha complex representation | | :Since: GUDHI 2.0.0 | | :figclass: align-center | The filtration value of each simplex is computed as the **square** of | | - | | the circumradius of the simplex if the circumsphere is empty (the | :Copyright: MIT (`GPL v3 </licensing/>`_) | + | | the circumradius of the simplex if the circumsphere is empty (the | :License: MIT (`GPL v3 </licensing/>`_) | | | simplex is then said to be Gabriel), and as the minimum of the | | | | filtration values of the codimension 1 cofaces that make it not | :Requires: `Eigen <installation.html#eigen>`__ :math:`\geq` 3.1.0 and `CGAL <installation.html#cgal>`__ :math:`\geq` 4.11.0 | | | Gabriel otherwise. | | diff --git a/src/python/doc/alpha_complex_user.rst b/src/python/doc/alpha_complex_user.rst index 60319e84..a3b35c10 100644 --- a/src/python/doc/alpha_complex_user.rst +++ b/src/python/doc/alpha_complex_user.rst @@ -10,7 +10,7 @@ Definition .. include:: alpha_complex_sum.inc `AlphaComplex` is constructing a :doc:`SimplexTree <simplex_tree_ref>` using -`Delaunay Triangulation <http://doc.cgal.org/latest/Triangulation/index.html#Chapter_Triangulations>`_ +`Delaunay Triangulation <http://doc.cgal.org/latest/Triangulation/index.html#Chapter_Triangulations>`_ :cite:`cgal:hdj-t-19b` from `CGAL <http://www.cgal.org/>`_ (the Computational Geometry Algorithms Library :cite:`cgal:eb-19b`). @@ -203,9 +203,3 @@ the program output is: [4, 5, 6] -> 22.74 [3, 6] -> 30.25 -CGAL citations -============== - -.. bibliography:: ../../biblio/how_to_cite_cgal.bib - :filter: docnames - :style: unsrt diff --git a/src/python/doc/bottleneck_distance_sum.inc b/src/python/doc/bottleneck_distance_sum.inc index 6eb0ac19..0de4625c 100644 --- a/src/python/doc/bottleneck_distance_sum.inc +++ b/src/python/doc/bottleneck_distance_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ | .. figure:: | Bottleneck distance measures the similarity between two persistence | :Author: François Godi | | ../../doc/Bottleneck_distance/perturb_pd.png | diagrams. It's the shortest distance b for which there exists a | | - | :figclass: align-center | perfect matching between the points of the two diagrams (+ all the | :Introduced in: GUDHI 2.0.0 | + | :figclass: align-center | perfect matching between the points of the two diagrams (+ all the | :Since: GUDHI 2.0.0 | | | diagonal points) such that any couple of matched points are at | | - | Bottleneck distance is the length of | distance at most b, where the distance between points is the sup | :Copyright: MIT (`GPL v3 </licensing/>`_) | + | Bottleneck distance is the length of | distance at most b, where the distance between points is the sup | :License: MIT (`GPL v3 </licensing/>`_) | | the longest edge | norm in :math:`\mathbb{R}^2`. | | | | | :Requires: `CGAL <installation.html#cgal>`__ :math:`\geq` 4.11.0 | +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ diff --git a/src/python/doc/bottleneck_distance_user.rst b/src/python/doc/bottleneck_distance_user.rst index 9435c7f1..89da89d3 100644 --- a/src/python/doc/bottleneck_distance_user.rst +++ b/src/python/doc/bottleneck_distance_user.rst @@ -65,3 +65,4 @@ The output is: Bottleneck distance approximation = 0.81 Bottleneck distance value = 0.75 + diff --git a/src/python/doc/cubical_complex_sum.inc b/src/python/doc/cubical_complex_sum.inc index f200e695..28bf8e94 100644 --- a/src/python/doc/cubical_complex_sum.inc +++ b/src/python/doc/cubical_complex_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +--------------------------------------------------------------------------+----------------------------------------------------------------------+-----------------------------+ | .. figure:: | The cubical complex is an example of a structured complex useful in | :Author: Pawel Dlotko | | ../../doc/Bitmap_cubical_complex/Cubical_complex_representation.png | computational mathematics (specially rigorous numerics) and image | | - | :alt: Cubical complex representation | analysis. | :Introduced in: GUDHI 2.0.0 | + | :alt: Cubical complex representation | analysis. | :Since: GUDHI 2.0.0 | | :figclass: align-center | | | - | | | :Copyright: MIT | + | | | :License: MIT | | | | | +--------------------------------------------------------------------------+----------------------------------------------------------------------+-----------------------------+ | * :doc:`cubical_complex_user` | * :doc:`cubical_complex_ref` | diff --git a/src/python/doc/cubical_complex_user.rst b/src/python/doc/cubical_complex_user.rst index 56cf0170..e4733653 100644 --- a/src/python/doc/cubical_complex_user.rst +++ b/src/python/doc/cubical_complex_user.rst @@ -8,7 +8,7 @@ Definition ---------- ===================================== ===================================== ===================================== -:Author: Pawel Dlotko :Introduced in: GUDHI PYTHON 2.0.0 :Copyright: GPL v3 +:Author: Pawel Dlotko :Since: GUDHI PYTHON 2.0.0 :License: GPL v3 ===================================== ===================================== ===================================== +---------------------------------------------+----------------------------------------------------------------------+ @@ -158,10 +158,3 @@ Examples. --------- End user programs are available in python/example/ folder. - -Bibliography -============ - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docnames - :style: unsrt diff --git a/src/python/doc/img/barycenter.png b/src/python/doc/img/barycenter.png Binary files differnew file mode 100644 index 00000000..cad6af70 --- /dev/null +++ b/src/python/doc/img/barycenter.png diff --git a/src/python/doc/index.rst b/src/python/doc/index.rst index 3387a64f..13e51047 100644 --- a/src/python/doc/index.rst +++ b/src/python/doc/index.rst @@ -71,6 +71,7 @@ Wasserstein distance .. include:: wasserstein_distance_sum.inc + Persistence representations =========================== @@ -85,10 +86,3 @@ Point cloud utilities ********************* .. include:: point_cloud_sum.inc - -Bibliography -************ - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docnames - :style: unsrt diff --git a/src/python/doc/installation.rst b/src/python/doc/installation.rst index d459145b..09a843d5 100644 --- a/src/python/doc/installation.rst +++ b/src/python/doc/installation.rst @@ -175,8 +175,8 @@ Documentation To build the documentation, `sphinx-doc <http://www.sphinx-doc.org>`_ and `sphinxcontrib-bibtex <https://sphinxcontrib-bibtex.readthedocs.io>`_ are required. As the documentation is auto-tested, `CGAL`_, `Eigen`_, -`Matplotlib`_, `NumPy`_ and `SciPy`_ are also mandatory to build the -documentation. +`Matplotlib`_, `NumPy`_, `POT`_, `Scikit-learn`_ and `SciPy`_ are +also mandatory to build the documentation. Run the following commands in a terminal: @@ -192,8 +192,8 @@ CGAL ==== Some GUDHI modules (cf. :doc:`modules list </index>`), and few examples -require CGAL, a C++ library that provides easy access to efficient and -reliable geometric algorithms. +require `CGAL <https://www.cgal.org/>`_, a C++ library that provides easy +access to efficient and reliable geometric algorithms. The procedure to install this library @@ -211,6 +211,14 @@ The following examples requires CGAL version ≥ 4.11.0: * :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>` * :download:`euclidean_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py>` +EagerPy +======= + +Some Python functions can handle automatic differentiation (possibly only when +a flag `enable_autodiff=True` is used). In order to reduce code duplication, we +use `EagerPy <https://eagerpy.jonasrauber.de/>`_ which wraps arrays from +PyTorch, TensorFlow and JAX in a common interface. + Eigen ===== @@ -229,6 +237,13 @@ The following examples require `Eigen <http://eigen.tuxfamily.org/>`_ version ≠* :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>` * :download:`euclidean_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py>` +Hnswlib +======= + +:class:`~gudhi.point_cloud.knn.KNearestNeighbors` can use the Python package +`Hnswlib <https://github.com/nmslib/hnswlib>`_ as a backend if explicitly +requested, to speed-up queries. + Matplotlib ========== @@ -251,6 +266,13 @@ The following examples require the `Matplotlib <http://matplotlib.org>`_: * :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>` * :download:`euclidean_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py>` +PyKeOps +======= + +:class:`~gudhi.point_cloud.knn.KNearestNeighbors` can use the Python package +`PyKeOps <https://www.kernel-operations.io/keops/python/>`_ as a backend if +explicitly requested, to speed-up queries using a GPU. + Python Optimal Transport ======================== @@ -258,6 +280,12 @@ The :doc:`Wasserstein distance </wasserstein_distance_user>` module requires `POT <https://pot.readthedocs.io/>`_, a library that provides several solvers for optimization problems related to Optimal Transport. +PyTorch +======= + +`PyTorch <https://pytorch.org/>`_ is currently only used as a dependency of +`PyKeOps`_, and in some tests. + Scikit-learn ============ diff --git a/src/python/doc/nerve_gic_complex_sum.inc b/src/python/doc/nerve_gic_complex_sum.inc index d633c4ff..7fe55aff 100644 --- a/src/python/doc/nerve_gic_complex_sum.inc +++ b/src/python/doc/nerve_gic_complex_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +----------------------------------------------------------------+------------------------------------------------------------------------+------------------------------------------------------------------+ | .. figure:: | Nerves and Graph Induced Complexes are cover complexes, i.e. | :Author: Mathieu Carrière | | ../../doc/Nerve_GIC/gicvisu.jpg | simplicial complexes that provably contain topological information | | - | :alt: Graph Induced Complex of a point cloud. | about the input data. They can be computed with a cover of the data, | :Introduced in: GUDHI 2.3.0 | + | :alt: Graph Induced Complex of a point cloud. | about the input data. They can be computed with a cover of the data, | :Since: GUDHI 2.3.0 | | :figclass: align-center | that comes i.e. from the preimage of a family of intervals covering | | - | | the image of a scalar-valued function defined on the data. | :Copyright: MIT (`GPL v3 </licensing/>`_) | + | | the image of a scalar-valued function defined on the data. | :License: MIT (`GPL v3 </licensing/>`_) | | | | | | | | :Requires: `CGAL <installation.html#cgal>`__ :math:`\geq` 4.11.0 | | | | | diff --git a/src/python/doc/persistence_graphical_tools_sum.inc b/src/python/doc/persistence_graphical_tools_sum.inc index ef376802..b68d3d7e 100644 --- a/src/python/doc/persistence_graphical_tools_sum.inc +++ b/src/python/doc/persistence_graphical_tools_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +-----------------------------------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------+ | .. figure:: | These graphical tools comes on top of persistence results and allows | :Author: Vincent Rouvreau, Theo Lacombe | | img/graphical_tools_representation.png | the user to display easily persistence barcode, diagram or density. | | - | | | :Introduced in: GUDHI 2.0.0 | + | | | :Since: GUDHI 2.0.0 | | | Note that these functions return the matplotlib axis, allowing | | - | | for further modifications (title, aspect, etc.) | :Copyright: MIT | + | | for further modifications (title, aspect, etc.) | :License: MIT | | | | | | | | :Requires: matplotlib, numpy and scipy | +-----------------------------------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------+ diff --git a/src/python/doc/persistent_cohomology_sum.inc b/src/python/doc/persistent_cohomology_sum.inc index 4d7b077e..0effb50f 100644 --- a/src/python/doc/persistent_cohomology_sum.inc +++ b/src/python/doc/persistent_cohomology_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +-----------------------------------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------+ | .. figure:: | The theory of homology consists in attaching to a topological space | :Author: Clément Maria | | ../../doc/Persistent_cohomology/3DTorus_poch.png | a sequence of (homology) groups, capturing global topological | | - | :figclass: align-center | features like connected components, holes, cavities, etc. Persistent | :Introduced in: GUDHI 2.0.0 | + | :figclass: align-center | features like connected components, holes, cavities, etc. Persistent | :Since: GUDHI 2.0.0 | | | homology studies the evolution -- birth, life and death -- of these | | - | Rips Persistent Cohomology on a 3D | features when the topological space is changing. Consequently, the | :Copyright: MIT | + | Rips Persistent Cohomology on a 3D | features when the topological space is changing. Consequently, the | :License: MIT | | Torus | theory is essentially composed of three elements: topological spaces, | | | | their homology groups and an evolution scheme. | | | | | | diff --git a/src/python/doc/persistent_cohomology_user.rst b/src/python/doc/persistent_cohomology_user.rst index de83cda1..4d743aac 100644 --- a/src/python/doc/persistent_cohomology_user.rst +++ b/src/python/doc/persistent_cohomology_user.rst @@ -7,7 +7,7 @@ Persistent cohomology user manual Definition ---------- ===================================== ===================================== ===================================== -:Author: Clément Maria :Introduced in: GUDHI PYTHON 2.0.0 :Copyright: GPL v3 +:Author: Clément Maria :Since: GUDHI PYTHON 2.0.0 :License: GPL v3 ===================================== ===================================== ===================================== +-----------------------------------------------------------------+-----------------------------------------------------------------------+ @@ -111,10 +111,3 @@ We provide several example files: run these examples with -h for details on thei * :download:`rips_complex_diagram_persistence_from_distance_matrix_file_example.py <../example/rips_complex_diagram_persistence_from_distance_matrix_file_example.py>` * :download:`random_cubical_complex_persistence_example.py <../example/random_cubical_complex_persistence_example.py>` * :download:`tangential_complex_plain_homology_from_off_file_example.py <../example/tangential_complex_plain_homology_from_off_file_example.py>` - -Bibliography -============ - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docnames - :style: unsrt diff --git a/src/python/doc/point_cloud.rst b/src/python/doc/point_cloud.rst index c0d4b303..192f70db 100644 --- a/src/python/doc/point_cloud.rst +++ b/src/python/doc/point_cloud.rst @@ -21,10 +21,25 @@ Subsampling :special-members: :show-inheritance: -TimeDelayEmbedding ------------------- +Time Delay Embedding +-------------------- .. autoclass:: gudhi.point_cloud.timedelay.TimeDelayEmbedding :members: :special-members: __call__ +K nearest neighbors +------------------- + +.. automodule:: gudhi.point_cloud.knn + :members: + :undoc-members: + :special-members: __init__ + +Distance to measure +------------------- + +.. automodule:: gudhi.point_cloud.dtm + :members: + :undoc-members: + :special-members: __init__ diff --git a/src/python/doc/point_cloud_sum.inc b/src/python/doc/point_cloud_sum.inc index 85d52de7..d4761aba 100644 --- a/src/python/doc/point_cloud_sum.inc +++ b/src/python/doc/point_cloud_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ - | | :math:`(x_1, x_2, \ldots, x_d)` | Utilities to process point clouds: read from file, subsample, etc. | :Author: Vincent Rouvreau | - | | :math:`(y_1, y_2, \ldots, y_d)` | | | - | | | :Introduced in: GUDHI 2.0.0 | + | | :math:`(x_1, x_2, \ldots, x_d)` | Utilities to process point clouds: read from file, subsample, | :Authors: Vincent Rouvreau, Marc Glisse, Masatoshi Takenouchi | + | | :math:`(y_1, y_2, \ldots, y_d)` | find neighbors, embed time series in higher dimension, etc. | | + | | | :Since: GUDHI 2.0.0 | | | | | - | | | :Copyright: MIT (`GPL v3 </licensing/>`_) | + | | | :License: MIT (`GPL v3 </licensing/>`_, BSD-3-Clause, Apache-2.0) | | | Parts of this package require CGAL. | | | | | :Requires: `Eigen <installation.html#eigen>`__ :math:`\geq` 3.1.0 and `CGAL <installation.html#cgal>`__ :math:`\geq` 4.11.0 | | | | | diff --git a/src/python/doc/representations_sum.inc b/src/python/doc/representations_sum.inc index 700828f1..eac89b9d 100644 --- a/src/python/doc/representations_sum.inc +++ b/src/python/doc/representations_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +------------------------------------------------------------------+----------------------------------------------------------------+-----------------------------------------------+ | .. figure:: | Vectorizations, distances and kernels that work on persistence | :Author: Mathieu Carrière | | img/sklearn-tda.png | diagrams, compatible with scikit-learn. | | - | | | :Introduced in: GUDHI 3.1.0 | + | | | :Since: GUDHI 3.1.0 | | | | | - | | | :Copyright: MIT | + | | | :License: MIT | | | | | | | | :Requires: scikit-learn | +------------------------------------------------------------------+----------------------------------------------------------------+-----------------------------------------------+ diff --git a/src/python/doc/rips_complex_sum.inc b/src/python/doc/rips_complex_sum.inc index 857c6893..6feb74cd 100644 --- a/src/python/doc/rips_complex_sum.inc +++ b/src/python/doc/rips_complex_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +----------------------------------------------------------------+------------------------------------------------------------------------+----------------------------------------------------------------------+ | .. figure:: | Rips complex is a simplicial complex constructed from a one skeleton | :Authors: Clément Maria, Pawel Dlotko, Vincent Rouvreau, Marc Glisse | | ../../doc/Rips_complex/rips_complex_representation.png | graph. | | - | :figclass: align-center | | :Introduced in: GUDHI 2.0.0 | + | :figclass: align-center | | :Since: GUDHI 2.0.0 | | | The filtration value of each edge is computed from a user-given | | - | | distance function and is inserted until a user-given threshold | :Copyright: MIT | + | | distance function and is inserted until a user-given threshold | :License: MIT | | | value. | | | | | | | | This complex can be built from a point cloud and a distance function, | | diff --git a/src/python/doc/rips_complex_user.rst b/src/python/doc/rips_complex_user.rst index a27573e8..8efb12e6 100644 --- a/src/python/doc/rips_complex_user.rst +++ b/src/python/doc/rips_complex_user.rst @@ -8,7 +8,7 @@ Definition ---------- ==================================================================== ================================ ====================== -:Authors: Clément Maria, Pawel Dlotko, Vincent Rouvreau, Marc Glisse :Introduced in: GUDHI 2.0.0 :Copyright: GPL v3 +:Authors: Clément Maria, Pawel Dlotko, Vincent Rouvreau, Marc Glisse :Since: GUDHI 2.0.0 :License: GPL v3 ==================================================================== ================================ ====================== +-------------------------------------------+----------------------------------------------------------------------+ diff --git a/src/python/doc/simplex_tree_ref.rst b/src/python/doc/simplex_tree_ref.rst index 9eb8c199..46b2c1e5 100644 --- a/src/python/doc/simplex_tree_ref.rst +++ b/src/python/doc/simplex_tree_ref.rst @@ -8,7 +8,6 @@ Simplex tree reference manual .. autoclass:: gudhi.SimplexTree :members: - :undoc-members: :show-inheritance: .. automethod:: gudhi.SimplexTree.__init__ diff --git a/src/python/doc/simplex_tree_sum.inc b/src/python/doc/simplex_tree_sum.inc index 5ba58d2b..a8858f16 100644 --- a/src/python/doc/simplex_tree_sum.inc +++ b/src/python/doc/simplex_tree_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------+ | .. figure:: | The simplex tree is an efficient and flexible data structure for | :Author: Clément Maria | | ../../doc/Simplex_tree/Simplex_tree_representation.png | representing general (filtered) simplicial complexes. | | - | :alt: Simplex tree representation | | :Introduced in: GUDHI 2.0.0 | + | :alt: Simplex tree representation | | :Since: GUDHI 2.0.0 | | :figclass: align-center | The data structure is described in | | - | | :cite:`boissonnatmariasimplextreealgorithmica` | :Copyright: MIT | + | | :cite:`boissonnatmariasimplextreealgorithmica` | :License: MIT | | | | | +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------+ | * :doc:`simplex_tree_user` | * :doc:`simplex_tree_ref` | diff --git a/src/python/doc/tangential_complex_sum.inc b/src/python/doc/tangential_complex_sum.inc index d84aa433..45ce2a66 100644 --- a/src/python/doc/tangential_complex_sum.inc +++ b/src/python/doc/tangential_complex_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ | .. figure:: | A Tangential Delaunay complex is a simplicial complex designed to | :Author: Clément Jamin | | ../../doc/Tangential_complex/tc_examples.png | reconstruct a :math:`k`-dimensional manifold embedded in :math:`d`- | | - | :figclass: align-center | dimensional Euclidean space. The input is a point sample coming from | :Introduced in: GUDHI 2.0.0 | + | :figclass: align-center | dimensional Euclidean space. The input is a point sample coming from | :Since: GUDHI 2.0.0 | | | an unknown manifold. The running time depends only linearly on the | | - | | extrinsic dimension :math:`d` and exponentially on the intrinsic | :Copyright: MIT (`GPL v3 </licensing/>`_) | + | | extrinsic dimension :math:`d` and exponentially on the intrinsic | :License: MIT (`GPL v3 </licensing/>`_) | | | dimension :math:`k`. | | | | | :Requires: `Eigen <installation.html#eigen>`__ :math:`\geq` 3.1.0 and `CGAL <installation.html#cgal>`__ :math:`\geq` 4.11.0 | +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/tangential_complex_user.rst b/src/python/doc/tangential_complex_user.rst index 852cf5b6..3d45473b 100644 --- a/src/python/doc/tangential_complex_user.rst +++ b/src/python/doc/tangential_complex_user.rst @@ -194,11 +194,3 @@ The output is: Tangential contains 4 vertices. Inconsistencies has been fixed. - - -Bibliography -============ - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docnames - :style: unsrt diff --git a/src/python/doc/wasserstein_distance_sum.inc b/src/python/doc/wasserstein_distance_sum.inc index a97f428d..f9308e5e 100644 --- a/src/python/doc/wasserstein_distance_sum.inc +++ b/src/python/doc/wasserstein_distance_sum.inc @@ -1,13 +1,13 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ | .. figure:: | The q-Wasserstein distance measures the similarity between two | :Author: Theo Lacombe | - | ../../doc/Bottleneck_distance/perturb_pd.png | persistence diagrams. It's the minimum value c that can be achieved | | - | :figclass: align-center | by a perfect matching between the points of the two diagrams (+ all | :Introduced in: GUDHI 3.1.0 | - | | diagonal points), where the value of a matching is defined as the | | - | Wasserstein distance is the q-th root of the sum of the | q-th root of the sum of all edge lengths to the power q. Edge lengths| :Copyright: MIT | - | edge lengths to the power q. | are measured in norm p, for :math:`1 \leq p \leq \infty`. | | + | ../../doc/Bottleneck_distance/perturb_pd.png | persistence diagrams using the sum of all edges lengths (instead of | | + | :figclass: align-center | the maximum). It allows to define sophisticated objects such as | :Since: GUDHI 3.1.0 | + | | barycenters of a family of persistence diagrams. | | + | | | :License: MIT | + | | | | | | | :Requires: Python Optimal Transport (POT) :math:`\geq` 0.5.1 | +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ | * :doc:`wasserstein_distance_user` | | diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst index a9b21fa5..c443bab5 100644 --- a/src/python/doc/wasserstein_distance_user.rst +++ b/src/python/doc/wasserstein_distance_user.rst @@ -9,10 +9,16 @@ Definition .. include:: wasserstein_distance_sum.inc -Functions ---------- -This implementation uses the Python Optimal Transport library and is based on -ideas from "Large Scale Computation of Means and Cluster for Persistence +The q-Wasserstein distance is defined as the minimal value achieved +by a perfect matching between the points of the two diagrams (+ all +diagonal points), where the value of a matching is defined as the +q-th root of the sum of all edge lengths to the power q. Edge lengths +are measured in norm p, for :math:`1 \leq p \leq \infty`. + +Distance Functions +------------------ +This first implementation uses the Python Optimal Transport library and is based +on ideas from "Large Scale Computation of Means and Cluster for Persistence Diagrams via Optimal Transport" :cite:`10.5555/3327546.3327645`. .. autofunction:: gudhi.wasserstein.wasserstein_distance @@ -26,7 +32,7 @@ Morozov, and Arnur Nigmetov. .. autofunction:: gudhi.hera.wasserstein_distance Basic example -------------- +************* This example computes the 1-Wasserstein distance from 2 persistence diagrams with Euclidean ground metric. Note that persistence diagrams must be submitted as (n x 2) numpy arrays and must not contain inf values. @@ -48,9 +54,9 @@ The output is: Wasserstein distance value = 1.45 -We can also have access to the optimal matching by letting `matching=True`. +We can also have access to the optimal matching by letting `matching=True`. It is encoded as a list of indices (i,j), meaning that the i-th point in X -is mapped to the j-th point in Y. +is mapped to the j-th point in Y. An index of -1 represents the diagonal. .. testcode:: @@ -78,9 +84,83 @@ An index of -1 represents the diagonal. The output is: .. testoutput:: - + Wasserstein distance value = 2.15 point 0 in dgm1 is matched to point 0 in dgm2 point 1 in dgm1 is matched to point 2 in dgm2 point 2 in dgm1 is matched to the diagonal point 1 in dgm2 is matched to the diagonal + +Barycenters +----------- + +A Frechet mean (or barycenter) is a generalization of the arithmetic +mean in a non linear space such as the one of persistence diagrams. +Given a set of persistence diagrams :math:`\mu_1 \dots \mu_n`, it is +defined as a minimizer of the variance functional, that is of +:math:`\mu \mapsto \sum_{i=1}^n d_2(\mu,\mu_i)^2`. +where :math:`d_2` denotes the Wasserstein-2 distance between +persistence diagrams. +It is known to exist and is generically unique. However, an exact +computation is in general untractable. Current implementation +available is based on (Turner et al., 2014), +:cite:`turner2014frechet` +and uses an EM-scheme to +provide a local minimum of the variance functional (somewhat similar +to the Lloyd algorithm to estimate a solution to the k-means +problem). The local minimum returned depends on the initialization of +the barycenter. +The combinatorial structure of the algorithm limits its +performances on large scale problems (thousands of diagrams and of points +per diagram). + +.. figure:: + ./img/barycenter.png + :figclass: align-center + + Illustration of Frechet mean between persistence + diagrams. + + +.. autofunction:: gudhi.wasserstein.barycenter.lagrangian_barycenter + +Basic example +************* + +This example estimates the Frechet mean (aka Wasserstein barycenter) between +four persistence diagrams. +It is initialized on the 4th diagram. +As the algorithm is not convex, its output depends on the initialization and +is only a local minimum of the objective function. +Initialization can be either given as an integer (in which case the i-th +diagram of the list is used as initial estimate) or as a diagram. +If None, it will randomly select one of the diagrams of the list +as initial estimate. +Note that persistence diagrams must be submitted as +(n x 2) numpy arrays and must not contain inf values. + + +.. testcode:: + + from gudhi.wasserstein.barycenter import lagrangian_barycenter + import numpy as np + + dg1 = np.array([[0.2, 0.5]]) + dg2 = np.array([[0.2, 0.7]]) + dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]]) + dg4 = np.array([]) + pdiagset = [dg1, dg2, dg3, dg4] + bary = lagrangian_barycenter(pdiagset=pdiagset,init=3) + + message = "Wasserstein barycenter estimated:" + print(message) + print(bary) + +The output is: + +.. testoutput:: + + Wasserstein barycenter estimated: + [[0.27916667 0.55416667] + [0.7375 0.7625 ] + [0.2375 0.2625 ]] diff --git a/src/python/doc/witness_complex_sum.inc b/src/python/doc/witness_complex_sum.inc index 71b65a71..34d4df4a 100644 --- a/src/python/doc/witness_complex_sum.inc +++ b/src/python/doc/witness_complex_sum.inc @@ -1,12 +1,12 @@ .. table:: - :widths: 30 50 20 + :widths: 30 40 30 +-------------------------------------------------------------------+----------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ | .. figure:: | Witness complex :math:`Wit(W,L)` is a simplicial complex defined on | :Author: Siargey Kachanovich | | ../../doc/Witness_complex/Witness_complex_representation.png | two sets of points in :math:`\mathbb{R}^D`. | | - | :alt: Witness complex representation | | :Introduced in: GUDHI 2.0.0 | + | :alt: Witness complex representation | | :Since: GUDHI 2.0.0 | | :figclass: align-center | The data structure is described in | | - | | :cite:`boissonnatmariasimplextreealgorithmica`. | :Copyright: MIT (`GPL v3 </licensing/>`_ for Euclidean versions only) | + | | :cite:`boissonnatmariasimplextreealgorithmica`. | :License: MIT (`GPL v3 </licensing/>`_ for Euclidean versions only) | | | | | | | | :Requires: `Eigen <installation.html#eigen>`__ :math:`\geq` 3.1.0 and `CGAL <installation.html#cgal>`__ :math:`\geq` 4.11.0 for Euclidean versions only | +-------------------------------------------------------------------+----------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/witness_complex_user.rst b/src/python/doc/witness_complex_user.rst index 7087fa98..08dcd288 100644 --- a/src/python/doc/witness_complex_user.rst +++ b/src/python/doc/witness_complex_user.rst @@ -126,10 +126,3 @@ Example2: Computing persistence using strong relaxed witness complex Here is an example of constructing a strong witness complex filtration and computing persistence on it: * :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>` - -Bibliography -============ - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docnames - :style: unsrt diff --git a/src/python/doc/zbibliography.rst b/src/python/doc/zbibliography.rst new file mode 100644 index 00000000..e23fcf25 --- /dev/null +++ b/src/python/doc/zbibliography.rst @@ -0,0 +1,10 @@ +:orphan: + +.. To get rid of WARNING: document isn't included in any toctree + +Bibliography +------------ + +.. bibliography:: ../../biblio/bibliography.bib + :style: plain + diff --git a/src/python/example/alpha_complex_from_points_example.py b/src/python/example/alpha_complex_from_points_example.py index 73faf17c..465632eb 100755 --- a/src/python/example/alpha_complex_from_points_example.py +++ b/src/python/example/alpha_complex_from_points_example.py @@ -46,9 +46,6 @@ if simplex_tree.find([4]): else: print("[4] Not found...") -# Some insertions, simplex_tree needs to initialize filtrations -simplex_tree.initialize_filtration() - print("dimension=", simplex_tree.dimension()) print("filtrations=") for simplex_with_filtration in simplex_tree.get_filtration(): diff --git a/src/python/example/alpha_rips_persistence_bottleneck_distance.py b/src/python/example/alpha_rips_persistence_bottleneck_distance.py index f156826d..3e12b0d5 100755 --- a/src/python/example/alpha_rips_persistence_bottleneck_distance.py +++ b/src/python/example/alpha_rips_persistence_bottleneck_distance.py @@ -5,6 +5,7 @@ import argparse import math import errno import os +import numpy as np """ This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. @@ -56,7 +57,7 @@ with open(args.file, "r") as f: message = "Number of simplices=" + repr(rips_stree.num_simplices()) print(message) - rips_diag = rips_stree.persistence() + rips_stree.compute_persistence() print("##############################################################") print("AlphaComplex creation from points read in a OFF file") @@ -72,18 +73,13 @@ with open(args.file, "r") as f: message = "Number of simplices=" + repr(alpha_stree.num_simplices()) print(message) - alpha_diag = alpha_stree.persistence() + alpha_stree.compute_persistence() max_b_distance = 0.0 for dim in range(args.max_dimension): # Alpha persistence values needs to be transform because filtration # values are alpha square values - funcs = [math.sqrt, math.sqrt] - alpha_intervals = [] - for interval in alpha_stree.persistence_intervals_in_dimension(dim): - alpha_intervals.append( - map(lambda func, value: func(value), funcs, interval) - ) + alpha_intervals = np.sqrt(alpha_stree.persistence_intervals_in_dimension(dim)) rips_intervals = rips_stree.persistence_intervals_in_dimension(dim) bottleneck_distance = gudhi.bottleneck_distance( diff --git a/src/python/example/simplex_tree_example.py b/src/python/example/simplex_tree_example.py index 34833899..c4635dc5 100755 --- a/src/python/example/simplex_tree_example.py +++ b/src/python/example/simplex_tree_example.py @@ -42,7 +42,6 @@ print("simplices=") for simplex_with_filtration in st.get_simplices(): print("(%s, %.2f)" % tuple(simplex_with_filtration)) -st.initialize_filtration() print("filtration=") for simplex_with_filtration in st.get_filtration(): print("(%s, %.2f)" % tuple(simplex_with_filtration)) diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index d5ad1266..007abcb6 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -35,7 +35,8 @@ cdef extern from "Cubical_complex_interface.h" namespace "Gudhi": cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Cubical_complex_persistence_interface "Gudhi::Persistent_cohomology_interface<Gudhi::Cubical_complex::Cubical_complex_interface<>>": Cubical_complex_persistence_interface(Bitmap_cubical_complex_base_interface * st, bool persistence_dim_max) - vector[pair[int, pair[double, double]]] get_persistence(int homology_coeff_field, double min_persistence) + void compute_persistence(int homology_coeff_field, double min_persistence) + vector[pair[int, pair[double, double]]] get_persistence() vector[int] betti_numbers() vector[int] persistent_betti_numbers(double from_value, double to_value) vector[pair[double,double]] intervals_in_dimension(int dimension) @@ -129,8 +130,31 @@ cdef class CubicalComplex: """ return self.thisptr.dimension() + def compute_persistence(self, homology_coeff_field=11, min_persistence=0): + """This function computes the persistence of the complex, so it can be + accessed through :func:`persistent_betti_numbers`, + :func:`persistence_intervals_in_dimension`, etc. This function is + equivalent to :func:`persistence` when you do not want the list + :func:`persistence` returns. + + :param homology_coeff_field: The homology coefficient field. Must be a + prime number + :type homology_coeff_field: int. + :param min_persistence: The minimum persistence value to take into + account (strictly greater than min_persistence). Default value is + 0.0. + Sets min_persistence to -1.0 to see all values. + :type min_persistence: float. + :returns: Nothing. + """ + if self.pcohptr != NULL: + del self.pcohptr + assert self.__is_defined() + self.pcohptr = new Cubical_complex_persistence_interface(self.thisptr, True) + self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) + def persistence(self, homology_coeff_field=11, min_persistence=0): - """This function returns the persistence of the complex. + """This function computes and returns the persistence of the complex. :param homology_coeff_field: The homology coefficient field. Must be a prime number @@ -143,30 +167,22 @@ cdef class CubicalComplex: :returns: list of pairs(dimension, pair(birth, death)) -- the persistence of the complex. """ - if self.pcohptr != NULL: - del self.pcohptr - if self.thisptr != NULL: - self.pcohptr = new Cubical_complex_persistence_interface(self.thisptr, True) - cdef vector[pair[int, pair[double, double]]] persistence_result - if self.pcohptr != NULL: - persistence_result = self.pcohptr.get_persistence(homology_coeff_field, min_persistence) - return persistence_result + self.compute_persistence(homology_coeff_field, min_persistence) + return self.pcohptr.get_persistence() def betti_numbers(self): """This function returns the Betti numbers of the complex. :returns: list of int -- The Betti numbers ([B0, B1, ..., Bn]). - :note: betti_numbers function requires persistence function to be + :note: betti_numbers function requires :func:`compute_persistence` function to be launched first. :note: betti_numbers function always returns [1, 0, 0, ...] as infinity filtration cubes are not removed from the complex. """ - cdef vector[int] bn_result - if self.pcohptr != NULL: - bn_result = self.pcohptr.betti_numbers() - return bn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before betti_numbers()" + return self.pcohptr.betti_numbers() def persistent_betti_numbers(self, from_value, to_value): """This function returns the persistent Betti numbers of the complex. @@ -181,13 +197,11 @@ cdef class CubicalComplex: :returns: list of int -- The persistent Betti numbers ([B0, B1, ..., Bn]). - :note: persistent_betti_numbers function requires persistence + :note: persistent_betti_numbers function requires :func:`compute_persistence` function to be launched first. """ - cdef vector[int] pbn_result - if self.pcohptr != NULL: - pbn_result = self.pcohptr.persistent_betti_numbers(<double>from_value, <double>to_value) - return pbn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before persistent_betti_numbers()" + return self.pcohptr.persistent_betti_numbers(<double>from_value, <double>to_value) def persistence_intervals_in_dimension(self, dimension): """This function returns the persistence intervals of the complex in a @@ -198,13 +212,8 @@ cdef class CubicalComplex: :returns: The persistence intervals. :rtype: numpy array of dimension 2 - :note: intervals_in_dim function requires persistence function to be + :note: intervals_in_dim function requires :func:`compute_persistence` function to be launched first. """ - cdef vector[pair[double,double]] intervals_result - if self.pcohptr != NULL: - intervals_result = self.pcohptr.intervals_in_dimension(dimension) - else: - print("intervals_in_dim function requires persistence function" - " to be launched first.", file=sys.stderr) - return np.array(intervals_result) + assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()" + return np.array(self.pcohptr.intervals_in_dimension(dimension)) diff --git a/src/python/gudhi/periodic_cubical_complex.pyx b/src/python/gudhi/periodic_cubical_complex.pyx index fd08b976..246a3a02 100644 --- a/src/python/gudhi/periodic_cubical_complex.pyx +++ b/src/python/gudhi/periodic_cubical_complex.pyx @@ -32,7 +32,8 @@ cdef extern from "Cubical_complex_interface.h" namespace "Gudhi": cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Periodic_cubical_complex_persistence_interface "Gudhi::Persistent_cohomology_interface<Gudhi::Cubical_complex::Cubical_complex_interface<Gudhi::cubical_complex::Bitmap_cubical_complex_periodic_boundary_conditions_base<double>>>": Periodic_cubical_complex_persistence_interface(Periodic_cubical_complex_base_interface * st, bool persistence_dim_max) - vector[pair[int, pair[double, double]]] get_persistence(int homology_coeff_field, double min_persistence) + void compute_persistence(int homology_coeff_field, double min_persistence) + vector[pair[int, pair[double, double]]] get_persistence() vector[int] betti_numbers() vector[int] persistent_betti_numbers(double from_value, double to_value) vector[pair[double,double]] intervals_in_dimension(int dimension) @@ -134,8 +135,31 @@ cdef class PeriodicCubicalComplex: """ return self.thisptr.dimension() + def compute_persistence(self, homology_coeff_field=11, min_persistence=0): + """This function computes the persistence of the complex, so it can be + accessed through :func:`persistent_betti_numbers`, + :func:`persistence_intervals_in_dimension`, etc. This function is + equivalent to :func:`persistence` when you do not want the list + :func:`persistence` returns. + + :param homology_coeff_field: The homology coefficient field. Must be a + prime number + :type homology_coeff_field: int. + :param min_persistence: The minimum persistence value to take into + account (strictly greater than min_persistence). Default value is + 0.0. + Sets min_persistence to -1.0 to see all values. + :type min_persistence: float. + :returns: Nothing. + """ + if self.pcohptr != NULL: + del self.pcohptr + assert self.__is_defined() + self.pcohptr = new Periodic_cubical_complex_persistence_interface(self.thisptr, True) + self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) + def persistence(self, homology_coeff_field=11, min_persistence=0): - """This function returns the persistence of the complex. + """This function computes and returns the persistence of the complex. :param homology_coeff_field: The homology coefficient field. Must be a prime number @@ -148,30 +172,22 @@ cdef class PeriodicCubicalComplex: :returns: list of pairs(dimension, pair(birth, death)) -- the persistence of the complex. """ - if self.pcohptr != NULL: - del self.pcohptr - if self.thisptr != NULL: - self.pcohptr = new Periodic_cubical_complex_persistence_interface(self.thisptr, True) - cdef vector[pair[int, pair[double, double]]] persistence_result - if self.pcohptr != NULL: - persistence_result = self.pcohptr.get_persistence(homology_coeff_field, min_persistence) - return persistence_result + self.compute_persistence(homology_coeff_field, min_persistence) + return self.pcohptr.get_persistence() def betti_numbers(self): """This function returns the Betti numbers of the complex. :returns: list of int -- The Betti numbers ([B0, B1, ..., Bn]). - :note: betti_numbers function requires persistence function to be + :note: betti_numbers function requires :func:`compute_persistence` function to be launched first. - :note: betti_numbers function always returns [1, 0, 0, ...] as infinity + :note: This function always returns the Betti numbers of a torus as infinity filtration cubes are not removed from the complex. """ - cdef vector[int] bn_result - if self.pcohptr != NULL: - bn_result = self.pcohptr.betti_numbers() - return bn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before betti_numbers()" + return self.pcohptr.betti_numbers() def persistent_betti_numbers(self, from_value, to_value): """This function returns the persistent Betti numbers of the complex. @@ -186,13 +202,11 @@ cdef class PeriodicCubicalComplex: :returns: list of int -- The persistent Betti numbers ([B0, B1, ..., Bn]). - :note: persistent_betti_numbers function requires persistence + :note: persistent_betti_numbers function requires :func:`compute_persistence` function to be launched first. """ - cdef vector[int] pbn_result - if self.pcohptr != NULL: - pbn_result = self.pcohptr.persistent_betti_numbers(<double>from_value, <double>to_value) - return pbn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before persistent_betti_numbers()" + return self.pcohptr.persistent_betti_numbers(<double>from_value, <double>to_value) def persistence_intervals_in_dimension(self, dimension): """This function returns the persistence intervals of the complex in a @@ -203,13 +217,8 @@ cdef class PeriodicCubicalComplex: :returns: The persistence intervals. :rtype: numpy array of dimension 2 - :note: intervals_in_dim function requires persistence function to be + :note: intervals_in_dim function requires :func:`compute_persistence` function to be launched first. """ - cdef vector[pair[double,double]] intervals_result - if self.pcohptr != NULL: - intervals_result = self.pcohptr.intervals_in_dimension(dimension) - else: - print("intervals_in_dim function requires persistence function" - " to be launched first.", file=sys.stderr) - return np.array(intervals_result) + assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()" + return np.array(self.pcohptr.intervals_in_dimension(dimension)) diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py new file mode 100644 index 00000000..13e16d24 --- /dev/null +++ b/src/python/gudhi/point_cloud/dtm.py @@ -0,0 +1,70 @@ +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +# Author(s): Marc Glisse +# +# Copyright (C) 2020 Inria +# +# Modification(s): +# - YYYY/MM Author: Description of the modification + +from .knn import KNearestNeighbors + +__author__ = "Marc Glisse" +__copyright__ = "Copyright (C) 2020 Inria" +__license__ = "MIT" + + +class DistanceToMeasure: + """ + Class to compute the distance to the empirical measure defined by a point set, as introduced in :cite:`dtm`. + """ + + def __init__(self, k, q=2, **kwargs): + """ + Args: + k (int): number of neighbors (possibly including the point itself). + q (float): order used to compute the distance to measure. Defaults to 2. + kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNearestNeighbors`, except that + metric="neighbors" means that :func:`transform` expects an array with the distances + to the k nearest neighbors. + """ + self.k = k + self.q = q + self.params = kwargs + + def fit_transform(self, X, y=None): + return self.fit(X).transform(X) + + def fit(self, X, y=None): + """ + Args: + X (numpy.array): coordinates for mass points. + """ + if self.params.setdefault("metric", "euclidean") != "neighbors": + self.knn = KNearestNeighbors( + self.k, return_index=False, return_distance=True, sort_results=False, **self.params + ) + self.knn.fit(X) + return self + + def transform(self, X): + """ + Args: + X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed", + or distances to the k nearest neighbors if metric is "neighbors" (if the array has more + than k columns, the remaining ones are ignored). + + Returns: + numpy.array: a 1-d array with, for each point of X, its distance to the measure defined + by the argument of :func:`fit`. + """ + if self.params["metric"] == "neighbors": + distances = X[:, : self.k] + else: + distances = self.knn.transform(X) + distances = distances ** self.q + dtm = distances.sum(-1) / self.k + dtm = dtm ** (1.0 / self.q) + # We compute too many powers, 1/p in knn then q in dtm, 1/q in dtm then q or some log in the caller. + # Add option to skip the final root? + return dtm diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py new file mode 100644 index 00000000..07553d6d --- /dev/null +++ b/src/python/gudhi/point_cloud/knn.py @@ -0,0 +1,324 @@ +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +# Author(s): Marc Glisse +# +# Copyright (C) 2020 Inria +# +# Modification(s): +# - YYYY/MM Author: Description of the modification + +import numpy + +# TODO: https://github.com/facebookresearch/faiss + +__author__ = "Marc Glisse" +__copyright__ = "Copyright (C) 2020 Inria" +__license__ = "MIT" + + +class KNearestNeighbors: + """ + Class wrapping several implementations for computing the k nearest neighbors in a point set. + """ + + def __init__(self, k, return_index=True, return_distance=False, metric="euclidean", **kwargs): + """ + Args: + k (int): number of neighbors (possibly including the point itself). + return_index (bool): if True, return the index of each neighbor. + return_distance (bool): if True, return the distance to each neighbor. + implementation (str): choice of the library that does the real work. + + * 'keops' for a brute-force, CUDA implementation through pykeops. Useful when the dimension becomes large (10+) but the number of points remains low (less than a million). Only "minkowski" and its aliases are supported. + * 'ckdtree' for scipy's cKDTree. Only "minkowski" and its aliases are supported. + * 'sklearn' for scikit-learn's NearestNeighbors. Note that this provides in particular an option algorithm="brute". + * 'hnsw' for hnswlib.Index. It can be very fast but does not provide guarantees. Only supports "euclidean" for now. + * None will try to select a sensible one (scipy if possible, scikit-learn otherwise). + metric (str): see `sklearn.neighbors.NearestNeighbors`. + eps (float): relative error when computing nearest neighbors with the cKDTree. + p (float): norm L^p on input points (including numpy.inf) if metric is "minkowski". Defaults to 2. + n_jobs (int): number of jobs to schedule for parallel processing of nearest neighbors on the CPU. + If -1 is given all processors are used. Default: 1. + sort_results (bool): if True, then distances and indices of each point are + sorted on return, so that the first column contains the closest points. + Otherwise, neighbors are returned in an arbitrary order. Defaults to True. + enable_autodiff (bool): if the input is a torch.tensor, jax.numpy.ndarray or tensorflow.Tensor, this + instructs the function to compute distances in a way that works with automatic differentiation. + This is experimental, not supported for all metrics, and requires the package EagerPy. + Defaults to False. + kwargs: additional parameters are forwarded to the backends. + """ + self.k = k + self.return_index = return_index + self.return_distance = return_distance + self.metric = metric + self.params = kwargs + # canonicalize + if metric == "euclidean": + self.params["p"] = 2 + self.metric = "minkowski" + elif metric == "manhattan": + self.params["p"] = 1 + self.metric = "minkowski" + elif metric == "chebyshev": + self.params["p"] = numpy.inf + self.metric = "minkowski" + elif metric == "minkowski": + self.params["p"] = kwargs.get("p", 2) + if self.params.get("implementation") in {"keops", "ckdtree"}: + assert self.metric == "minkowski" + if self.params.get("implementation") == "hnsw": + assert self.metric == "minkowski" and self.params["p"] == 2 + if not self.params.get("implementation"): + if self.metric == "minkowski": + self.params["implementation"] = "ckdtree" + else: + self.params["implementation"] = "sklearn" + if not return_distance: + self.params["enable_autodiff"] = False + + def fit_transform(self, X, y=None): + return self.fit(X).transform(X) + + def fit(self, X, y=None): + """ + Args: + X (numpy.array): coordinates for reference points. + """ + self.ref_points = X + if self.params.get("enable_autodiff", False): + import eagerpy as ep + + X = ep.astensor(X) + if self.params["implementation"] != "keops" or not isinstance(X, ep.PyTorchTensor): + # I don't know a clever way to reuse a GPU tensor from tensorflow in pytorch + # without copying to/from the CPU. + X = X.numpy() + if self.params["implementation"] == "ckdtree": + # sklearn could handle this, but it is much slower + from scipy.spatial import cKDTree + + self.kdtree = cKDTree(X) + + if self.params["implementation"] == "sklearn" and self.metric != "precomputed": + # FIXME: sklearn badly handles "precomputed" + from sklearn.neighbors import NearestNeighbors + + nargs = { + k: v for k, v in self.params.items() if k in {"p", "n_jobs", "metric_params", "algorithm", "leaf_size"} + } + self.nn = NearestNeighbors(self.k, metric=self.metric, **nargs) + self.nn.fit(X) + + if self.params["implementation"] == "hnsw": + import hnswlib + + self.graph = hnswlib.Index("l2", len(X[0])) # Actually returns squared distances + self.graph.init_index( + len(X), **{k: v for k, v in self.params.items() if k in {"ef_construction", "M", "random_seed"}} + ) + n = self.params.get("num_threads") + if n is None: + n = self.params.get("n_jobs", 1) + self.params["num_threads"] = n + self.graph.add_items(X, num_threads=n) + + return self + + def transform(self, X): + """ + Args: + X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed". + + Returns: + numpy.array: if return_index, an array of shape (len(X), k) with the indices (in the argument + of :func:`fit`) of the k nearest neighbors to the points of X. If return_distance, an array of the + same shape with the distances to those neighbors. If both, a tuple with the two arrays, in this order. + """ + if self.params.get("enable_autodiff", False): + # pykeops does not support autodiff for kmin yet, but when it does in the future, + # we may want a special path. + import eagerpy as ep + + save_return_index = self.return_index + self.return_index = True + self.return_distance = False + self.params["enable_autodiff"] = False + try: + newX = ep.astensor(X) + if self.params["implementation"] != "keops" or ( + not isinstance(newX, ep.PyTorchTensor) and not isinstance(newX, ep.NumPyTensor) + ): + newX = newX.numpy() + else: + newX = newX.raw + neighbors = self.transform(newX) + finally: + self.return_index = save_return_index + self.return_distance = True + self.params["enable_autodiff"] = True + # We can implement more later as needed + assert self.metric == "minkowski" + p = self.params["p"] + Y = ep.astensor(self.ref_points) + neighbor_pts = Y[ + neighbors, + ] + diff = neighbor_pts - X[:, None, :] + if isinstance(diff, ep.PyTorchTensor): + # https://github.com/jonasrauber/eagerpy/issues/6 + distances = ep.astensor(diff.raw.norm(p, -1)) + else: + distances = diff.norms.lp(p, -1) + if self.return_index: + return neighbors, distances.raw + else: + return distances.raw + + metric = self.metric + k = self.k + + if metric == "precomputed": + # scikit-learn could handle that, but they insist on calling fit() with an unused square array, which is too unnatural. + if self.return_index: + n_jobs = self.params.get("n_jobs", 1) + # Supposedly numpy can be compiled with OpenMP and handle this, but nobody does that?! + if n_jobs == 1: + neighbors = numpy.argpartition(X, k - 1)[:, 0:k] + if self.params.get("sort_results", True): + X = numpy.take_along_axis(X, neighbors, axis=-1) + ngb_order = numpy.argsort(X, axis=-1) + neighbors = numpy.take_along_axis(neighbors, ngb_order, axis=-1) + else: + ngb_order = neighbors + if self.return_distance: + distances = numpy.take_along_axis(X, ngb_order, axis=-1) + return neighbors, distances + else: + return neighbors + else: + from joblib import Parallel, delayed, effective_n_jobs + from sklearn.utils import gen_even_slices + + slices = gen_even_slices(len(X), effective_n_jobs(-1)) + parallel = Parallel(backend="threading", n_jobs=-1) + if self.params.get("sort_results", True): + + def func(M): + neighbors = numpy.argpartition(M, k - 1)[:, 0:k] + Y = numpy.take_along_axis(M, neighbors, axis=-1) + ngb_order = numpy.argsort(Y, axis=-1) + return numpy.take_along_axis(neighbors, ngb_order, axis=-1) + + else: + + def func(M): + return numpy.argpartition(M, k - 1)[:, 0:k] + + neighbors = numpy.concatenate(parallel(delayed(func)(X[s]) for s in slices)) + if self.return_distance: + distances = numpy.take_along_axis(X, neighbors, axis=-1) + return neighbors, distances + else: + return neighbors + if self.return_distance: + n_jobs = self.params.get("n_jobs", 1) + if n_jobs == 1: + distances = numpy.partition(X, k - 1)[:, 0:k] + if self.params.get("sort_results"): + # partition is not guaranteed to sort the lower half, although it often does + distances.sort(axis=-1) + else: + from joblib import Parallel, delayed, effective_n_jobs + from sklearn.utils import gen_even_slices + + if self.params.get("sort_results"): + + def func(M): + # Not partitioning in place, because we should not modify the user's array? + r = numpy.partition(M, k - 1)[:, 0:k] + r.sort(axis=-1) + return r + + else: + func = lambda M: numpy.partition(M, k - 1)[:, 0:k] + slices = gen_even_slices(len(X), effective_n_jobs(-1)) + parallel = Parallel(backend="threading", n_jobs=-1) + distances = numpy.concatenate(parallel(delayed(func)(X[s]) for s in slices)) + return distances + return None + + if self.params["implementation"] == "hnsw": + ef = self.params.get("ef") + if ef is not None: + self.graph.set_ef(ef) + neighbors, distances = self.graph.knn_query(X, k, num_threads=self.params["num_threads"]) + # The k nearest neighbors are always sorted. I couldn't find it in the doc, but the code calls searchKnn, + # which returns a priority_queue, and then fills the return array backwards with top/pop on the queue. + if self.return_index: + if self.return_distance: + return neighbors, numpy.sqrt(distances) + else: + return neighbors + if self.return_distance: + return numpy.sqrt(distances) + return None + + if self.params["implementation"] == "keops": + import torch + from pykeops.torch import LazyTensor + + # 'float64' is slow except on super expensive GPUs. Allow it with some param? + XX = torch.as_tensor(X, dtype=torch.float32) + if X is self.ref_points: + YY = XX + else: + YY = torch.as_tensor(self.ref_points, dtype=torch.float32) + p = self.params["p"] + if p == numpy.inf: + # Requires pykeops 1.4 or later + mat = (LazyTensor(XX[:, None, :]) - LazyTensor(YY[None, :, :])).abs().max(-1) + elif p == 2: # Any even integer? + mat = ((LazyTensor(XX[:, None, :]) - LazyTensor(YY[None, :, :])) ** p).sum(-1) + else: + mat = ((LazyTensor(XX[:, None, :]) - LazyTensor(YY[None, :, :])).abs() ** p).sum(-1) + + if self.return_index: + if self.return_distance: + distances, neighbors = mat.Kmin_argKmin(k, dim=1) + if p != numpy.inf: + distances = distances ** (1.0 / p) + return neighbors, distances + else: + neighbors = mat.argKmin(k, dim=1) + return neighbors + if self.return_distance: + distances = mat.Kmin(k, dim=1) + if p != numpy.inf: + distances = distances ** (1.0 / p) + return distances + return None + + if self.params["implementation"] == "ckdtree": + qargs = {key: val for key, val in self.params.items() if key in {"p", "eps", "n_jobs"}} + distances, neighbors = self.kdtree.query(X, k=self.k, **qargs) + if self.return_index: + if self.return_distance: + return neighbors, distances + else: + return neighbors + if self.return_distance: + return distances + return None + + assert self.params["implementation"] == "sklearn" + if self.return_distance: + distances, neighbors = self.nn.kneighbors(X, return_distance=True) + if self.return_index: + return neighbors, distances + else: + return distances + if self.return_index: + neighbors = self.nn.kneighbors(X, return_distance=False) + return neighbors + return None diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd index 4038b41d..1d4ed926 100644 --- a/src/python/gudhi/simplex_tree.pxd +++ b/src/python/gudhi/simplex_tree.pxd @@ -48,8 +48,7 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi": int dimension() int upper_bound_dimension() bool find_simplex(vector[int] simplex) - bool insert_simplex_and_subfaces(vector[int] simplex, - double filtration) + bool insert(vector[int] simplex, double filtration) vector[pair[vector[int], double]] get_star(vector[int] simplex) vector[pair[vector[int], double]] get_cofaces(vector[int] simplex, int dimension) @@ -57,6 +56,8 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi": void remove_maximal_simplex(vector[int] simplex) bool prune_above_filtration(double filtration) bool make_filtration_non_decreasing() + void compute_extended_filtration() + vector[vector[pair[int, pair[double, double]]]] compute_extended_persistence_subdiagrams(vector[pair[int, pair[double, double]]] dgm, double min_persistence) # Iterators over Simplex tree pair[vector[int], double] get_simplex_and_filtration(Simplex_tree_simplex_handle f_simplex) Simplex_tree_simplices_iterator get_simplices_iterator_begin() @@ -69,11 +70,12 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi": cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Simplex_tree_persistence_interface "Gudhi::Persistent_cohomology_interface<Gudhi::Simplex_tree<Gudhi::Simplex_tree_options_full_featured>>": Simplex_tree_persistence_interface(Simplex_tree_interface_full_featured * st, bool persistence_dim_max) - vector[pair[int, pair[double, double]]] get_persistence(int homology_coeff_field, double min_persistence) + void compute_persistence(int homology_coeff_field, double min_persistence) + vector[pair[int, pair[double, double]]] get_persistence() vector[int] betti_numbers() vector[int] persistent_betti_numbers(double from_value, double to_value) vector[pair[double,double]] intervals_in_dimension(int dimension) - void write_output_diagram(string diagram_file_name) + void write_output_diagram(string diagram_file_name) except + vector[pair[vector[int], vector[int]]] persistence_pairs() pair[vector[vector[int]], vector[vector[int]]] lower_star_generators() pair[vector[vector[int]], vector[vector[int]]] flag_generators() diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index dcf1b46e..55115cca 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -91,7 +91,7 @@ cdef class SimplexTree: (with more :meth:`assign_filtration` or :meth:`make_filtration_non_decreasing` for instance) before calling any function that relies on the filtration property, like - :meth:`initialize_filtration`. + :meth:`persistence`. """ self.get_ptr().assign_simplex_filtration(simplex, filtration) @@ -99,16 +99,7 @@ cdef class SimplexTree: """This function initializes and sorts the simplicial complex filtration vector. - .. note:: - - This function must be launched before - :func:`persistence()<gudhi.SimplexTree.persistence>`, - :func:`betti_numbers()<gudhi.SimplexTree.betti_numbers>`, - :func:`persistent_betti_numbers()<gudhi.SimplexTree.persistent_betti_numbers>`, - or :func:`get_filtration()<gudhi.SimplexTree.get_filtration>` - after :func:`inserting<gudhi.SimplexTree.insert>` or - :func:`removing<gudhi.SimplexTree.remove_maximal_simplex>` - simplices. + .. deprecated:: 3.2.0 """ self.get_ptr().initialize_filtration() @@ -140,9 +131,9 @@ cdef class SimplexTree: This function is not constant time because it can recompute dimension if required (can be triggered by - :func:`remove_maximal_simplex()<gudhi.SimplexTree.remove_maximal_simplex>` + :func:`remove_maximal_simplex` or - :func:`prune_above_filtration()<gudhi.SimplexTree.prune_above_filtration>` + :func:`prune_above_filtration` methods). """ return self.get_ptr().dimension() @@ -167,9 +158,9 @@ cdef class SimplexTree: This function must be used with caution because it disables dimension recomputation when required (this recomputation can be triggered by - :func:`remove_maximal_simplex()<gudhi.SimplexTree.remove_maximal_simplex>` + :func:`remove_maximal_simplex` or - :func:`prune_above_filtration()<gudhi.SimplexTree.prune_above_filtration>` + :func:`prune_above_filtration` ). """ self.get_ptr().set_dimension(<int>dimension) @@ -183,10 +174,7 @@ cdef class SimplexTree: :returns: true if the simplex was found, false otherwise. :rtype: bool """ - cdef vector[int] csimplex - for i in simplex: - csimplex.push_back(i) - return self.get_ptr().find_simplex(csimplex) + return self.get_ptr().find_simplex(simplex) def insert(self, simplex, filtration=0.0): """This function inserts the given N-simplex and its subfaces with the @@ -203,11 +191,7 @@ cdef class SimplexTree: otherwise (whatever its original filtration value). :rtype: bool """ - cdef vector[int] csimplex - for i in simplex: - csimplex.push_back(i) - return self.get_ptr().insert_simplex_and_subfaces(csimplex, - <double>filtration) + return self.get_ptr().insert(simplex, <double>filtration) def get_simplices(self): """This function returns a generator with simplices and their given @@ -309,17 +293,12 @@ cdef class SimplexTree: .. note:: - Be aware that removing is shifting data in a flat_map - (:func:`initialize_filtration()<gudhi.SimplexTree.initialize_filtration>` to be done). - - .. note:: - The dimension of the simplicial complex may be lower after calling remove_maximal_simplex than it was before. However, - :func:`upper_bound_dimension()<gudhi.SimplexTree.upper_bound_dimension>` + :func:`upper_bound_dimension` method will return the old value, which remains a valid upper bound. If you care, you can call - :func:`dimension()<gudhi.SimplexTree.dimension>` + :func:`dimension` to recompute the exact dimension. """ self.get_ptr().remove_maximal_simplex(simplex) @@ -335,24 +314,14 @@ cdef class SimplexTree: .. note:: - Some simplex tree functions require the filtration to be valid. - prune_above_filtration function is not launching - :func:`initialize_filtration()<gudhi.SimplexTree.initialize_filtration>` - but returns the filtration modification - information. If the complex has changed , please call - :func:`initialize_filtration()<gudhi.SimplexTree.initialize_filtration>` - to recompute it. - - .. note:: - Note that the dimension of the simplicial complex may be lower after calling - :func:`prune_above_filtration()<gudhi.SimplexTree.prune_above_filtration>` + :func:`prune_above_filtration` than it was before. However, - :func:`upper_bound_dimension()<gudhi.SimplexTree.upper_bound_dimension>` + :func:`upper_bound_dimension` will return the old value, which remains a valid upper bound. If you care, you can call - :func:`dimension()<gudhi.SimplexTree.dimension>` + :func:`dimension` method to recompute the exact dimension. """ return self.get_ptr().prune_above_filtration(filtration) @@ -383,22 +352,63 @@ cdef class SimplexTree: :returns: True if any filtration value was modified, False if the filtration was already non-decreasing. :rtype: bool + """ + return self.get_ptr().make_filtration_non_decreasing() + def extend_filtration(self): + """ Extend filtration for computing extended persistence. This function only uses the + filtration values at the 0-dimensional simplices, and computes the extended persistence + diagram induced by the lower-star filtration computed with these values. .. note:: - Some simplex tree functions require the filtration to be valid. - make_filtration_non_decreasing function is not launching - :func:`initialize_filtration()<gudhi.SimplexTree.initialize_filtration>` - but returns the filtration modification - information. If the complex has changed , please call - :func:`initialize_filtration()<gudhi.SimplexTree.initialize_filtration>` - to recompute it. + Note that after calling this function, the filtration + values are actually modified within the Simplex_tree. + The function :func:`extended_persistence` + retrieves the original values. + + .. note:: + + Note that this code creates an extra vertex internally, so you should make sure that + the Simplex_tree does not contain a vertex with the largest possible value (i.e., 4294967295). """ - return self.get_ptr().make_filtration_non_decreasing() + self.get_ptr().compute_extended_filtration() + + def extended_persistence(self, homology_coeff_field=11, min_persistence=0): + """This function retrieves good values for extended persistence, and separate the diagrams + into the Ordinary, Relative, Extended+ and Extended- subdiagrams. + + :param homology_coeff_field: The homology coefficient field. Must be a + prime number. Default value is 11. + :type homology_coeff_field: int. + :param min_persistence: The minimum persistence value (i.e., the absolute value of the difference between the persistence diagram point coordinates) to take into + account (strictly greater than min_persistence). Default value is + 0.0. + Sets min_persistence to -1.0 to see all values. + :type min_persistence: float. + :returns: A list of four persistence diagrams in the format described in :func:`persistence`. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. See https://link.springer.com/article/10.1007/s10208-008-9027-z and/or section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes. + + .. note:: + + This function should be called only if :func:`extend_filtration` has been called first! + + .. note:: + + The coordinates of the persistence diagram points might be a little different than the + original filtration values due to the internal transformation (scaling to [-2,-1]) that is + performed on these values during the computation of extended persistence. + """ + cdef vector[pair[int, pair[double, double]]] persistence_result + if self.pcohptr != NULL: + del self.pcohptr + self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), False) + self.pcohptr.compute_persistence(homology_coeff_field, -1.) + persistence_result = self.pcohptr.get_persistence() + return self.get_ptr().compute_extended_persistence_subdiagrams(persistence_result, min_persistence) + def persistence(self, homology_coeff_field=11, min_persistence=0, persistence_dim_max = False): - """This function returns the persistence of the simplicial complex. + """This function computes and returns the persistence of the simplicial complex. :param homology_coeff_field: The homology coefficient field. Must be a prime number. Default value is 11. @@ -415,13 +425,32 @@ cdef class SimplexTree: :returns: The persistence of the simplicial complex. :rtype: list of pairs(dimension, pair(birth, death)) """ + self.compute_persistence(homology_coeff_field, min_persistence, persistence_dim_max) + return self.pcohptr.get_persistence() + + def compute_persistence(self, homology_coeff_field=11, min_persistence=0, persistence_dim_max = False): + """This function computes the persistence of the simplicial complex, so it can be accessed through + :func:`persistent_betti_numbers`, :func:`persistence_pairs`, etc. This function is equivalent to :func:`persistence` + when you do not want the list :func:`persistence` returns. + + :param homology_coeff_field: The homology coefficient field. Must be a + prime number. Default value is 11. + :type homology_coeff_field: int. + :param min_persistence: The minimum persistence value to take into + account (strictly greater than min_persistence). Default value is + 0.0. + Sets min_persistence to -1.0 to see all values. + :type min_persistence: float. + :param persistence_dim_max: If true, the persistent homology for the + maximal dimension in the complex is computed. If false, it is + ignored. Default is false. + :type persistence_dim_max: bool + :returns: Nothing. + """ if self.pcohptr != NULL: del self.pcohptr self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), persistence_dim_max) - cdef vector[pair[int, pair[double, double]]] persistence_result - if self.pcohptr != NULL: - persistence_result = self.pcohptr.get_persistence(homology_coeff_field, min_persistence) - return persistence_result + self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) def betti_numbers(self): """This function returns the Betti numbers of the simplicial complex. @@ -430,16 +459,11 @@ cdef class SimplexTree: :rtype: list of int :note: betti_numbers function requires - :func:`persistence()<gudhi.SimplexTree.persistence>` + :func:`compute_persistence` function to be launched first. """ - cdef vector[int] bn_result - if self.pcohptr != NULL: - bn_result = self.pcohptr.betti_numbers() - else: - print("betti_numbers function requires persistence function" - " to be launched first.") - return bn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before betti_numbers()" + return self.pcohptr.betti_numbers() def persistent_betti_numbers(self, from_value, to_value): """This function returns the persistent Betti numbers of the @@ -456,16 +480,11 @@ cdef class SimplexTree: :rtype: list of int :note: persistent_betti_numbers function requires - :func:`persistence()<gudhi.SimplexTree.persistence>` + :func:`compute_persistence` function to be launched first. """ - cdef vector[int] pbn_result - if self.pcohptr != NULL: - pbn_result = self.pcohptr.persistent_betti_numbers(<double>from_value, <double>to_value) - else: - print("persistent_betti_numbers function requires persistence function" - " to be launched first.") - return pbn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before persistent_betti_numbers()" + return self.pcohptr.persistent_betti_numbers(<double>from_value, <double>to_value) def persistence_intervals_in_dimension(self, dimension): """This function returns the persistence intervals of the simplicial @@ -477,16 +496,11 @@ cdef class SimplexTree: :rtype: numpy array of dimension 2 :note: intervals_in_dim function requires - :func:`persistence()<gudhi.SimplexTree.persistence>` + :func:`compute_persistence` function to be launched first. """ - cdef vector[pair[double,double]] intervals_result - if self.pcohptr != NULL: - intervals_result = self.pcohptr.intervals_in_dimension(dimension) - else: - print("intervals_in_dim function requires persistence function" - " to be launched first.") - return np_array(intervals_result) + assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()" + return np_array(self.pcohptr.intervals_in_dimension(dimension)) def persistence_pairs(self): """This function returns a list of persistence birth and death simplices pairs. @@ -495,36 +509,25 @@ cdef class SimplexTree: :rtype: list of pair of list of int :note: persistence_pairs function requires - :func:`persistence()<gudhi.SimplexTree.persistence>` + :func:`compute_persistence` function to be launched first. """ - cdef vector[pair[vector[int],vector[int]]] persistence_pairs_result - if self.pcohptr != NULL: - persistence_pairs_result = self.pcohptr.persistence_pairs() - else: - print("persistence_pairs function requires persistence function" - " to be launched first.") - return persistence_pairs_result + assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_pairs()" + return self.pcohptr.persistence_pairs() - def write_persistence_diagram(self, persistence_file=''): + def write_persistence_diagram(self, persistence_file): """This function writes the persistence intervals of the simplicial complex in a user given file name. - :param persistence_file: The specific dimension. + :param persistence_file: Name of the file. :type persistence_file: string. :note: intervals_in_dim function requires - :func:`persistence()<gudhi.SimplexTree.persistence>` + :func:`compute_persistence` function to be launched first. """ - if self.pcohptr != NULL: - if persistence_file != '': - self.pcohptr.write_output_diagram(persistence_file.encode('utf-8')) - else: - print("persistence_file must be specified") - else: - print("intervals_in_dim function requires persistence function" - " to be launched first.") + assert self.pcohptr != NULL, "compute_persistence() must be called before write_persistence_diagram()" + self.pcohptr.write_output_diagram(persistence_file.encode('utf-8')) def lower_star_persistence_generators(self): """Assuming this is a lower-star filtration, this function returns the persistence pairs, diff --git a/src/python/gudhi/wasserstein/__init__.py b/src/python/gudhi/wasserstein/__init__.py new file mode 100644 index 00000000..ed225ba4 --- /dev/null +++ b/src/python/gudhi/wasserstein/__init__.py @@ -0,0 +1 @@ +from .wasserstein import wasserstein_distance diff --git a/src/python/gudhi/wasserstein/barycenter.py b/src/python/gudhi/wasserstein/barycenter.py new file mode 100644 index 00000000..de7aea81 --- /dev/null +++ b/src/python/gudhi/wasserstein/barycenter.py @@ -0,0 +1,159 @@ +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +# Author(s): Theo Lacombe +# +# Copyright (C) 2019 Inria +# +# Modification(s): +# - YYYY/MM Author: Description of the modification + + +import ot +import numpy as np +import scipy.spatial.distance as sc + +from gudhi.wasserstein import wasserstein_distance + + +def _mean(x, m): + ''' + :param x: a list of 2D-points, off diagonal, x_0... x_{k-1} + :param m: total amount of points taken into account, + that is we have (m-k) copies of diagonal + :returns: the weighted mean of x with (m-k) copies of the diagonal + ''' + k = len(x) + if k > 0: + w = np.mean(x, axis=0) + w_delta = (w[0] + w[1]) / 2 * np.ones(2) + return (k * w + (m-k) * w_delta) / m + else: + return np.array([0, 0]) + + +def lagrangian_barycenter(pdiagset, init=None, verbose=False): + ''' + :param pdiagset: a list of ``numpy.array`` of shape `(n x 2)` + (`n` can variate), encoding a set of + persistence diagrams with only finite coordinates. + :param init: The initial value for barycenter estimate. + If ``None``, init is made on a random diagram from the dataset. + Otherwise, it can be an ``int`` + (then initialization is made on ``pdiagset[init]``) + or a `(n x 2)` ``numpy.array`` enconding + a persistence diagram with `n` points. + :type init: ``int``, or (n x 2) ``np.array`` + :param verbose: if ``True``, returns additional information about the + barycenter. + :type verbose: boolean + :returns: If not verbose (default), a ``numpy.array`` encoding + the barycenter estimate of pdiagset + (local minimum of the energy function). + If ``pdiagset`` is empty, returns ``None``. + If verbose, returns a couple ``(Y, log)`` + where ``Y`` is the barycenter estimate, + and ``log`` is a ``dict`` that contains additional informations: + + - `"groupings"`, a list of list of pairs ``(i,j)``. + Namely, ``G[k] = [...(i, j)...]``, where ``(i,j)`` indicates + that ``pdiagset[k][i]`` is matched to ``Y[j]`` + if ``i = -1`` or ``j = -1``, it means they + represent the diagonal. + + - `"energy"`, ``float`` representing the Frechet energy value obtained. + It is the mean of squared distances of observations to the output. + + - `"nb_iter"`, ``int`` number of iterations performed before convergence of the algorithm. + ''' + X = pdiagset # to shorten notations, not a copy + m = len(X) # number of diagrams we are averaging + if m == 0: + print("Warning: computing barycenter of empty diag set. Returns None") + return None + + # store the number of off-diagonal point for each of the X_i + nb_off_diag = np.array([len(X_i) for X_i in X]) + # Initialisation of barycenter + if init is None: + i0 = np.random.randint(m) # Index of first state for the barycenter + Y = X[i0].copy() + else: + if type(init)==int: + Y = X[init].copy() + else: + Y = init.copy() + + nb_iter = 0 + + converged = False # stoping criterion + while not converged: + nb_iter += 1 + K = len(Y) # current nb of points in Y (some might be on diagonal) + G = np.full((K, m), -1, dtype=int) # will store for each j, the (index) + # point matched in each other diagram + #(might be the diagonal). + # that is G[j, i] = k <=> y_j is matched to + # x_k in the diagram i-th diagram X[i] + updated_points = np.zeros((K, 2)) # will store the new positions of + # the points of Y. + # If points disappear, there thrown + # on [0,0] by default. + new_created_points = [] # will store potential new points. + + # Step 1 : compute optimal matching (Y, X_i) for each X_i + # and create new points in Y if needed + for i in range(m): + _, indices = wasserstein_distance(Y, X[i], matching=True, order=2., internal_p=2.) + for y_j, x_i_j in indices: + if y_j >= 0: # we matched an off diagonal point to x_i_j... + if x_i_j >= 0: # ...which is also an off-diagonal point. + G[y_j, i] = x_i_j + else: # ...which is a diagonal point + G[y_j, i] = -1 # -1 stands for the diagonal (mask) + else: # We matched a diagonal point to x_i_j... + if x_i_j >= 0: # which is a off-diag point ! + # need to create new point in Y + new_y = _mean(np.array([X[i][x_i_j]]), m) + # Average this point with (m-1) copies of Delta + new_created_points.append(new_y) + + # Step 2 : Update current point position thanks to groupings computed + to_delete = [] + for j in range(K): + matched_points = [X[i][G[j, i]] for i in range(m) if G[j, i] > -1] + new_y_j = _mean(matched_points, m) + if not np.array_equal(new_y_j, np.array([0,0])): + updated_points[j] = new_y_j + else: # this points is no longer of any use. + to_delete.append(j) + # we remove the point to be deleted now. + updated_points = np.delete(updated_points, to_delete, axis=0) + + # we cannot converge if there have been new created points. + if new_created_points: + Y = np.concatenate((updated_points, new_created_points)) + else: + # Step 3 : we check convergence + if np.array_equal(updated_points, Y): + converged = True + Y = updated_points + + + if verbose: + groupings = [] + energy = 0 + log = {} + n_y = len(Y) + for i in range(m): + cost, edges = wasserstein_distance(Y, X[i], matching=True, order=2., internal_p=2.) + groupings.append(edges) + energy += cost + log["groupings"] = groupings + energy = energy/m + log["energy"] = energy + log["nb_iter"] = nb_iter + + return Y, log + else: + return Y + diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py index 3dd993f9..efc851a0 100644 --- a/src/python/gudhi/wasserstein.py +++ b/src/python/gudhi/wasserstein/wasserstein.py @@ -9,11 +9,14 @@ import numpy as np import scipy.spatial.distance as sc + try: import ot except ImportError: print("POT (Python Optimal Transport) package is not installed. Try to run $ conda install -c conda-forge pot ; or $ pip install POT") + +# Currently unused, but Théo says it is likely to be used again. def _proj_on_diag(X): ''' :param X: (n x 2) array encoding the points of a persistent diagram. @@ -23,28 +26,36 @@ def _proj_on_diag(X): return np.array([Z , Z]).T -def _build_dist_matrix(X, Y, order=2., internal_p=2.): +def _dist_to_diag(X, internal_p): + ''' + :param X: (n x 2) array encoding the points of a persistent diagram. + :param internal_p: Ground metric (i.e. norm L^p). + :returns: (n) array encoding the (respective orthogonal) distances of the points to the diagonal + + .. note:: + Assumes that the points are above the diagonal. + ''' + return (X[:, 1] - X[:, 0]) * 2 ** (1.0 / internal_p - 1) + + +def _build_dist_matrix(X, Y, order, internal_p): ''' :param X: (n x 2) numpy.array encoding the (points of the) first diagram. :param Y: (m x 2) numpy.array encoding the second diagram. :param order: exponent for the Wasserstein metric. :param internal_p: Ground metric (i.e. norm L^p). - :returns: (n+1) x (m+1) np.array encoding the cost matrix C. - For 0 <= i < n, 0 <= j < m, C[i,j] encodes the distance between X[i] and Y[j], - while C[i, m] (resp. C[n, j]) encodes the distance (to the p) between X[i] (resp Y[j]) + :returns: (n+1) x (m+1) np.array encoding the cost matrix C. + For 0 <= i < n, 0 <= j < m, C[i,j] encodes the distance between X[i] and Y[j], + while C[i, m] (resp. C[n, j]) encodes the distance (to the p) between X[i] (resp Y[j]) and its orthogonal projection onto the diagonal. note also that C[n, m] = 0 (it costs nothing to move from the diagonal to the diagonal). ''' - Xdiag = _proj_on_diag(X) - Ydiag = _proj_on_diag(Y) + Cxd = _dist_to_diag(X, internal_p)**order + Cdy = _dist_to_diag(Y, internal_p)**order if np.isinf(internal_p): C = sc.cdist(X,Y, metric='chebyshev')**order - Cxd = np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order - Cdy = np.linalg.norm(Y - Ydiag, ord=internal_p, axis=1)**order else: C = sc.cdist(X,Y, metric='minkowski', p=internal_p)**order - Cxd = np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order - Cdy = np.linalg.norm(Y - Ydiag, ord=internal_p, axis=1)**order Cf = np.hstack((C, Cxd[:,None])) Cdy = np.append(Cdy, 0) @@ -58,24 +69,23 @@ def _perstot(X, order, internal_p): :param X: (n x 2) numpy.array (points of a given diagram). :param order: exponent for Wasserstein. Default value is 2. :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm). - :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram). + :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram). ''' - Xdiag = _proj_on_diag(X) - return (np.sum(np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order))**(1./order) + return np.linalg.norm(_dist_to_diag(X, internal_p), ord=order) def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2.): ''' - :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points + :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate). :param Y: (m x 2) numpy.array encoding the second diagram. :param matching: if True, computes and returns the optimal matching between X and Y, encoded as a (n x 2) np.array [...[i,j]...], meaning the i-th point in X is matched to the j-th point in Y, with the convention (-1) represents the diagonal. :param order: exponent for Wasserstein; Default value is 2. - :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); + :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm). - :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with + :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with respect to the internal_p-norm as ground metric. If matching is set to True, also returns the optimal matching between X and Y. ''' diff --git a/src/python/include/Alpha_complex_interface.h b/src/python/include/Alpha_complex_interface.h index 8614eee3..40de88f3 100644 --- a/src/python/include/Alpha_complex_interface.h +++ b/src/python/include/Alpha_complex_interface.h @@ -58,7 +58,6 @@ class Alpha_complex_interface { void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square) { alpha_complex_->create_complex(*simplex_tree, max_alpha_square); - simplex_tree->initialize_filtration(); } private: diff --git a/src/python/include/Euclidean_strong_witness_complex_interface.h b/src/python/include/Euclidean_strong_witness_complex_interface.h index c1c72737..f94c51ef 100644 --- a/src/python/include/Euclidean_strong_witness_complex_interface.h +++ b/src/python/include/Euclidean_strong_witness_complex_interface.h @@ -50,12 +50,10 @@ class Euclidean_strong_witness_complex_interface { void create_simplex_tree(Gudhi::Simplex_tree<>* simplex_tree, double max_alpha_square, std::size_t limit_dimension) { witness_complex_->create_complex(*simplex_tree, max_alpha_square, limit_dimension); - simplex_tree->initialize_filtration(); } void create_simplex_tree(Gudhi::Simplex_tree<>* simplex_tree, double max_alpha_square) { witness_complex_->create_complex(*simplex_tree, max_alpha_square); - simplex_tree->initialize_filtration(); } std::vector<double> get_point(unsigned vh) { diff --git a/src/python/include/Euclidean_witness_complex_interface.h b/src/python/include/Euclidean_witness_complex_interface.h index 5d7dbdc2..4411ae79 100644 --- a/src/python/include/Euclidean_witness_complex_interface.h +++ b/src/python/include/Euclidean_witness_complex_interface.h @@ -49,12 +49,10 @@ class Euclidean_witness_complex_interface { void create_simplex_tree(Gudhi::Simplex_tree<>* simplex_tree, double max_alpha_square, std::size_t limit_dimension) { witness_complex_->create_complex(*simplex_tree, max_alpha_square, limit_dimension); - simplex_tree->initialize_filtration(); } void create_simplex_tree(Gudhi::Simplex_tree<>* simplex_tree, double max_alpha_square) { witness_complex_->create_complex(*simplex_tree, max_alpha_square); - simplex_tree->initialize_filtration(); } std::vector<double> get_point(unsigned vh) { diff --git a/src/python/include/Nerve_gic_interface.h b/src/python/include/Nerve_gic_interface.h index 5e7f8ae6..ab14c318 100644 --- a/src/python/include/Nerve_gic_interface.h +++ b/src/python/include/Nerve_gic_interface.h @@ -29,7 +29,6 @@ class Nerve_gic_interface : public Cover_complex<std::vector<double>> { public: void create_simplex_tree(Simplex_tree_interface<>* simplex_tree) { create_complex(*simplex_tree); - simplex_tree->initialize_filtration(); } void set_cover_from_Euclidean_Voronoi(int m) { set_cover_from_Voronoi(Gudhi::Euclidean_distance(), m); diff --git a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h index 3074389c..0de9bd5c 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -42,21 +42,19 @@ persistent_cohomology::Persistent_cohomology<FilteredComplex, persistent_cohomol }; public: - Persistent_cohomology_interface(FilteredComplex* stptr) - : Base(*stptr), - stptr_(stptr) { } - - Persistent_cohomology_interface(FilteredComplex* stptr, bool persistence_dim_max) + Persistent_cohomology_interface(FilteredComplex* stptr, bool persistence_dim_max=false) : Base(*stptr, persistence_dim_max), stptr_(stptr) { } - std::vector<std::pair<int, std::pair<double, double>>> get_persistence(int homology_coeff_field, - double min_persistence) { + // TODO: move to the constructors? + void compute_persistence(int homology_coeff_field, double min_persistence) { Base::init_coefficients(homology_coeff_field); Base::compute_persistent_cohomology(min_persistence); + } - auto const& persistent_pairs = Base::get_persistent_pairs(); + std::vector<std::pair<int, std::pair<double, double>>> get_persistence() { std::vector<std::pair<int, std::pair<double, double>>> persistence; + auto const& persistent_pairs = Base::get_persistent_pairs(); persistence.reserve(persistent_pairs.size()); for (auto pair : persistent_pairs) { persistence.emplace_back(stptr_->dimension(get<0>(pair)), diff --git a/src/python/include/Rips_complex_interface.h b/src/python/include/Rips_complex_interface.h index a66b0e5b..d98b0226 100644 --- a/src/python/include/Rips_complex_interface.h +++ b/src/python/include/Rips_complex_interface.h @@ -53,7 +53,6 @@ class Rips_complex_interface { rips_complex_->create_complex(*simplex_tree, dim_max); else sparse_rips_complex_->create_complex(*simplex_tree, dim_max); - simplex_tree->initialize_filtration(); } private: diff --git a/src/python/include/Simplex_tree_interface.h b/src/python/include/Simplex_tree_interface.h index 4a7062d6..56d7c41d 100644 --- a/src/python/include/Simplex_tree_interface.h +++ b/src/python/include/Simplex_tree_interface.h @@ -16,8 +16,6 @@ #include <gudhi/Simplex_tree.h> #include <gudhi/Points_off_io.h> -#include "Persistent_cohomology_interface.h" - #include <iostream> #include <vector> #include <utility> // std::pair @@ -37,18 +35,25 @@ class Simplex_tree_interface : public Simplex_tree<SimplexTreeOptions> { using Filtered_simplices = std::vector<Simplex_and_filtration>; using Skeleton_simplex_iterator = typename Base::Skeleton_simplex_iterator; using Complex_simplex_iterator = typename Base::Complex_simplex_iterator; + using Extended_filtration_data = typename Base::Extended_filtration_data; public: - bool find_simplex(const Simplex& vh) { - return (Base::find(vh) != Base::null_simplex()); + + Extended_filtration_data efd; + + bool find_simplex(const Simplex& simplex) { + return (Base::find(simplex) != Base::null_simplex()); } - void assign_simplex_filtration(const Simplex& vh, Filtration_value filtration) { - Base::assign_filtration(Base::find(vh), filtration); + void assign_simplex_filtration(const Simplex& simplex, Filtration_value filtration) { + Base::assign_filtration(Base::find(simplex), filtration); + Base::clear_filtration(); } bool insert(const Simplex& simplex, Filtration_value filtration = 0) { Insertion_result result = Base::insert_simplex_and_subfaces(simplex, filtration); + if (result.first != Base::null_simplex()) + Base::clear_filtration(); return (result.second); } @@ -82,7 +87,7 @@ class Simplex_tree_interface : public Simplex_tree<SimplexTreeOptions> { void remove_maximal_simplex(const Simplex& simplex) { Base::remove_maximal_simplex(Base::find(simplex)); - Base::initialize_filtration(); + Base::clear_filtration(); } Simplex_and_filtration get_simplex_and_filtration(Simplex_handle f_simplex) { @@ -117,9 +122,39 @@ class Simplex_tree_interface : public Simplex_tree<SimplexTreeOptions> { return cofaces; } - void create_persistence(Gudhi::Persistent_cohomology_interface<Base>* pcoh) { - Base::initialize_filtration(); - pcoh = new Gudhi::Persistent_cohomology_interface<Base>(*this); + void compute_extended_filtration() { + this->efd = this->extend_filtration(); + return; + } + + std::vector<std::vector<std::pair<int, std::pair<Filtration_value, Filtration_value>>>> compute_extended_persistence_subdiagrams(const std::vector<std::pair<int, std::pair<Filtration_value, Filtration_value>>>& dgm, Filtration_value min_persistence){ + std::vector<std::vector<std::pair<int, std::pair<Filtration_value, Filtration_value>>>> new_dgm(4); + for (unsigned int i = 0; i < dgm.size(); i++){ + std::pair<Filtration_value, Extended_simplex_type> px = this->decode_extended_filtration(dgm[i].second.first, this->efd); + std::pair<Filtration_value, Extended_simplex_type> py = this->decode_extended_filtration(dgm[i].second.second, this->efd); + std::pair<int, std::pair<Filtration_value, Filtration_value>> pd_point = std::make_pair(dgm[i].first, std::make_pair(px.first, py.first)); + if(std::abs(px.first - py.first) > min_persistence){ + //Ordinary + if (px.second == Extended_simplex_type::UP && py.second == Extended_simplex_type::UP){ + new_dgm[0].push_back(pd_point); + } + // Relative + else if (px.second == Extended_simplex_type::DOWN && py.second == Extended_simplex_type::DOWN){ + new_dgm[1].push_back(pd_point); + } + else{ + // Extended+ + if (px.first < py.first){ + new_dgm[2].push_back(pd_point); + } + //Extended- + else{ + new_dgm[3].push_back(pd_point); + } + } + } + } + return new_dgm; } // Iterator over the simplex tree diff --git a/src/python/include/Strong_witness_complex_interface.h b/src/python/include/Strong_witness_complex_interface.h index cda5b514..e9ab0c7b 100644 --- a/src/python/include/Strong_witness_complex_interface.h +++ b/src/python/include/Strong_witness_complex_interface.h @@ -41,13 +41,11 @@ class Strong_witness_complex_interface { void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square, std::size_t limit_dimension) { witness_complex_->create_complex(*simplex_tree, max_alpha_square, limit_dimension); - simplex_tree->initialize_filtration(); } void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square) { witness_complex_->create_complex(*simplex_tree, max_alpha_square); - simplex_tree->initialize_filtration(); } private: diff --git a/src/python/include/Tangential_complex_interface.h b/src/python/include/Tangential_complex_interface.h index 698226cc..b1afce94 100644 --- a/src/python/include/Tangential_complex_interface.h +++ b/src/python/include/Tangential_complex_interface.h @@ -90,7 +90,6 @@ class Tangential_complex_interface { void create_simplex_tree(Simplex_tree<>* simplex_tree) { tangential_complex_->create_complex<Gudhi::Simplex_tree<Gudhi::Simplex_tree_options_full_featured>>(*simplex_tree); - simplex_tree->initialize_filtration(); } void set_max_squared_edge_length(double max_squared_edge_length) { diff --git a/src/python/include/Witness_complex_interface.h b/src/python/include/Witness_complex_interface.h index 45e14253..76947e53 100644 --- a/src/python/include/Witness_complex_interface.h +++ b/src/python/include/Witness_complex_interface.h @@ -41,13 +41,11 @@ class Witness_complex_interface { void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square, std::size_t limit_dimension) { witness_complex_->create_complex(*simplex_tree, max_alpha_square, limit_dimension); - simplex_tree->initialize_filtration(); } void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square) { witness_complex_->create_complex(*simplex_tree, max_alpha_square); - simplex_tree->initialize_filtration(); } private: diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py new file mode 100755 index 00000000..bff4c267 --- /dev/null +++ b/src/python/test/test_dtm.py @@ -0,0 +1,68 @@ +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. + Author(s): Marc Glisse + + Copyright (C) 2020 Inria + + Modification(s): + - YYYY/MM Author: Description of the modification +""" + +from gudhi.point_cloud.dtm import DistanceToMeasure +import numpy +import pytest +import torch + + +def test_dtm_compare_euclidean(): + pts = numpy.random.rand(1000, 4) + k = 6 + dtm = DistanceToMeasure(k, implementation="ckdtree") + r0 = dtm.fit_transform(pts) + dtm = DistanceToMeasure(k, implementation="sklearn") + r1 = dtm.fit_transform(pts) + assert r1 == pytest.approx(r0) + dtm = DistanceToMeasure(k, implementation="sklearn", algorithm="brute") + r2 = dtm.fit_transform(pts) + assert r2 == pytest.approx(r0) + dtm = DistanceToMeasure(k, implementation="hnsw") + r3 = dtm.fit_transform(pts) + assert r3 == pytest.approx(r0, rel=0.1) + from scipy.spatial.distance import cdist + + d = cdist(pts, pts) + dtm = DistanceToMeasure(k, metric="precomputed") + r4 = dtm.fit_transform(d) + assert r4 == pytest.approx(r0) + dtm = DistanceToMeasure(k, metric="precomputed", n_jobs=2) + r4b = dtm.fit_transform(d) + assert r4b == pytest.approx(r0) + dtm = DistanceToMeasure(k, implementation="keops") + r5 = dtm.fit_transform(pts) + assert r5 == pytest.approx(r0) + pts2 = torch.tensor(pts, requires_grad=True) + assert pts2.grad is None + dtm = DistanceToMeasure(k, implementation="keops", enable_autodiff=True) + r6 = dtm.fit_transform(pts2) + assert r6.detach().numpy() == pytest.approx(r0) + r6.sum().backward() + assert not torch.isnan(pts2.grad).any() + pts2 = torch.tensor(pts, requires_grad=True) + assert pts2.grad is None + dtm = DistanceToMeasure(k, implementation="ckdtree", enable_autodiff=True) + r7 = dtm.fit_transform(pts2) + assert r7.detach().numpy() == pytest.approx(r0) + r7.sum().backward() + assert not torch.isnan(pts2.grad).any() + + +def test_dtm_precomputed(): + dist = numpy.array([[1.0, 3, 8], [1, 5, 5], [0, 2, 3]]) + dtm = DistanceToMeasure(2, q=1, metric="neighbors") + r = dtm.fit_transform(dist) + assert r == pytest.approx([2.0, 3, 1]) + + dist = numpy.array([[2.0, 2], [0, 1], [3, 4]]) + dtm = DistanceToMeasure(2, q=2, metric="neighbors") + r = dtm.fit_transform(dist) + assert r == pytest.approx([2.0, 0.707, 3.5355], rel=0.01) diff --git a/src/python/test/test_knn.py b/src/python/test/test_knn.py new file mode 100755 index 00000000..a87ec212 --- /dev/null +++ b/src/python/test/test_knn.py @@ -0,0 +1,130 @@ +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. + Author(s): Marc Glisse + + Copyright (C) 2020 Inria + + Modification(s): + - YYYY/MM Author: Description of the modification +""" + +from gudhi.point_cloud.knn import KNearestNeighbors +import numpy as np +import pytest + + +def test_knn_explicit(): + base = np.array([[1.0, 1], [1, 2], [4, 2], [4, 3]]) + query = np.array([[1.0, 1], [2, 2], [4, 4]]) + knn = KNearestNeighbors(2, metric="manhattan", return_distance=True, return_index=True) + knn.fit(base) + r = knn.transform(query) + assert r[0] == pytest.approx(np.array([[0, 1], [1, 0], [3, 2]])) + assert r[1] == pytest.approx(np.array([[0.0, 1], [1, 2], [1, 2]])) + + knn = KNearestNeighbors(2, metric="chebyshev", return_distance=True, return_index=False) + knn.fit(base) + r = knn.transform(query) + assert r == pytest.approx(np.array([[0.0, 1], [1, 1], [1, 2]])) + r = ( + KNearestNeighbors(2, metric="chebyshev", return_distance=True, return_index=False, implementation="keops") + .fit(base) + .transform(query) + ) + assert r == pytest.approx(np.array([[0.0, 1], [1, 1], [1, 2]])) + r = ( + KNearestNeighbors(2, metric="chebyshev", return_distance=True, return_index=False, implementation="keops", enable_autodiff=True) + .fit(base) + .transform(query) + ) + assert r == pytest.approx(np.array([[0.0, 1], [1, 1], [1, 2]])) + + knn = KNearestNeighbors(2, metric="minkowski", p=3, return_distance=False, return_index=True) + knn.fit(base) + r = knn.transform(query) + assert np.array_equal(r, [[0, 1], [1, 0], [3, 2]]) + r = ( + KNearestNeighbors(2, metric="minkowski", p=3, return_distance=False, return_index=True, implementation="keops") + .fit(base) + .transform(query) + ) + assert np.array_equal(r, [[0, 1], [1, 0], [3, 2]]) + + dist = np.array([[0.0, 3, 8], [1, 0, 5], [1, 2, 0]]) + knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=False) + r = knn.fit_transform(dist) + assert np.array_equal(r, [[0, 1], [1, 0], [2, 0]]) + knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=True, sort_results=True) + r = knn.fit_transform(dist) + assert np.array_equal(r[0], [[0, 1], [1, 0], [2, 0]]) + assert np.array_equal(r[1], [[0, 3], [0, 1], [0, 1]]) + # Second time in parallel + knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=False, n_jobs=2, sort_results=True) + r = knn.fit_transform(dist) + assert np.array_equal(r, [[0, 1], [1, 0], [2, 0]]) + knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=True, n_jobs=2) + r = knn.fit_transform(dist) + assert np.array_equal(r[0], [[0, 1], [1, 0], [2, 0]]) + assert np.array_equal(r[1], [[0, 3], [0, 1], [0, 1]]) + + +def test_knn_compare(): + base = np.array([[1.0, 1], [1, 2], [4, 2], [4, 3]]) + query = np.array([[1.0, 1], [2, 2], [4, 4]]) + r0 = ( + KNearestNeighbors(2, implementation="ckdtree", return_index=True, return_distance=False) + .fit(base) + .transform(query) + ) + r1 = ( + KNearestNeighbors(2, implementation="sklearn", return_index=True, return_distance=False) + .fit(base) + .transform(query) + ) + r2 = ( + KNearestNeighbors(2, implementation="hnsw", return_index=True, return_distance=False).fit(base).transform(query) + ) + r3 = ( + KNearestNeighbors(2, implementation="keops", return_index=True, return_distance=False) + .fit(base) + .transform(query) + ) + assert np.array_equal(r0, r1) and np.array_equal(r0, r2) and np.array_equal(r0, r3) + + r0 = ( + KNearestNeighbors(2, implementation="ckdtree", return_index=True, return_distance=True) + .fit(base) + .transform(query) + ) + r1 = ( + KNearestNeighbors(2, implementation="sklearn", return_index=True, return_distance=True) + .fit(base) + .transform(query) + ) + r2 = KNearestNeighbors(2, implementation="hnsw", return_index=True, return_distance=True).fit(base).transform(query) + r3 = ( + KNearestNeighbors(2, implementation="keops", return_index=True, return_distance=True).fit(base).transform(query) + ) + assert np.array_equal(r0[0], r1[0]) and np.array_equal(r0[0], r2[0]) and np.array_equal(r0[0], r3[0]) + d0 = pytest.approx(r0[1]) + assert r1[1] == d0 and r2[1] == d0 and r3[1] == d0 + + +def test_knn_nop(): + # This doesn't look super useful... + p = np.array([[0.0]]) + assert None is KNearestNeighbors( + k=1, return_index=False, return_distance=False, implementation="sklearn" + ).fit_transform(p) + assert None is KNearestNeighbors( + k=1, return_index=False, return_distance=False, implementation="ckdtree" + ).fit_transform(p) + assert None is KNearestNeighbors( + k=1, return_index=False, return_distance=False, implementation="hnsw", ef=5 + ).fit_transform(p) + assert None is KNearestNeighbors( + k=1, return_index=False, return_distance=False, implementation="keops" + ).fit_transform(p) + assert None is KNearestNeighbors( + k=1, return_index=False, return_distance=False, metric="precomputed" + ).fit_transform(p) diff --git a/src/python/test/test_simplex_tree.py b/src/python/test/test_simplex_tree.py index f7848379..2137d822 100755 --- a/src/python/test/test_simplex_tree.py +++ b/src/python/test/test_simplex_tree.py @@ -9,6 +9,7 @@ """ from gudhi import SimplexTree +import pytest __author__ = "Vincent Rouvreau" __copyright__ = "Copyright (C) 2016 Inria" @@ -45,7 +46,6 @@ def test_insertion(): assert st.find([2, 3]) == False # filtration test - st.initialize_filtration() assert st.filtration([0, 1, 2]) == 4.0 assert st.filtration([0, 2]) == 4.0 assert st.filtration([1, 2]) == 4.0 @@ -92,7 +92,6 @@ def test_insertion(): assert st.find([1]) == True assert st.find([2]) == True - st.initialize_filtration() assert st.persistence(persistence_dim_max=True) == [ (1, (4.0, float("inf"))), (0, (0.0, float("inf"))), @@ -150,7 +149,6 @@ def test_expansion(): st.expansion(3) assert st.num_vertices() == 7 assert st.num_simplices() == 22 - st.initialize_filtration() assert list(st.get_filtration()) == [ ([2], 0.1), @@ -250,6 +248,87 @@ def test_make_filtration_non_decreasing(): assert st.filtration([3, 4]) == 2.0 assert st.filtration([4, 5]) == 2.0 +def test_extend_filtration(): + + # Inserted simplex: + # 5 4 + # o o + # / \ / + # o o + # /2\ /3 + # o o + # 1 0 + + st = SimplexTree() + st.insert([0,2]) + st.insert([1,2]) + st.insert([0,3]) + st.insert([2,5]) + st.insert([3,4]) + st.insert([3,5]) + st.assign_filtration([0], 1.) + st.assign_filtration([1], 2.) + st.assign_filtration([2], 3.) + st.assign_filtration([3], 4.) + st.assign_filtration([4], 5.) + st.assign_filtration([5], 6.) + + assert list(st.get_filtration()) == [ + ([0, 2], 0.0), + ([1, 2], 0.0), + ([0, 3], 0.0), + ([3, 4], 0.0), + ([2, 5], 0.0), + ([3, 5], 0.0), + ([0], 1.0), + ([1], 2.0), + ([2], 3.0), + ([3], 4.0), + ([4], 5.0), + ([5], 6.0) + ] + + st.extend_filtration() + + assert list(st.get_filtration()) == [ + ([6], -3.0), + ([0], -2.0), + ([1], -1.8), + ([2], -1.6), + ([0, 2], -1.6), + ([1, 2], -1.6), + ([3], -1.4), + ([0, 3], -1.4), + ([4], -1.2), + ([3, 4], -1.2), + ([5], -1.0), + ([2, 5], -1.0), + ([3, 5], -1.0), + ([5, 6], 1.0), + ([4, 6], 1.2), + ([3, 6], 1.4), + ([3, 4, 6], 1.4), + ([3, 5, 6], 1.4), + ([2, 6], 1.6), + ([2, 5, 6], 1.6), + ([1, 6], 1.8), + ([1, 2, 6], 1.8), + ([0, 6], 2.0), + ([0, 2, 6], 2.0), + ([0, 3, 6], 2.0) + ] + + dgms = st.extended_persistence(min_persistence=-1.) + + assert dgms[0][0][1][0] == pytest.approx(2.) + assert dgms[0][0][1][1] == pytest.approx(3.) + assert dgms[1][0][1][0] == pytest.approx(5.) + assert dgms[1][0][1][1] == pytest.approx(4.) + assert dgms[2][0][1][0] == pytest.approx(1.) + assert dgms[2][0][1][1] == pytest.approx(6.) + assert dgms[3][0][1][0] == pytest.approx(6.) + assert dgms[3][0][1][1] == pytest.approx(1.) + def test_simplices_iterator(): st = SimplexTree() diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py new file mode 100755 index 00000000..f68c748e --- /dev/null +++ b/src/python/test/test_wasserstein_barycenter.py @@ -0,0 +1,46 @@ +from gudhi.wasserstein.barycenter import lagrangian_barycenter +import numpy as np + +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. + Author(s): Theo Lacombe + + Copyright (C) 2019 Inria + + Modification(s): + - YYYY/MM Author: Description of the modification +""" + +__author__ = "Theo Lacombe" +__copyright__ = "Copyright (C) 2019 Inria" +__license__ = "MIT" + + +def test_lagrangian_barycenter(): + + dg1 = np.array([[0.2, 0.5]]) + dg2 = np.array([[0.2, 0.7]]) + dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]]) + dg4 = np.array([]) + dg5 = np.array([]) + dg6 = np.array([]) + res = np.array([[0.27916667, 0.55416667], [0.7375, 0.7625], [0.2375, 0.2625]]) + + dg7 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]]) + dg8 = np.array([[0., 4.], [4, 8]]) + + # error crit. + eps = 1e-7 + + + assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < eps + assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), np.empty(shape=(0,2))) + assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < eps + Y, log = lagrangian_barycenter(pdiagset=[dg4, dg8], verbose=True) + assert np.linalg.norm(Y - np.array([[1,3], [5, 7]])) < eps + assert np.abs(log["energy"] - 2) < eps + assert np.array_equal(log["groupings"][0] , np.array([[0, -1], [1, -1]])) + assert np.array_equal(log["groupings"][1] , np.array([[0, 0], [1, 1]])) + assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg8, dg4], init=np.array([[0.2, 0.6], [0.5, 0.7]]), verbose=False) - np.array([[1, 3], [5, 7]])) < eps + assert lagrangian_barycenter(pdiagset = []) is None + diff --git a/src/python/test/test_wasserstein_distance.py b/src/python/test/test_wasserstein_distance.py index 0d70e11a..1a4acc1d 100755 --- a/src/python/test/test_wasserstein_distance.py +++ b/src/python/test/test_wasserstein_distance.py @@ -8,6 +8,7 @@ - YYYY/MM Author: Description of the modification """ +from gudhi.wasserstein.wasserstein import _proj_on_diag from gudhi.wasserstein import wasserstein_distance as pot from gudhi.hera import wasserstein_distance as hera import numpy as np @@ -17,6 +18,12 @@ __author__ = "Theo Lacombe" __copyright__ = "Copyright (C) 2019 Inria" __license__ = "MIT" +def test_proj_on_diag(): + dgm = np.array([[1., 1.], [1., 2.], [3., 5.]]) + assert np.array_equal(_proj_on_diag(dgm), [[1., 1.], [1.5, 1.5], [4., 4.]]) + empty = np.empty((0, 2)) + assert np.array_equal(_proj_on_diag(empty), empty) + def _basic_wasserstein(wasserstein_distance, delta, test_infinity=True, test_matching=True): diag1 = np.array([[2.7, 3.7], [9.6, 14.0], [34.2, 34.974]]) diag2 = np.array([[2.8, 4.45], [9.5, 14.1]]) @@ -70,7 +77,7 @@ def _basic_wasserstein(wasserstein_distance, delta, test_infinity=True, test_mat assert np.array_equal(match , [[0, -1], [1, -1]]) match = wasserstein_distance(diag1, diag2, matching=True, internal_p=2., order=2.)[1] assert np.array_equal(match, [[0, 0], [1, 1], [2, -1]]) - + def hera_wrap(delta): |