diff options
-rw-r--r-- | biblio/bibliography.bib | 8 | ||||
-rw-r--r-- | src/Collapse/doc/intro_edge_collapse.h | 72 | ||||
-rw-r--r-- | src/Collapse/include/gudhi/Flag_complex_edge_collapser.h | 579 | ||||
-rw-r--r-- | src/Collapse/test/collapse_unit_test.cpp | 16 | ||||
-rw-r--r-- | src/common/doc/main_page.md | 7 | ||||
-rw-r--r-- | src/python/CMakeLists.txt | 6 | ||||
-rw-r--r-- | src/python/gudhi/__init__.py.in | 4 | ||||
-rw-r--r-- | src/python/gudhi/simplex_tree.pyx | 15 | ||||
-rw-r--r-- | src/python/include/Simplex_tree_interface.h | 8 | ||||
-rwxr-xr-x | src/python/test/test_simplex_tree.py | 17 |
10 files changed, 325 insertions, 407 deletions
diff --git a/biblio/bibliography.bib b/biblio/bibliography.bib index e75e8db2..ec8772aa 100644 --- a/biblio/bibliography.bib +++ b/biblio/bibliography.bib @@ -1347,6 +1347,12 @@ doi="10.1007/978-3-030-43408-3_2", annote = {Keywords: Computational Topology, Topological Data Analysis, Edge Collapse, Simple Collapse, Persistent homology} } +@misc{edgecollapsearxiv, + author = {Marc Glisse and Siddharth Pritam}, + title = {{Swap, Shift and Trim to Edge Collapse a Filtration}}, + url = {https://arxiv.org/abs/2203.07022}, +} + @phdthesis{KachanovichThesis, TITLE = {{Meshing submanifolds using Coxeter triangulations}}, AUTHOR = {Kachanovich, Siargey}, @@ -1360,4 +1366,4 @@ doi="10.1007/978-3-030-43408-3_2", PDF = {https://hal.inria.fr/tel-02419148v2/file/2019AZUR4072.pdf}, HAL_ID = {tel-02419148}, HAL_VERSION = {v2}, -}
\ No newline at end of file +} diff --git a/src/Collapse/doc/intro_edge_collapse.h b/src/Collapse/doc/intro_edge_collapse.h index fde39707..12e909c8 100644 --- a/src/Collapse/doc/intro_edge_collapse.h +++ b/src/Collapse/doc/intro_edge_collapse.h @@ -17,68 +17,48 @@ namespace collapse { /** \defgroup edge_collapse Edge collapse * - * \author Siddharth Pritam + * \author Siddharth Pritam and Marc Glisse * * @{ * - * This module implements edge collapse of a filtered flag complex, in particular it reduces a filtration of - * Vietoris-Rips complex from its graph to another smaller flag filtration with same persistence. - * Where a filtration is a sequence of simplicial (here Rips) complexes connected with inclusions. + * This module implements edge collapse of a filtered flag complex as described in \cite edgecollapsearxiv, in + * particular it reduces a filtration of Vietoris-Rips complex represented by a graph to a smaller flag filtration with + * the same persistent homology. * * \section edge_collapse_definition Edge collapse definition * * An edge \f$e\f$ in a simplicial complex \f$K\f$ is called a <b>dominated edge</b> if the link of \f$e\f$ in * \f$K\f$, \f$lk_K(e)\f$ is a simplicial cone, that is, there exists a vertex \f$v^{\prime} \notin e\f$ and a - * subcomplex \f$L\f$ in \f$K\f$, such that \f$lk_K(e) = v^{\prime}L\f$. We say that the vertex \f$v^{\prime}\f$ is - * {dominating} \f$e\f$ and \f$e\f$ is {dominated} by \f$v^{\prime}\f$. - * An <b> elementary egde collapse </b> is the removal of a dominated edge \f$e\f$ from \f$K\f$, - * which we denote with \f$K\f$ \f${\searrow\searrow}^1 \f$ \f$K\setminus e\f$. - * The symbol \f$\mathbf{K\setminus e}\f$ (deletion of \f$e\f$ from \f$K\f$) refers to the subcomplex of \f$K\f$ which - * has all simplices of \f$K\f$ except \f$e\f$ and the ones containing \f$e\f$. - * There is an <b>edge collapse</b> from a simplicial complex \f$K\f$ to its subcomplex \f$L\f$, - * if there exists a series of elementary edge collapses from \f$K\f$ to \f$L\f$, denoted as \f$K\f$ - * \f${\searrow\searrow}\f$ \f$L\f$. - * - * An edge collapse is a homotopy preserving operation, and it can be further expressed as sequence of the classical - * elementary simple collapse. - * A complex without any dominated edge is called a \f$1\f$- minimal complex and the core \f$K^1\f$ of simplicial - * complex is a minimal complex such that \f$K\f$ \f${\searrow\searrow}\f$ \f$K^1\f$. - * Computation of a core (not unique) involves computation of dominated edges and the dominated edges can be easily - * characterized as follows: + * subcomplex \f$L\f$ in \f$K\f$, such that \f$lk_K(e) = v^{\prime}L\f$. We say that the vertex \f$v^{\prime}\f$ + * \e dominates \f$e\f$ and \f$e\f$ is \e dominated by \f$v^{\prime}\f$. + * An <b> elementary edge collapse </b> is the removal of a dominated edge \f$e\f$ from \f$K\f$ (the cofaces of \f$e\f$ + * are implicitly removed as well). + * Domination is used as a simple sufficient condition that ensures that this removal is a homotopy preserving + * operation. + * + * The dominated edges can be easily characterized as follows: * - * -- For general simplicial complex: An edge \f$e \in K\f$ is dominated by another vertex \f$v^{\prime} \in K\f$, - * <i>if and only if</i> all the maximal simplices of \f$K\f$ that contain \f$e\f$ also contain \f$v^{\prime}\f$ + * -- For a general simplicial complex: an edge \f$e \in K\f$ is dominated by another vertex \f$v^{\prime} \in K\f$, + * if and only if all the maximal simplices of \f$K\f$ that contain \f$e\f$ also contain \f$v^{\prime}\f$. * - * -- For a flag complex: An edge \f$e \in K\f$ is dominated by another vertex \f$v^{\prime} \in K\f$, <i>if and only - * if</i> all the vertices in \f$K\f$ that has an edge with both vertices of \f$e\f$ also has an edge with - * \f$v^{\prime}\f$. + * -- For a flag complex: an edge \f$e \in K\f$ is dominated by another vertex \f$v^{\prime} \in K\f$, if and only + * if all the vertices in \f$K\f$ that have an edge with both vertices of \f$e\f$ also have an edge with + * \f$v^{\prime}\f$. Notice that this only depends on the graph. * - * The algorithm to compute the smaller induced filtration is described in Section 5 \cite edgecollapsesocg2020. - * Edge collapse can be successfully employed to reduce any given filtration of flag complexes to a smaller induced + * In the context of a filtration, an edge collapse may translate into an increase of the filtration value of an edge, + * or its removal if it already had the largest filtration value. + * The algorithm to compute the smaller induced filtration is described in \cite edgecollapsearxiv. + * Edge collapse can be successfully employed to reduce any input filtration of flag complexes to a smaller induced * filtration which preserves the persistent homology of the original filtration and is a flag complex as well. * - * The general idea is that we consider edges in the filtered graph and sort them according to their filtration value - * giving them a total order. - * Each edge gets a unique index denoted as \f$i\f$ in this order. To reduce the filtration, we move forward with - * increasing filtration value - * in the graph and check if the current edge \f$e_i\f$ is dominated in the current graph \f$G_i := \{e_1, .. e_i\} \f$ - * or not. - * If the edge \f$e_i\f$ is dominated we remove it from the filtration and move forward to the next edge \f$e_{i+1}\f$. - * If \f$e_i\f$ is non-dominated then we keep it in the reduced filtration and then go backward in the current graph - * \f$G_i\f$ to look for new non-dominated edges that was dominated before but might become non-dominated at this - * point. - * If an edge \f$e_j, j < i \f$ during the backward search is found to be non-dominated, we include \f$e_j\f$ in to the - * reduced filtration and we set its new filtration value to be \f$i\f$ that is the index of \f$e_i\f$. - * The precise mechanism for this reduction has been described in Section 5 \cite edgecollapsesocg2020. - * Here we implement this mechanism for a filtration of Rips complex. - * After perfoming the reduction the filtration reduces to a flag-filtration with the same persistence as the original - * filtration. - * + * The algorithm implemented here does not produce a minimal filtration. Taking its output and applying the algorithm a + * second time may further simplify the filtration. + * * \subsection edgecollapseexample Basic edge collapse * * This example calls `Gudhi::collapse::flag_complex_collapse_edges()` from a proximity graph represented as a list of * `Filtered_edge`. - * Then it collapses edges and displays a new list of `Filtered_edge` (with less edges) + * Then it collapses edges and displays a new list of `Filtered_edge` (with fewer edges) * that will preserve the persistence homology computation. * * \include edge_collapse_basic_example.cpp @@ -88,7 +68,7 @@ namespace collapse { * \code $> ./Edge_collapse_example_basic * \endcode * - * the program output is: + * the program output could be: * * \include edge_collapse_example_basic.txt */ diff --git a/src/Collapse/include/gudhi/Flag_complex_edge_collapser.h b/src/Collapse/include/gudhi/Flag_complex_edge_collapser.h index 713c6608..c823901f 100644 --- a/src/Collapse/include/gudhi/Flag_complex_edge_collapser.h +++ b/src/Collapse/include/gudhi/Flag_complex_edge_collapser.h @@ -1,11 +1,12 @@ /* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. - * Author(s): Siddharth Pritam + * Author(s): Siddharth Pritam, Marc Glisse * * Copyright (C) 2020 Inria * * Modification(s): * - 2020/03 Vincent Rouvreau: integration to the gudhi library + * - 2021 Marc Glisse: complete rewrite * - YYYY/MM Author: Description of the modification */ @@ -14,367 +15,319 @@ #include <gudhi/Debug_utils.h> -#include <boost/functional/hash.hpp> -#include <boost/iterator/iterator_facade.hpp> - -#include <Eigen/Sparse> -#include <Eigen/src/Core/util/Macros.h> // for EIGEN_VERSION_AT_LEAST +#include <boost/container/flat_map.hpp> +#include <boost/container/flat_set.hpp> #ifdef GUDHI_USE_TBB #include <tbb/parallel_sort.h> #endif -#include <iostream> -#include <utility> // for std::pair +#include <utility> #include <vector> -#include <unordered_map> -#include <unordered_set> -#include <set> -#include <tuple> // for std::tie -#include <algorithm> // for std::includes -#include <iterator> // for std::inserter -#include <type_traits> // for std::decay - -// Make compilation fail - required for external projects - https://github.com/GUDHI/gudhi-devel/issues/10 -#if !EIGEN_VERSION_AT_LEAST(3,1,0) -# error Edge Collapse is only available for Eigen3 >= 3.1.0 -#endif +#include <tuple> +#include <algorithm> +#include <limits> namespace Gudhi { namespace collapse { /** \private - * - * \brief Flag complex sparse matrix data structure. * - * \details - * This class stores a <a target="_blank" href="https://en.wikipedia.org/wiki/Clique_complex">Flag complex</a> - * in an <a target="_blank" href="https://eigen.tuxfamily.org/dox/group__TutorialSparse.html">Eigen sparse matrix</a>. + * \brief Flag complex sparse matrix data structure. * - * \tparam Vertex type must be a signed integer type. It admits a total order <. - * \tparam Filtration type for the value of the filtration function. Must be comparable with <. + * \tparam Vertex type must be an integer type. + * \tparam Filtration type for the value of the filtration function. */ -template<typename Vertex, typename Filtration> -class Flag_complex_edge_collapser { - public: - /** \brief Re-define Vertex as Vertex_handle type to ease the interface with `Gudhi::Proximity_graph`. */ - using Vertex_handle = Vertex; - /** \brief Re-define Filtration as Filtration_value type to ease the interface with `Gudhi::Proximity_graph`. */ - using Filtration_value = Filtration; - - private: - // internal numbering of vertices and edges - using IVertex = std::size_t; - using Edge_index = std::size_t; - using IEdge = std::pair<IVertex, IVertex>; - - // The sparse matrix data type - // (Eigen::SparseMatrix<Edge_index, Eigen::RowMajor> has slow insertions) - using Sparse_vector = Eigen::SparseVector<Edge_index>; - using Sparse_row_matrix = std::vector<Sparse_vector>; - - // Range of neighbors of a vertex - template<bool closed> - struct Neighbours { - class iterator : public boost::iterator_facade<iterator, - IVertex, /* value_type */ - std::input_iterator_tag, // or boost::single_pass_traversal_tag - IVertex /* reference */ > - { - public: - iterator():ptr(nullptr){} - iterator(Neighbours const*p):ptr(p){find_valid();} - private: - friend class boost::iterator_core_access; - Neighbours const*ptr; - void increment(){ - ++ptr->it; - find_valid(); - } - void find_valid(){ - auto& it = ptr->it; - do { - if(!it) { ptr=nullptr; break; } - if(IVertex(it.index()) == ptr->u) { - if(closed) break; - else continue; - } - Edge_index e = it.value(); - if(e <= ptr->ec->current_backward || ptr->ec->critical_edge_indicator_[e]) break; - } while(++it, true); - } - bool equal(iterator const& other) const { return ptr == other.ptr; } - IVertex dereference() const { return ptr->it.index(); } - }; - typedef iterator const_iterator; - mutable typename Sparse_vector::InnerIterator it; - Flag_complex_edge_collapser const*ec; - IVertex u; - iterator begin() const { return this; } - iterator end() const { return {}; } - explicit Neighbours(Flag_complex_edge_collapser const*p,IVertex u):it(p->sparse_row_adjacency_matrix_[u]),ec(p),u(u){} - }; - - // A range of row indices - using IVertex_vector = std::vector<IVertex>; - - public: - /** \brief Filtered_edge is a type to store an edge with its filtration value. */ - using Filtered_edge = std::tuple<Vertex_handle, Vertex_handle, Filtration_value>; - - private: - // Map from row index to its vertex handle - std::vector<Vertex_handle> row_to_vertex_; - - // Index of the current edge in the backwards walk. Edges <= current_backward are part of the temporary graph, - // while edges > current_backward are removed unless critical_edge_indicator_. - Edge_index current_backward = -1; - - // Map from IEdge to its index - std::unordered_map<IEdge, Edge_index, boost::hash<IEdge>> iedge_to_index_map_; - - // Boolean vector to indicate if the edge is critical. - std::vector<bool> critical_edge_indicator_; - - // Map from vertex handle to its row index - std::unordered_map<Vertex_handle, IVertex> vertex_to_row_; - - // Stores the Sparse matrix of Filtration values representing the original graph. - // The matrix rows and columns are indexed by IVertex. - Sparse_row_matrix sparse_row_adjacency_matrix_; - - // The input, a vector of filtered edges. - std::vector<Filtered_edge> f_edge_vector_; - - // Edge is the actual edge (u,v), with Vertex_handle u and v, not IVertex. - bool edge_is_dominated(Vertex_handle u, Vertex_handle v) const - { - const IVertex rw_u = vertex_to_row_.at(u); - const IVertex rw_v = vertex_to_row_.at(v); -#ifdef DEBUG_TRACES - std::cout << "The edge {" << u << ", " << v << "} is going for domination check." << std::endl; -#endif // DEBUG_TRACES - auto common_neighbours = open_common_neighbours_row_index(rw_u, rw_v); -#ifdef DEBUG_TRACES - std::cout << "And its common neighbours are." << std::endl; - for (auto neighbour : common_neighbours) { - std::cout << row_to_vertex_[neighbour] << ", " ; - } - std::cout<< std::endl; -#endif // DEBUG_TRACES - if (common_neighbours.size() == 1) - return true; - else - for (auto rw_c : common_neighbours) { - auto neighbours_c = neighbours_row_index<true>(rw_c); - // If neighbours_c contains the common neighbours. - if (std::includes(neighbours_c.begin(), neighbours_c.end(), - common_neighbours.begin(), common_neighbours.end())) - return true; - } - return false; +template<typename Vertex, typename Filtration_value> +struct Flag_complex_edge_collapser { + using Filtered_edge = std::tuple<Vertex, Vertex, Filtration_value>; + typedef std::pair<Vertex,Vertex> Edge; + struct Cmpi { template<class T, class U> bool operator()(T const&a, U const&b)const{return b<a; } }; + typedef boost::container::flat_map<Vertex, Filtration_value> Ngb_list; + typedef std::vector<Ngb_list> Neighbors; + Neighbors neighbors; // closed neighborhood + std::size_t num_vertices; + std::vector<std::tuple<Vertex, Vertex, Filtration_value>> res; + +#ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY + // Minimal matrix interface + // Using this matrix generally helps performance, but the memory use may be excessive for a very sparse graph + // (and in extreme cases the constant initialization of the matrix may start to dominate the runnning time). + // Are there cases where the matrix is too big but a hash table would help? + std::vector<Filtration_value> neighbors_data; + void init_neighbors_dense(){ + neighbors_data.clear(); + neighbors_data.resize(num_vertices*num_vertices, std::numeric_limits<Filtration_value>::infinity()); } + Filtration_value& neighbors_dense(Vertex i, Vertex j){return neighbors_data[num_vertices*j+i];} +#endif - // Returns the edges connecting u and v (extremities of crit) to their common neighbors (not themselves) - std::set<Edge_index> three_clique_indices(Edge_index crit) { - std::set<Edge_index> edge_indices; - - Vertex_handle u = std::get<0>(f_edge_vector_[crit]); - Vertex_handle v = std::get<1>(f_edge_vector_[crit]); - -#ifdef DEBUG_TRACES - std::cout << "The current critical edge to re-check criticality with filt value is : f {" << u << "," << v - << "} = " << std::get<2>(f_edge_vector_[crit]) << std::endl; -#endif // DEBUG_TRACES - auto rw_u = vertex_to_row_[u]; - auto rw_v = vertex_to_row_[v]; - - IVertex_vector common_neighbours = open_common_neighbours_row_index(rw_u, rw_v); + // This does not touch the events list, only the adjacency matrix(es) + void delay_neighbor(Vertex u, Vertex v, Filtration_value f) { + neighbors[u][v]=f; + neighbors[v][u]=f; +#ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY + neighbors_dense(u,v)=f; + neighbors_dense(v,u)=f; +#endif + } + void remove_neighbor(Vertex u, Vertex v) { + neighbors[u].erase(v); + neighbors[v].erase(u); +#ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY + neighbors_dense(u,v)=std::numeric_limits<Filtration_value>::infinity(); + neighbors_dense(v,u)=std::numeric_limits<Filtration_value>::infinity(); +#endif + } - for (auto rw_c : common_neighbours) { - IEdge e_with_new_nbhr_v = std::minmax(rw_u, rw_c); - IEdge e_with_new_nbhr_u = std::minmax(rw_v, rw_c); - edge_indices.emplace(iedge_to_index_map_[e_with_new_nbhr_v]); - edge_indices.emplace(iedge_to_index_map_[e_with_new_nbhr_u]); + template<class FilteredEdgeRange> + void read_edges(FilteredEdgeRange const&r){ + neighbors.resize(num_vertices); +#ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY + init_neighbors_dense(); +#endif + // Use the raw sequence to avoid maintaining the order + std::vector<typename Ngb_list::sequence_type> neighbors_seq(num_vertices); + for(auto&&e : r){ + using std::get; + Vertex u = get<0>(e); + Vertex v = get<1>(e); + Filtration_value f = get<2>(e); + neighbors_seq[u].emplace_back(v, f); + neighbors_seq[v].emplace_back(u, f); +#ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY + neighbors_dense(u,v)=f; + neighbors_dense(v,u)=f; +#endif + } + for(std::size_t i=0;i<neighbors_seq.size();++i){ + neighbors_seq[i].emplace_back(i, -std::numeric_limits<Filtration_value>::infinity()); + neighbors[i].adopt_sequence(std::move(neighbors_seq[i])); // calls sort +#ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY + neighbors_dense(i,i)=-std::numeric_limits<Filtration_value>::infinity(); +#endif } - return edge_indices; } - // Detect and set all edges that are becoming critical - template<typename FilteredEdgeOutput> - void set_edge_critical(Edge_index indx, Filtration_value filt, FilteredEdgeOutput filtered_edge_output) { -#ifdef DEBUG_TRACES - std::cout << "The curent index with filtration value " << indx << ", " << filt << " is primary critical" << - std::endl; -#endif // DEBUG_TRACES - std::set<Edge_index> effected_indices = three_clique_indices(indx); - // Cannot use boost::adaptors::reverse in such dynamic cases apparently - for (auto it = effected_indices.rbegin(); it != effected_indices.rend(); ++it) { - current_backward = *it; - Vertex_handle u = std::get<0>(f_edge_vector_[current_backward]); - Vertex_handle v = std::get<1>(f_edge_vector_[current_backward]); - // If current_backward is not critical so it should be processed, otherwise it stays in the graph - if (!critical_edge_indicator_[current_backward]) { - if (!edge_is_dominated(u, v)) { -#ifdef DEBUG_TRACES - std::cout << "The curent index became critical " << current_backward << std::endl; -#endif // DEBUG_TRACES - critical_edge_indicator_[current_backward] = true; - filtered_edge_output(u, v, filt); - std::set<Edge_index> inner_effected_indcs = three_clique_indices(current_backward); - for (auto inr_idx : inner_effected_indcs) { - if(inr_idx < current_backward) // && !critical_edge_indicator_[inr_idx] - effected_indices.emplace(inr_idx); - } -#ifdef DEBUG_TRACES - std::cout << "The following edge is critical with filt value: {" << u << "," << v << "}; " - << filt << std::endl; -#endif // DEBUG_TRACES - } + // Open neighborhood + // At some point it helped gcc to add __attribute__((noinline)) here, otherwise we had +50% on the running time + // on one example. It looks ok now, or I forgot which example that was. + void common_neighbors(boost::container::flat_set<Vertex>& e_ngb, + std::vector<std::pair<Filtration_value, Vertex>>& e_ngb_later, + Vertex u, Vertex v, Filtration_value f_event){ + // Using neighbors_dense here seems to hurt, even if we loop on the smaller of nu and nv. + Ngb_list const&nu = neighbors[u]; + Ngb_list const&nv = neighbors[v]; + auto ui = nu.begin(); + auto ue = nu.end(); + auto vi = nv.begin(); + auto ve = nv.end(); + assert(ui != ue && vi != ve); + while(ui != ue && vi != ve){ + Vertex w = ui->first; + if(w < vi->first) { ++ui; continue; } + if(w > vi->first) { ++vi; continue; } + // nu and nv are closed, so we need to exclude e here. + if(w != u && w != v) { + Filtration_value f = std::max(ui->second, vi->second); + if(f > f_event) + e_ngb_later.emplace_back(f, w); + else + e_ngb.insert(e_ngb.end(), w); } + ++ui; ++vi; } - // Clear the implicit "removed from graph" data structure - current_backward = -1; } - // Returns list of neighbors of a particular vertex. - template<bool closed> - auto neighbours_row_index(IVertex rw_u) const - { - return Neighbours<closed>(this, rw_u); + // Test if the neighborhood of e is included in the closed neighborhood of c + template<class Ngb> + bool is_dominated_by(Ngb const& e_ngb, Vertex c, Filtration_value f){ + // The best strategy probably depends on the distribution, how sparse / dense the adjacency matrix is, + // how (un)balanced the sizes of e_ngb and nc are. + // Some efficient operations on sets work best with bitsets, although the need for a map complicates things. +#ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY + for(auto v : e_ngb) { + // if(v==c)continue; + if(neighbors_dense(v,c) > f) return false; + } + return true; +#else + auto&&nc = neighbors[c]; + // if few neighbors, use dichotomy? Seems slower. + // I tried storing a copy of neighbors as a vector<absl::flat_hash_map> and using it for nc, but it was + // a bit slower here. It did help with neighbors[dominator].find(w) in the main function though, + // sometimes enough, sometimes not. + auto ci = nc.begin(); + auto ce = nc.end(); + auto eni = e_ngb.begin(); + auto ene = e_ngb.end(); + assert(eni != ene); + assert(ci != ce); + // if(*eni == c && ++eni == ene) return true; + for(;;){ + Vertex ve = *eni; + Vertex vc = ci->first; + while(ve > vc) { + // try a gallop strategy (exponential search)? Seems slower + if(++ci == ce) return false; + vc = ci->first; + } + if(ve < vc) return false; + // ve == vc + if(ci->second > f) return false; + if(++eni == ene)return true; + // If we stored an open neighborhood of c (excluding c), we would need to test for c here and before the loop + // if(*eni == c && ++eni == ene)return true; + if(++ci == ce) return false; + } +#endif } - // Returns the list of open neighbours of the edge :{u,v}. - IVertex_vector open_common_neighbours_row_index(IVertex rw_u, IVertex rw_v) const - { - auto non_zero_indices_u = neighbours_row_index<false>(rw_u); - auto non_zero_indices_v = neighbours_row_index<false>(rw_v); - IVertex_vector common; - std::set_intersection(non_zero_indices_u.begin(), non_zero_indices_u.end(), non_zero_indices_v.begin(), - non_zero_indices_v.end(), std::back_inserter(common)); - - return common; - } + template<class FilteredEdgeRange, class Delay> + void process_edges(FilteredEdgeRange const& edges, Delay&& delay) { + { + Vertex maxi = 0, maxj = 0; + for(auto& fe : edges) { + Vertex i = std::get<0>(fe); + Vertex j = std::get<1>(fe); + if (i > maxi) maxi = i; + if (j > maxj) maxj = j; + } + num_vertices = std::max(maxi, maxj) + 1; + } - // Insert a vertex in the data structure - IVertex insert_vertex(Vertex_handle vertex) { - auto n = row_to_vertex_.size(); - auto result = vertex_to_row_.emplace(vertex, n); - // If it was not already inserted - Value won't be updated by emplace if it is already present - if (result.second) { - // Expand the matrix. The size of rows is irrelevant. - sparse_row_adjacency_matrix_.emplace_back((std::numeric_limits<Eigen::Index>::max)()); - // Initializing the diagonal element of the adjency matrix corresponding to rw_b. - sparse_row_adjacency_matrix_[n].insert(n) = -1; // not an edge - // Must be done after reading its size() - row_to_vertex_.push_back(vertex); + read_edges(edges); + + boost::container::flat_set<Vertex> e_ngb; + e_ngb.reserve(num_vertices); + std::vector<std::pair<Filtration_value, Vertex>> e_ngb_later; + for(auto&e:edges) { + { + Vertex u = std::get<0>(e); + Vertex v = std::get<1>(e); + Filtration_value input_time = std::get<2>(e); + auto time = delay(input_time); + auto start_time = time; + e_ngb.clear(); + e_ngb_later.clear(); + common_neighbors(e_ngb, e_ngb_later, u, v, time); + // If we identify a good candidate (the first common neighbor) for being a dominator of e until infinity, + // we could check that a bit more cheaply. It does not seem to help though. + auto cmp1=[](auto const&a, auto const&b){return a.first > b.first;}; + auto e_ngb_later_begin=e_ngb_later.begin(); + auto e_ngb_later_end=e_ngb_later.end(); + bool heapified = false; + + bool dead = false; + while(true) { + Vertex dominator = -1; + // special case for size 1 + // if(e_ngb.size()==1){dominator=*e_ngb.begin();}else + // It is tempting to test the dominators in increasing order of filtration value, which is likely to reduce + // the number of calls to is_dominated_by before finding a dominator, but sorting, even partially / lazily, + // is very expensive. + for(auto c : e_ngb){ + if(is_dominated_by(e_ngb, c, time)){ + dominator = c; + break; + } + } + if(dominator==-1) break; + // Push as long as dominator remains a dominator. + // Iterate on times where at least one neighbor appears. + for (bool still_dominated = true; still_dominated; ) { + if(e_ngb_later_begin == e_ngb_later_end) { + dead = true; goto end_move; + } + if(!heapified) { + // Eagerly sorting can be slow + std::make_heap(e_ngb_later_begin, e_ngb_later_end, cmp1); + heapified=true; + } + time = e_ngb_later_begin->first; // first place it may become critical + // Update the neighborhood for this new time, while checking if any of the new neighbors break domination. + while (e_ngb_later_begin != e_ngb_later_end && e_ngb_later_begin->first <= time) { + Vertex w = e_ngb_later_begin->second; +#ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY + if (neighbors_dense(dominator,w) > e_ngb_later_begin->first) + still_dominated = false; +#else + auto& ngb_dom = neighbors[dominator]; + auto wit = ngb_dom.find(w); // neighborhood may be open or closed, it does not matter + if (wit == ngb_dom.end() || wit->second > e_ngb_later_begin->first) + still_dominated = false; +#endif + e_ngb.insert(w); + std::pop_heap(e_ngb_later_begin, e_ngb_later_end--, cmp1); + } + } // this doesn't seem to help that much... + } +end_move: + if(dead) { + remove_neighbor(u, v); + } else if(start_time != time) { + delay_neighbor(u, v, time); + res.emplace_back(u, v, time); + } else { + res.emplace_back(u, v, input_time); + } + } } - return result.first->second; } - // Insert an edge in the data structure - // @exception std::invalid_argument In debug mode, if u == v - IEdge insert_new_edge(Vertex_handle u, Vertex_handle v, Edge_index idx) - { - GUDHI_CHECK((u != v), std::invalid_argument("Flag_complex_edge_collapser::insert_new_edge with u == v")); - // The edge must not be added before, it should be a new edge. - IVertex rw_u = insert_vertex(u); - IVertex rw_v = insert_vertex(v); -#ifdef DEBUG_TRACES - std::cout << "Inserting the edge " << u <<", " << v << std::endl; -#endif // DEBUG_TRACES - sparse_row_adjacency_matrix_[rw_u].insert(rw_v) = idx; - sparse_row_adjacency_matrix_[rw_v].insert(rw_u) = idx; - return std::minmax(rw_u, rw_v); + std::vector<Filtered_edge> output() { + return std::move(res); } - public: - /** \brief Flag_complex_edge_collapser constructor from a range of filtered edges. - * - * @param[in] edges Range of Filtered edges range.There is no need the range to be sorted, as it will be performed in - * `Flag_complex_edge_collapser::process_edges`. - * - * \tparam FilteredEdgeRange must be a range for which std::begin and std::end return iterators on a - * `Flag_complex_edge_collapser::Filtered_edge`. - */ - template<typename FilteredEdgeRange> - Flag_complex_edge_collapser(const FilteredEdgeRange& edges) - : f_edge_vector_(std::begin(edges), std::end(edges)) { } +}; - /** \brief Performs edge collapse in a increasing sequence of the filtration value. - * - * \tparam filtered_edge_output is a functor that is called on the output edges, in non-decreasing order of - * filtration, as filtered_edge_output(u, v, f) where u and v are Vertex_handle representing the extremities of the - * edge, and f is its new Filtration_value. - */ - template<typename FilteredEdgeOutput> - void process_edges(FilteredEdgeOutput filtered_edge_output) { - // Sort edges - auto sort_by_filtration = [](const Filtered_edge& edge_a, const Filtered_edge& edge_b) -> bool - { - return (std::get<2>(edge_a) < std::get<2>(edge_b)); - }; +template<class R> R to_range(R&& r) { return std::move(r); } +template<class R, class T> R to_range(T&& t) { R r; r.insert(r.end(), t.begin(), t.end()); return r; } +template<class FilteredEdgeRange, class Delay> +auto flag_complex_collapse_edges(FilteredEdgeRange&& edges, Delay&&delay) { + // Would it help to label the points according to some spatial sorting? + auto first_edge_itr = std::begin(edges); + using Vertex = std::decay_t<decltype(std::get<0>(*first_edge_itr))>; + using Filtration_value = std::decay_t<decltype(std::get<2>(*first_edge_itr))>; + using Edge_collapser = Flag_complex_edge_collapser<Vertex, Filtration_value>; + if (first_edge_itr != std::end(edges)) { + auto edges2 = to_range<std::vector<typename Edge_collapser::Filtered_edge>>(std::forward<FilteredEdgeRange>(edges)); #ifdef GUDHI_USE_TBB - tbb::parallel_sort(f_edge_vector_.begin(), f_edge_vector_.end(), sort_by_filtration); + // I think this sorting is always negligible compared to the collapse, but parallelizing it shouldn't hurt. + tbb::parallel_sort(edges2.begin(), edges2.end(), + [](auto const&a, auto const&b){return std::get<2>(a)>std::get<2>(b);}); #else - std::sort(f_edge_vector_.begin(), f_edge_vector_.end(), sort_by_filtration); + std::sort(edges2.begin(), edges2.end(), [](auto const&a, auto const&b){return std::get<2>(a)>std::get<2>(b);}); #endif - - for (Edge_index endIdx = 0; endIdx < f_edge_vector_.size(); endIdx++) { - Filtered_edge fec = f_edge_vector_[endIdx]; - Vertex_handle u = std::get<0>(fec); - Vertex_handle v = std::get<1>(fec); - Filtration_value filt = std::get<2>(fec); - - // Inserts the edge in the sparse matrix to update the graph (G_i) - IEdge ie = insert_new_edge(u, v, endIdx); - - iedge_to_index_map_.emplace(ie, endIdx); - critical_edge_indicator_.push_back(false); - - if (!edge_is_dominated(u, v)) { - critical_edge_indicator_[endIdx] = true; - filtered_edge_output(u, v, filt); - if (endIdx > 1) - set_edge_critical(endIdx, filt, filtered_edge_output); - } - } + Edge_collapser edge_collapser; + edge_collapser.process_edges(edges2, std::forward<Delay>(delay)); + return edge_collapser.output(); } - -}; + return std::vector<typename Edge_collapser::Filtered_edge>(); +} /** \brief Implicitly constructs a flag complex from edges as an input, collapses edges while preserving the persistent - * homology and returns the remaining edges as a range. + * homology and returns the remaining edges as a range. The filtration value of vertices is irrelevant to this function. * - * \param[in] edges Range of Filtered edges.There is no need the range to be sorted, as it will be performed. + * \param[in] edges Range of Filtered edges. There is no need for the range to be sorted, as it will be done internally. * - * \tparam FilteredEdgeRange furnishes `std::begin` and `std::end` methods and returns an iterator on a - * FilteredEdge of type `std::tuple<Vertex_handle, Vertex_handle, Filtration_value>` where `Vertex_handle` is the type - * of a vertex index and `Filtration_value` is the type of an edge filtration value. + * \tparam FilteredEdgeRange Range of `std::tuple<Vertex_handle, Vertex_handle, Filtration_value>` + * where `Vertex_handle` is the type of a vertex index. * * \return Remaining edges after collapse as a range of * `std::tuple<Vertex_handle, Vertex_handle, Filtration_value>`. - * + * * \ingroup edge_collapse - * + * + * \note + * Advanced: Defining the macro GUDHI_COLLAPSE_USE_DENSE_ARRAY tells gudhi to allocate a square table of size the + * maximum vertex index. This usually speeds up the computation for dense graphs. However, for sparse graphs, the memory + * use may be problematic and initializing this large table may be slow. */ template<class FilteredEdgeRange> auto flag_complex_collapse_edges(const FilteredEdgeRange& edges) { - auto first_edge_itr = std::begin(edges); - using Vertex_handle = std::decay_t<decltype(std::get<0>(*first_edge_itr))>; - using Filtration_value = std::decay_t<decltype(std::get<2>(*first_edge_itr))>; - using Edge_collapser = Flag_complex_edge_collapser<Vertex_handle, Filtration_value>; - std::vector<typename Edge_collapser::Filtered_edge> remaining_edges; - if (first_edge_itr != std::end(edges)) { - Edge_collapser edge_collapser(edges); - edge_collapser.process_edges( - [&remaining_edges](Vertex_handle u, Vertex_handle v, Filtration_value filtration) { - // insert the edge - remaining_edges.emplace_back(u, v, filtration); - }); - } - return remaining_edges; + return flag_complex_collapse_edges(edges, [](auto const&d){return d;}); } } // namespace collapse diff --git a/src/Collapse/test/collapse_unit_test.cpp b/src/Collapse/test/collapse_unit_test.cpp index b8876246..f41dbedd 100644 --- a/src/Collapse/test/collapse_unit_test.cpp +++ b/src/Collapse/test/collapse_unit_test.cpp @@ -98,8 +98,8 @@ BOOST_AUTO_TEST_CASE(collapse) { // o---o // 0 3 edges.emplace_back(0, 2, 2.); - edges.emplace_back(1, 3, 2.); - trace_and_check_collapse(edges, {{1, 3, 2.}}); + edges.emplace_back(1, 3, 2.1); + trace_and_check_collapse(edges, {{1, 3, 2.1}}); // 1 2 4 // o---o---o @@ -121,8 +121,8 @@ BOOST_AUTO_TEST_CASE(collapse) { // o---o---o // 0 3 5 edges.emplace_back(2, 5, 4.); - edges.emplace_back(4, 3, 4.); - trace_and_check_collapse(edges, {{1, 3, 2.}, {4, 3, 4.}}); + edges.emplace_back(4, 3, 4.1); + trace_and_check_collapse(edges, {{1, 3, 2.}, {4, 3, 4.1}}); // 1 2 4 // o---o---o @@ -132,8 +132,8 @@ BOOST_AUTO_TEST_CASE(collapse) { // o---o---o // 0 3 5 edges.emplace_back(1, 5, 5.); - edges.emplace_back(0, 4, 5.); - trace_and_check_collapse(edges, {{1, 3, 2.}, {4, 3, 4.}, {0, 4, 5.}}); + edges.emplace_back(0, 4, 5.1); + trace_and_check_collapse(edges, {{1, 3, 2.}, {4, 3, 4.}, {0, 4, 5.1}}); } BOOST_AUTO_TEST_CASE(collapse_from_array) { @@ -150,8 +150,8 @@ BOOST_AUTO_TEST_CASE(collapse_from_array) { {2, 3, 1.}, {3, 0, 1.}, {0, 2, 2.}, - {1, 3, 2.}}}; - trace_and_check_collapse(f_edge_array, {{1, 3, 2.}}); + {1, 3, 2.1}}}; + trace_and_check_collapse(f_edge_array, {{1, 3, 2.1}}); } BOOST_AUTO_TEST_CASE(collapse_from_proximity_graph) { diff --git a/src/common/doc/main_page.md b/src/common/doc/main_page.md index 17354179..4f2f1692 100644 --- a/src/common/doc/main_page.md +++ b/src/common/doc/main_page.md @@ -231,13 +231,12 @@ homology of the input sequence. The resulting method is simple and extremely efficient. Computation of edge collapse and persistent homology of a filtered flag complex via edge collapse as described in - \cite edgecollapsesocg2020. + \cite edgecollapsearxiv. </td> <td width="15%"> - <b>Author:</b> Siddharth Pritam<br> + <b>Author:</b> Siddharth Pritam, Marc Glisse<br> <b>Introduced in:</b> GUDHI 3.3.0<br> - <b>Copyright:</b> MIT<br> - <b>Requires:</b> \ref eigen + <b>Copyright:</b> MIT </td> </tr> <tr> diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 81bc53fa..54221151 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -148,10 +148,6 @@ if(PYTHONINTERP_FOUND) add_gudhi_debug_info("Eigen3 version ${EIGEN3_VERSION}") # No problem, even if no CGAL found set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DCGAL_EIGEN3_ENABLED', ") - set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DGUDHI_USE_EIGEN3', ") - set(GUDHI_USE_EIGEN3 "True") - else (EIGEN3_FOUND) - set(GUDHI_USE_EIGEN3 "False") endif (EIGEN3_FOUND) set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'off_reader', ") @@ -609,4 +605,4 @@ if(PYTHONINTERP_FOUND) else(PYTHONINTERP_FOUND) message("++ Python module will not be compiled because no Python interpreter was found") set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python" CACHE INTERNAL "GUDHI_MISSING_MODULES") -endif(PYTHONINTERP_FOUND)
\ No newline at end of file +endif(PYTHONINTERP_FOUND) diff --git a/src/python/gudhi/__init__.py.in b/src/python/gudhi/__init__.py.in index 3043201a..79e12fbc 100644 --- a/src/python/gudhi/__init__.py.in +++ b/src/python/gudhi/__init__.py.in @@ -23,10 +23,6 @@ __all__ = [@GUDHI_PYTHON_MODULES@ @GUDHI_PYTHON_MODULES_EXTRA@] __available_modules = '' __missing_modules = '' -# For unitary tests purpose -# could use "if 'collapse_edges' in gudhi.__all__" when collapse edges will have a python module -__GUDHI_USE_EIGEN3 = @GUDHI_USE_EIGEN3@ - # Try to import * from gudhi.__module_name for default modules. # Extra modules require an explicit import by the user (mostly because of # unusual dependencies, but also to avoid cluttering namespace gudhi and diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index a4914184..2c53a872 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -676,18 +676,17 @@ cdef class SimplexTree: return (normal0, normals, infinite0, infinites) def collapse_edges(self, nb_iterations = 1): - """Assuming the simplex tree is a 1-skeleton graph, this method collapse edges (simplices of higher dimension - are ignored) and resets the simplex tree from the remaining edges. - A good candidate is to build a simplex tree on top of a :class:`~gudhi.RipsComplex` of dimension 1 before - collapsing edges + """Assuming the complex is a graph (simplices of higher dimension are ignored), this method implicitly + interprets it as the 1-skeleton of a flag complex, and replaces it with another (smaller) graph whose + expansion has the same persistent homology, using a technique known as edge collapses + (see :cite:`edgecollapsearxiv`). + + A natural application is to get a simplex tree of dimension 1 from :class:`~gudhi.RipsComplex`, + then collapse edges, perform :meth:`expansion()` and finally compute persistence (cf. :download:`rips_complex_edge_collapse_example.py <../example/rips_complex_edge_collapse_example.py>`). - For implementation details, please refer to :cite:`edgecollapsesocg2020`. :param nb_iterations: The number of edge collapse iterations to perform. Default is 1. :type nb_iterations: int - - :note: collapse_edges method requires `Eigen <installation.html#eigen>`_ >= 3.1.0 and an exception is thrown - if this method is not available. """ # Backup old pointer cdef Simplex_tree_interface_full_featured* ptr = self.get_ptr() diff --git a/src/python/include/Simplex_tree_interface.h b/src/python/include/Simplex_tree_interface.h index aa3dac18..7f9b0067 100644 --- a/src/python/include/Simplex_tree_interface.h +++ b/src/python/include/Simplex_tree_interface.h @@ -15,9 +15,7 @@ #include <gudhi/distance_functions.h> #include <gudhi/Simplex_tree.h> #include <gudhi/Points_off_io.h> -#ifdef GUDHI_USE_EIGEN3 #include <gudhi/Flag_complex_edge_collapser.h> -#endif #include <iostream> #include <vector> @@ -165,7 +163,6 @@ class Simplex_tree_interface : public Simplex_tree<SimplexTreeOptions> { } Simplex_tree_interface* collapse_edges(int nb_collapse_iteration) { -#ifdef GUDHI_USE_EIGEN3 using Filtered_edge = std::tuple<Vertex_handle, Vertex_handle, Filtration_value>; std::vector<Filtered_edge> edges; for (Simplex_handle sh : Base::skeleton_simplex_range(1)) { @@ -179,7 +176,7 @@ class Simplex_tree_interface : public Simplex_tree<SimplexTreeOptions> { } for (int iteration = 0; iteration < nb_collapse_iteration; iteration++) { - edges = Gudhi::collapse::flag_complex_collapse_edges(edges); + edges = Gudhi::collapse::flag_complex_collapse_edges(std::move(edges)); } Simplex_tree_interface* collapsed_stree_ptr = new Simplex_tree_interface(); // Copy the original 0-skeleton @@ -191,9 +188,6 @@ class Simplex_tree_interface : public Simplex_tree<SimplexTreeOptions> { collapsed_stree_ptr->insert({std::get<0>(remaining_edge), std::get<1>(remaining_edge)}, std::get<2>(remaining_edge)); } return collapsed_stree_ptr; -#else - throw std::runtime_error("Unable to collapse edges as it requires Eigen3 >= 3.1.0."); -#endif } void expansion_with_blockers_callback(int dimension, blocker_func_t user_func, void *user_data) { diff --git a/src/python/test/test_simplex_tree.py b/src/python/test/test_simplex_tree.py index eb481a49..688f4fd6 100755 --- a/src/python/test/test_simplex_tree.py +++ b/src/python/test/test_simplex_tree.py @@ -8,7 +8,7 @@ - YYYY/MM Author: Description of the modification """ -from gudhi import SimplexTree, __GUDHI_USE_EIGEN3 +from gudhi import SimplexTree import numpy as np import pytest @@ -354,16 +354,11 @@ def test_collapse_edges(): assert st.num_simplices() == 10 - if __GUDHI_USE_EIGEN3: - st.collapse_edges() - assert st.num_simplices() == 9 - assert st.find([1, 3]) == False - for simplex in st.get_skeleton(0): - assert simplex[1] == 1. - else: - # If no Eigen3, collapse_edges throws an exception - with pytest.raises(RuntimeError): - st.collapse_edges() + st.collapse_edges() + assert st.num_simplices() == 9 + assert st.find([0, 2]) == False # [1, 3] would be fine as well + for simplex in st.get_skeleton(0): + assert simplex[1] == 1. def test_reset_filtration(): st = SimplexTree() |