From 922acb40aec110347ac17da391ce5ae2b5da57db Mon Sep 17 00:00:00 2001 From: Ulrich Bauer Date: Sun, 24 Sep 2017 18:19:44 +0200 Subject: consolidated code for sparse and dense distance matrices --- ripser.cpp | 593 +++++++++++++++++++++++++++++-------------------------------- 1 file changed, 283 insertions(+), 310 deletions(-) (limited to 'ripser.cpp') diff --git a/ripser.cpp b/ripser.cpp index 2f41b05..1cb1d79 100644 --- a/ripser.cpp +++ b/ripser.cpp @@ -456,201 +456,12 @@ public: return diam; } - class simplex_coboundary_enumerator { - private: - index_t idx_below, idx_above, v, k; - std::vector vertices; - const diameter_entry_t simplex; - const coefficient_t modulus; - const compressed_lower_distance_matrix& dist; - const binomial_coeff_table& binomial_coeff; - - public: - simplex_coboundary_enumerator(const diameter_entry_t _simplex, index_t _dim, - const ripser& parent) - : idx_below(get_index(_simplex)), idx_above(0), v(parent.n - 1), k(_dim + 1), - vertices(_dim + 1), simplex(_simplex), modulus(parent.modulus), dist(parent.dist), - binomial_coeff(parent.binomial_coeff) { - parent.get_simplex_vertices(get_index(_simplex), _dim, parent.n, vertices.begin()); - } - - bool has_next() { - while ((v != -1) && (binomial_coeff(v, k) <= idx_below)) { - idx_below -= binomial_coeff(v, k); - idx_above += binomial_coeff(v, k + 1); - --v; - --k; - assert(k != -1); - } - return v != -1; - } - - diameter_entry_t next() { - value_t coface_diameter = get_diameter(simplex); - for (index_t w : vertices) coface_diameter = std::max(coface_diameter, dist(v, w)); - index_t coface_index = idx_above + binomial_coeff(v--, k + 1) + idx_below; - coefficient_t coface_coefficient = - (k & 1 ? -1 + modulus : 1) * get_coefficient(simplex) % modulus; - return diameter_entry_t(coface_diameter, coface_index, coface_coefficient); - } - }; - - class simplex_sparse_coboundary_enumerator { - private: - const ripser& parent; - - index_t idx_below, idx_above, v, k, max_vertex_below; - const diameter_entry_t simplex; - const coefficient_t modulus; - const DistanceMatrix& dist; - const binomial_coeff_table& binomial_coeff; - - std::vector& vertices; - std::vector::const_reverse_iterator>& neighbor_it; - std::vector::const_reverse_iterator>& neighbor_end; - diameter_index_t x; - - public: - simplex_sparse_coboundary_enumerator(const diameter_entry_t _simplex, index_t _dim, - const ripser& _parent) - : parent(_parent), idx_below(get_index(_simplex)), idx_above(0), - v(parent.n - 1), k(_dim + 1), max_vertex_below(parent.n - 1), simplex(_simplex), modulus(parent.modulus), - dist(parent.dist), binomial_coeff(parent.binomial_coeff), vertices(parent.vertices), - neighbor_it(parent.neighbor_it), neighbor_end(parent.neighbor_end) { - - neighbor_it.clear(); - neighbor_end.clear(); - vertices.clear(); - - parent.get_simplex_vertices(idx_below, _dim, parent.n, std::back_inserter(vertices)); - - for (auto v : vertices) { - neighbor_it.push_back(dist.neighbors[v].rbegin()); - neighbor_end.push_back(dist.neighbors[v].rend()); - } - } - - bool has_next(bool all_cofaces = true) { - for (auto &it0 = neighbor_it[0], &end0 = neighbor_end[0]; it0 != end0; ++it0) { - x = *it0; - for (size_t idx = 1; idx < neighbor_it.size(); ++idx) { - auto &it = neighbor_it[idx], end = neighbor_end[idx]; - while (get_index(*it) > get_index(x)) - if (++it == end) return false; - auto y = *it; - if (get_index(y) != get_index(x)) - goto continue_outer; - else - x = std::max(x, y); - } - return all_cofaces || - !(k > 0 && - parent.get_next_vertex(max_vertex_below, idx_below, k) > get_index(x)); - continue_outer:; - } - return false; - } - - diameter_entry_t next() { - ++neighbor_it[0]; - - while (k > 0 && parent.get_next_vertex(max_vertex_below, idx_below, k) > get_index(x)) { - idx_below -= binomial_coeff(max_vertex_below, k); - idx_above += binomial_coeff(max_vertex_below, k + 1); - --k; - } - - value_t coface_diameter = std::max(get_diameter(simplex), get_diameter(x)); - - coefficient_t coface_coefficient = - (k & 1 ? -1 + modulus : 1) * get_coefficient(simplex) % modulus; - - return diameter_entry_t(coface_diameter, - idx_above + binomial_coeff(get_index(x), k + 1) + idx_below, - coface_coefficient); - } - }; - - void assemble_columns_to_reduce(std::vector& columns_to_reduce, - hash_map& pivot_column_index, index_t dim) { - index_t num_simplices = binomial_coeff(n, dim + 1); - - columns_to_reduce.clear(); - -#ifdef INDICATE_PROGRESS - std::cout << "\033[K" - << "assembling " << num_simplices << " columns" << std::flush << "\r"; -#endif - - for (index_t index = 0; index < num_simplices; ++index) { - if (pivot_column_index.find(index) == pivot_column_index.end()) { - value_t diameter = compute_diameter(index, dim); - if (diameter <= threshold) - columns_to_reduce.push_back(std::make_pair(diameter, index)); -#ifdef INDICATE_PROGRESS - if ((index + 1) % 1000000 == 0) - std::cout << "\033[K" - << "assembled " << columns_to_reduce.size() << " out of " - << (index + 1) << "/" << num_simplices << " columns" << std::flush - << "\r"; -#endif - } - } - -#ifdef INDICATE_PROGRESS - std::cout << "\033[K" - << "sorting " << num_simplices << " columns" << std::flush << "\r"; -#endif - - std::sort(columns_to_reduce.begin(), columns_to_reduce.end(), - greater_diameter_or_smaller_index()); -#ifdef INDICATE_PROGRESS - std::cout << "\033[K"; -#endif - } - - void assemble_sparse_columns_to_reduce(std::vector& simplices, - std::vector& columns_to_reduce, - hash_map& pivot_column_index, - index_t dim) { - -#ifdef INDICATE_PROGRESS - std::cout << "\033[K" - << "assembling columns" << std::flush << "\r"; -#endif - - --dim; - columns_to_reduce.clear(); - - std::vector next_simplices; - - for (diameter_index_t simplex : simplices) { - simplex_sparse_coboundary_enumerator cofaces(simplex, dim, *this); - - while (cofaces.has_next(false)) { - auto coface = cofaces.next(); - - next_simplices.push_back(std::make_pair(get_diameter(coface), get_index(coface))); - - if (pivot_column_index.find(get_index(coface)) == pivot_column_index.end()) - columns_to_reduce.push_back( - std::make_pair(get_diameter(coface), get_index(coface))); - } - } - - simplices.swap(next_simplices); - -#ifdef INDICATE_PROGRESS - std::cout << "\033[K" - << "sorting " << columns_to_reduce.size() << " columns" << std::flush << "\r"; -#endif + class simplex_coboundary_enumerator; - std::sort(columns_to_reduce.begin(), columns_to_reduce.end(), - greater_diameter_or_smaller_index()); -#ifdef INDICATE_PROGRESS - std::cout << "\033[K"; -#endif - } + void assemble_columns_to_reduce(std::vector& simplices, + std::vector& columns_to_reduce, + hash_map& pivot_column_index, + index_t dim); template void compute_pairs(std::vector& columns_to_reduce, @@ -829,11 +640,286 @@ public: #endif } - void compute_barcodes(); + std::vector get_edges(); + + void compute_barcodes() { + + std::vector columns_to_reduce; + + std::vector simplices; + + { + union_find dset(n); + std::vector& edges = simplices; + + edges = get_edges(); + + std::sort(edges.rbegin(), edges.rend(), + greater_diameter_or_smaller_index()); + +#ifdef PRINT_PERSISTENCE_PAIRS + std::cout << "persistence intervals in dim 0:" << std::endl; +#endif + + std::vector vertices_of_edge(2); + for (auto e : edges) { + vertices_of_edge.clear(); + get_simplex_vertices(get_index(e), 1, n, std::back_inserter(vertices_of_edge)); + index_t u = dset.find(vertices_of_edge[0]), v = dset.find(vertices_of_edge[1]); + + if (u != v) { +#ifdef PRINT_PERSISTENCE_PAIRS + if (get_diameter(e) != 0) + std::cout << " [0," << get_diameter(e) << ")" << std::endl; +#endif + dset.link(u, v); + } else + columns_to_reduce.push_back(e); + } + std::reverse(columns_to_reduce.begin(), columns_to_reduce.end()); + +#ifdef PRINT_PERSISTENCE_PAIRS + for (index_t i = 0; i < n; ++i) + if (dset.find(i) == i) std::cout << " [0, )" << std::endl << std::flush; +#endif + } + + for (index_t dim = 1; dim <= dim_max; ++dim) { + hash_map pivot_column_index; + pivot_column_index.reserve(columns_to_reduce.size()); + + compute_pairs(columns_to_reduce, pivot_column_index, + dim); + + if (dim < dim_max) { + assemble_columns_to_reduce(simplices, columns_to_reduce, pivot_column_index, + dim + 1); + } + } + } }; -template <> void ripser::compute_barcodes(); -template <> void ripser::compute_barcodes(); + +template <> +class ripser::simplex_coboundary_enumerator { +private: + index_t idx_below, idx_above, v, k; + std::vector vertices; + const diameter_entry_t simplex; + const coefficient_t modulus; + const compressed_lower_distance_matrix& dist; + const binomial_coeff_table& binomial_coeff; + +public: + simplex_coboundary_enumerator(const diameter_entry_t _simplex, index_t _dim, + const ripser& parent) + : idx_below(get_index(_simplex)), idx_above(0), v(parent.n - 1), k(_dim + 1), + vertices(_dim + 1), simplex(_simplex), modulus(parent.modulus), dist(parent.dist), + binomial_coeff(parent.binomial_coeff) { + parent.get_simplex_vertices(get_index(_simplex), _dim, parent.n, vertices.begin()); + } + + bool has_next() { + while ((v != -1) && (binomial_coeff(v, k) <= idx_below)) { + idx_below -= binomial_coeff(v, k); + idx_above += binomial_coeff(v, k + 1); + --v; + --k; + assert(k != -1); + } + return v != -1; + } + + diameter_entry_t next() { + value_t coface_diameter = get_diameter(simplex); + for (index_t w : vertices) coface_diameter = std::max(coface_diameter, dist(v, w)); + index_t coface_index = idx_above + binomial_coeff(v--, k + 1) + idx_below; + coefficient_t coface_coefficient = + (k & 1 ? -1 + modulus : 1) * get_coefficient(simplex) % modulus; + return diameter_entry_t(coface_diameter, coface_index, coface_coefficient); + } +}; + +template <> +class ripser::simplex_coboundary_enumerator { +private: + const ripser& parent; + + index_t idx_below, idx_above, v, k, max_vertex_below; + const diameter_entry_t simplex; + const coefficient_t modulus; + const sparse_distance_matrix& dist; + const binomial_coeff_table& binomial_coeff; + + std::vector& vertices; + std::vector::const_reverse_iterator>& neighbor_it; + std::vector::const_reverse_iterator>& neighbor_end; + diameter_index_t x; + +public: + simplex_coboundary_enumerator(const diameter_entry_t _simplex, index_t _dim, + const ripser& _parent) + : parent(_parent), idx_below(get_index(_simplex)), idx_above(0), + v(parent.n - 1), k(_dim + 1), max_vertex_below(parent.n - 1), simplex(_simplex), modulus(parent.modulus), + dist(parent.dist), binomial_coeff(parent.binomial_coeff), vertices(parent.vertices), + neighbor_it(parent.neighbor_it), neighbor_end(parent.neighbor_end) { + + neighbor_it.clear(); + neighbor_end.clear(); + vertices.clear(); + + parent.get_simplex_vertices(idx_below, _dim, parent.n, std::back_inserter(vertices)); + + for (auto v : vertices) { + neighbor_it.push_back(dist.neighbors[v].rbegin()); + neighbor_end.push_back(dist.neighbors[v].rend()); + } + } + + bool has_next(bool all_cofaces = true) { + for (auto &it0 = neighbor_it[0], &end0 = neighbor_end[0]; it0 != end0; ++it0) { + x = *it0; + for (size_t idx = 1; idx < neighbor_it.size(); ++idx) { + auto &it = neighbor_it[idx], end = neighbor_end[idx]; + while (get_index(*it) > get_index(x)) + if (++it == end) return false; + auto y = *it; + if (get_index(y) != get_index(x)) + goto continue_outer; + else + x = std::max(x, y); + } + return all_cofaces || + !(k > 0 && + parent.get_next_vertex(max_vertex_below, idx_below, k) > get_index(x)); + continue_outer:; + } + return false; + } + + diameter_entry_t next() { + ++neighbor_it[0]; + + while (k > 0 && parent.get_next_vertex(max_vertex_below, idx_below, k) > get_index(x)) { + idx_below -= binomial_coeff(max_vertex_below, k); + idx_above += binomial_coeff(max_vertex_below, k + 1); + --k; + } + + value_t coface_diameter = std::max(get_diameter(simplex), get_diameter(x)); + + coefficient_t coface_coefficient = + (k & 1 ? -1 + modulus : 1) * get_coefficient(simplex) % modulus; + + return diameter_entry_t(coface_diameter, + idx_above + binomial_coeff(get_index(x), k + 1) + idx_below, + coface_coefficient); + } +}; + + +template <> std::vector ripser::get_edges() { + std::vector edges; + for (index_t index = binomial_coeff(n, 2); index-- > 0;) { + value_t diameter = compute_diameter(index, 1); + if (diameter <= threshold) edges.push_back(std::make_pair(diameter, index)); + } + return edges; +} + +template <> std::vector ripser::get_edges() { + std::vector edges; + for (index_t i = 0; i < n; ++i) + for (auto n : dist.neighbors[i]) { + index_t j = get_index(n); + if (i > j) edges.push_back(std::make_pair(get_diameter(n), get_edge_index(i, j))); + } + return edges; +} + +template <> void ripser:: +assemble_columns_to_reduce(std::vector& simplices, + std::vector& columns_to_reduce, + hash_map& pivot_column_index, index_t dim) { + index_t num_simplices = binomial_coeff(n, dim + 1); + + columns_to_reduce.clear(); + +#ifdef INDICATE_PROGRESS + std::cout << "\033[K" + << "assembling " << num_simplices << " columns" << std::flush << "\r"; +#endif + + for (index_t index = 0; index < num_simplices; ++index) { + if (pivot_column_index.find(index) == pivot_column_index.end()) { + value_t diameter = compute_diameter(index, dim); + if (diameter <= threshold) + columns_to_reduce.push_back(std::make_pair(diameter, index)); +#ifdef INDICATE_PROGRESS + if ((index + 1) % 1000000 == 0) + std::cout << "\033[K" + << "assembled " << columns_to_reduce.size() << " out of " + << (index + 1) << "/" << num_simplices << " columns" << std::flush + << "\r"; +#endif + } + } + +#ifdef INDICATE_PROGRESS + std::cout << "\033[K" + << "sorting " << num_simplices << " columns" << std::flush << "\r"; +#endif + + std::sort(columns_to_reduce.begin(), columns_to_reduce.end(), + greater_diameter_or_smaller_index()); +#ifdef INDICATE_PROGRESS + std::cout << "\033[K"; +#endif +} + +template <> void ripser:: +assemble_columns_to_reduce(std::vector& simplices, + std::vector& columns_to_reduce, + hash_map& pivot_column_index, + index_t dim) { + +#ifdef INDICATE_PROGRESS + std::cout << "\033[K" + << "assembling columns" << std::flush << "\r"; +#endif + + --dim; + columns_to_reduce.clear(); + + std::vector next_simplices; + + for (diameter_index_t simplex : simplices) { + simplex_coboundary_enumerator cofaces(simplex, dim, *this); + + while (cofaces.has_next(false)) { + auto coface = cofaces.next(); + + next_simplices.push_back(std::make_pair(get_diameter(coface), get_index(coface))); + + if (pivot_column_index.find(get_index(coface)) == pivot_column_index.end()) + columns_to_reduce.push_back( + std::make_pair(get_diameter(coface), get_index(coface))); + } + } + + simplices.swap(next_simplices); + +#ifdef INDICATE_PROGRESS + std::cout << "\033[K" + << "sorting " << columns_to_reduce.size() << " columns" << std::flush << "\r"; +#endif + + std::sort(columns_to_reduce.begin(), columns_to_reduce.end(), + greater_diameter_or_smaller_index()); +#ifdef INDICATE_PROGRESS + std::cout << "\033[K"; +#endif +} enum file_format { LOWER_DISTANCE_MATRIX, @@ -1072,8 +1158,6 @@ int main(int argc, char** argv) { std::cout << "distance matrix with " << dist.size() << " points" << std::endl; - auto value_range = std::minmax_element(dist.distances.begin(), dist.distances.end()); - value_t min = std::numeric_limits::infinity(), max = -std::numeric_limits::infinity(); for (auto d: dist.distances) { @@ -1097,114 +1181,3 @@ int main(int argc, char** argv) { .compute_barcodes(); } - -template <> void ripser::compute_barcodes() { - - std::vector columns_to_reduce; - - { - union_find dset(n); - std::vector edges; - for (index_t index = binomial_coeff(n, 2); index-- > 0;) { - value_t diameter = compute_diameter(index, 1); - if (diameter <= threshold) edges.push_back(std::make_pair(diameter, index)); - } - std::sort(edges.rbegin(), edges.rend(), - greater_diameter_or_smaller_index()); - -#ifdef PRINT_PERSISTENCE_PAIRS - std::cout << "persistence intervals in dim 0:" << std::endl; -#endif - - std::vector vertices_of_edge(2); - for (auto e : edges) { - vertices_of_edge.clear(); - get_simplex_vertices(get_index(e), 1, n, std::back_inserter(vertices_of_edge)); - index_t u = dset.find(vertices_of_edge[0]), v = dset.find(vertices_of_edge[1]); - - if (u != v) { -#ifdef PRINT_PERSISTENCE_PAIRS - if (get_diameter(e) != 0) - std::cout << " [0," << get_diameter(e) << ")" << std::endl; -#endif - dset.link(u, v); - } else - columns_to_reduce.push_back(e); - } - std::reverse(columns_to_reduce.begin(), columns_to_reduce.end()); - -#ifdef PRINT_PERSISTENCE_PAIRS - for (index_t i = 0; i < n; ++i) - if (dset.find(i) == i) std::cout << " [0, )" << std::endl << std::flush; -#endif - } - - for (index_t dim = 1; dim <= dim_max; ++dim) { - hash_map pivot_column_index; - pivot_column_index.reserve(columns_to_reduce.size()); - - compute_pairs(columns_to_reduce, pivot_column_index, dim); - - if (dim < dim_max) { - assemble_columns_to_reduce(columns_to_reduce, pivot_column_index, dim + 1); - } - } -} - -template <> void ripser::compute_barcodes() { - - std::vector columns_to_reduce; - - std::vector simplices; - - { - union_find dset(n); - std::vector& edges = simplices; - for (index_t i = 0; i < n; ++i) - for (auto n : dist.neighbors[i]) { - index_t j = get_index(n); - if (i > j) edges.push_back(std::make_pair(get_diameter(n), get_edge_index(i, j))); - } - std::sort(edges.rbegin(), edges.rend(), - greater_diameter_or_smaller_index()); - -#ifdef PRINT_PERSISTENCE_PAIRS - std::cout << "persistence intervals in dim 0:" << std::endl; -#endif - - std::vector vertices_of_edge(2); - for (auto e : edges) { - vertices_of_edge.clear(); - get_simplex_vertices(get_index(e), 1, n, std::back_inserter(vertices_of_edge)); - index_t u = dset.find(vertices_of_edge[0]), v = dset.find(vertices_of_edge[1]); - - if (u != v) { -#ifdef PRINT_PERSISTENCE_PAIRS - if (get_diameter(e) != 0) - std::cout << " [0," << get_diameter(e) << ")" << std::endl; -#endif - dset.link(u, v); - } else - columns_to_reduce.push_back(e); - } - std::reverse(columns_to_reduce.begin(), columns_to_reduce.end()); - -#ifdef PRINT_PERSISTENCE_PAIRS - for (index_t i = 0; i < n; ++i) - if (dset.find(i) == i) std::cout << " [0, )" << std::endl << std::flush; -#endif - } - - for (index_t dim = 1; dim <= dim_max; ++dim) { - hash_map pivot_column_index; - pivot_column_index.reserve(columns_to_reduce.size()); - - compute_pairs(columns_to_reduce, pivot_column_index, - dim); - - if (dim < dim_max) { - assemble_sparse_columns_to_reduce(simplices, columns_to_reduce, pivot_column_index, - dim + 1); - } - } -} -- cgit v1.2.3