diff options
Diffstat (limited to 'ripser.cpp')
-rw-r--r-- | ripser.cpp | 703 |
1 files changed, 306 insertions, 397 deletions
@@ -43,49 +43,52 @@ //#define USE_GOOGLE_HASHMAP -#include <algorithm> -#include <cassert> -#include <cmath> #include <fstream> #include <iostream> #include <numeric> #include <queue> #include <sstream> #include <unordered_map> +#include <chrono> #ifdef USE_GOOGLE_HASHMAP #include <sparsehash/dense_hash_map> -template <class Key, class T> class hash_map : public google::dense_hash_map<Key, T> { +template <class Key, class T, class H, class E> +class hash_map : public google::dense_hash_map<Key, T, H, E> { public: - explicit hash_map() : google::dense_hash_map<Key, T>() { this->set_empty_key(-1); } + explicit hash_map() : google::dense_hash_map<Key, T, H, E>() { this->set_empty_key(-1); } inline void reserve(size_t hint) { this->resize(hint); } }; #else -template <class Key, class T> class hash_map : public std::unordered_map<Key, T> {}; +template <class Key, class T, class H, class E> +class hash_map : public std::unordered_map<Key, T, H, E> {}; #endif typedef float value_t; typedef int64_t index_t; -typedef int16_t coefficient_t; +typedef uint16_t coefficient_t; + +static const std::chrono::milliseconds time_step(40); + +static const std::string clear_line("\r\033[K"); class binomial_coeff_table { std::vector<std::vector<index_t>> B; public: binomial_coeff_table(index_t n, index_t k) : B(n + 1) { - for (index_t i = 0; i <= n; i++) { - B[i].resize(k + 1); - for (index_t j = 0; j <= std::min(i, k); j++) - if (j == 0 || j == i) - B[i][j] = 1; - else - B[i][j] = B[i - 1][j - 1] + B[i - 1][j]; + for (index_t i = 0; i <= n; ++i) { + B[i].resize(k + 1, 0); + B[i][0] = 1; + if (i <= k) B[i][i] = 1; + for (index_t j = 1; j < std::min(i, k + 1); ++j) + B[i][j] = B[i - 1][j - 1] + B[i - 1][j]; } } index_t operator()(index_t n, index_t k) const { - assert(n < B.size() && k < B[n].size()); + assert(n < B.size() && k < B[n].size() && n >= k - 1); return B[n][k]; } }; @@ -102,7 +105,7 @@ std::vector<coefficient_t> multiplicative_inverse_vector(const coefficient_t m) inverse[1] = 1; // m = a * (m / a) + m % a // Multipying with inverse(a) * inverse(m % a): - // 0 = inverse(m % a) * (m / a) + inverse(a) (mod m) + // 0 = inverse(m % a) * (m / a) + inverse(a) (mod m) for (coefficient_t a = 2; a < m; ++a) inverse[a] = m - (inverse[m % a] * (m / a)) % m; return inverse; } @@ -114,14 +117,12 @@ struct __attribute__((packed)) entry_t { coefficient_t coefficient; entry_t(index_t _index, coefficient_t _coefficient) : index(_index), coefficient(_coefficient) {} + entry_t(index_t _index) : index(_index), coefficient(0) {} entry_t() : index(0), coefficient(0) {} }; static_assert(sizeof(entry_t) == sizeof(index_t), "size of entry_t is not the same as index_t"); -entry_t make_entry(index_t _index, coefficient_t _coefficient) { - return entry_t(_index, _coefficient); -} index_t get_index(const entry_t& e) { return e.index; } index_t get_coefficient(const entry_t& e) { return e.coefficient; } void set_coefficient(entry_t& e, const coefficient_t c) { e.coefficient = c; } @@ -147,6 +148,18 @@ void set_coefficient(entry_t& e, const coefficient_t c) {} const entry_t& get_entry(const entry_t& e) { return e; } +struct entry_hash { + std::size_t operator()(const entry_t& e) const { return std::hash<index_t>()(get_index(e)); } +}; + +struct equal_index { + bool operator()(const entry_t& e, const entry_t& f) const { + return get_index(e) == get_index(f); + } +}; + +typedef hash_map<entry_t, index_t, entry_hash, equal_index> entry_hash_map; + typedef std::pair<value_t, index_t> diameter_index_t; value_t get_diameter(const diameter_index_t& i) { return i.first; } index_t get_index(const diameter_index_t& i) { return i.second; } @@ -155,14 +168,13 @@ typedef std::pair<index_t, value_t> index_diameter_t; index_t get_index(const index_diameter_t& i) { return i.first; } value_t get_diameter(const index_diameter_t& i) { return i.second; } -class diameter_entry_t : public std::pair<value_t, entry_t> { -public: - diameter_entry_t() {} +struct diameter_entry_t : std::pair<value_t, entry_t> { + using std::pair<value_t, entry_t>::pair; diameter_entry_t(value_t _diameter, index_t _index, coefficient_t _coefficient) - : std::pair<value_t, entry_t>(_diameter, make_entry(_index, _coefficient)) {} + : diameter_entry_t(_diameter, {_index, _coefficient}) {} diameter_entry_t(const diameter_index_t& _diameter_index, coefficient_t _coefficient) - : std::pair<value_t, entry_t>(get_diameter(_diameter_index), - make_entry(get_index(_diameter_index), _coefficient)) {} + : diameter_entry_t(get_diameter(_diameter_index), + {get_index(_diameter_index), _coefficient}) {} diameter_entry_t(const index_t& _index) : diameter_entry_t(0, _index, 0) {} }; @@ -186,13 +198,10 @@ template <typename Entry> struct greater_diameter_or_smaller_index { enum compressed_matrix_layout { LOWER_TRIANGULAR, UPPER_TRIANGULAR }; -template <compressed_matrix_layout Layout> class compressed_distance_matrix { -public: +template <compressed_matrix_layout Layout> struct compressed_distance_matrix { std::vector<value_t> distances; std::vector<value_t*> rows; - void init_rows(); - compressed_distance_matrix(std::vector<value_t>&& _distances) : distances(std::move(_distances)), rows((1 + std::sqrt(1 + 8 * distances.size())) / 2) { assert(distances.size() == size() * (size() - 1) / 2); @@ -209,36 +218,14 @@ public: } value_t operator()(const index_t i, const index_t j) const; - size_t size() const { return rows.size(); } + void init_rows(); }; -class sparse_distance_matrix { -public: - std::vector<std::vector<index_diameter_t>> neighbors; - - index_t num_edges; - - sparse_distance_matrix(std::vector<std::vector<index_diameter_t>>&& _neighbors, - index_t _num_edges) - : neighbors(std::move(_neighbors)), num_edges(_num_edges) {} - - template <typename DistanceMatrix> - sparse_distance_matrix(const DistanceMatrix& mat, value_t threshold) - : neighbors(mat.size()), num_edges(0) { - - for (index_t i = 0; i < size(); ++i) - for (index_t j = 0; j < size(); ++j) - if (i != j && mat(i, j) <= threshold) { - ++num_edges; - neighbors[i].push_back(std::make_pair(j, mat(i, j))); - } - } - - size_t size() const { return neighbors.size(); } -}; +typedef compressed_distance_matrix<LOWER_TRIANGULAR> compressed_lower_distance_matrix; +typedef compressed_distance_matrix<UPPER_TRIANGULAR> compressed_upper_distance_matrix; -template <> void compressed_distance_matrix<LOWER_TRIANGULAR>::init_rows() { +template <> void compressed_lower_distance_matrix::init_rows() { value_t* pointer = &distances[0]; for (index_t i = 1; i < size(); ++i) { rows[i] = pointer; @@ -246,7 +233,7 @@ template <> void compressed_distance_matrix<LOWER_TRIANGULAR>::init_rows() { } } -template <> void compressed_distance_matrix<UPPER_TRIANGULAR>::init_rows() { +template <> void compressed_upper_distance_matrix::init_rows() { value_t* pointer = &distances[0] - 1; for (index_t i = 0; i < size() - 1; ++i) { rows[i] = pointer; @@ -255,22 +242,43 @@ template <> void compressed_distance_matrix<UPPER_TRIANGULAR>::init_rows() { } template <> -value_t compressed_distance_matrix<UPPER_TRIANGULAR>::operator()(const index_t i, - const index_t j) const { - return i == j ? 0 : i > j ? rows[j][i] : rows[i][j]; +value_t compressed_lower_distance_matrix::operator()(const index_t i, const index_t j) const { + return i == j ? 0 : i < j ? rows[j][i] : rows[i][j]; } template <> -value_t compressed_distance_matrix<LOWER_TRIANGULAR>::operator()(const index_t i, - const index_t j) const { - return i == j ? 0 : i < j ? rows[j][i] : rows[i][j]; +value_t compressed_upper_distance_matrix::operator()(const index_t i, const index_t j) const { + return i == j ? 0 : i > j ? rows[j][i] : rows[i][j]; } -typedef compressed_distance_matrix<LOWER_TRIANGULAR> compressed_lower_distance_matrix; -typedef compressed_distance_matrix<UPPER_TRIANGULAR> compressed_upper_distance_matrix; +struct sparse_distance_matrix { + std::vector<std::vector<index_diameter_t>> neighbors; -class euclidean_distance_matrix { -public: + index_t num_edges; + + mutable std::vector<std::vector<index_diameter_t>::const_reverse_iterator> neighbor_it; + mutable std::vector<std::vector<index_diameter_t>::const_reverse_iterator> neighbor_end; + + sparse_distance_matrix(std::vector<std::vector<index_diameter_t>>&& _neighbors, + index_t _num_edges) + : neighbors(std::move(_neighbors)), num_edges(_num_edges) {} + + template <typename DistanceMatrix> + sparse_distance_matrix(const DistanceMatrix& mat, const value_t threshold) + : neighbors(mat.size()), num_edges(0) { + + for (index_t i = 0; i < size(); ++i) + for (index_t j = 0; j < size(); ++j) + if (i != j && mat(i, j) <= threshold) { + ++num_edges; + neighbors[i].push_back({j, mat(i, j)}); + } + } + + size_t size() const { return neighbors.size(); } +}; + +struct euclidean_distance_matrix { std::vector<std::vector<value_t>> points; euclidean_distance_matrix(std::vector<std::vector<value_t>>&& _points) @@ -294,7 +302,7 @@ class union_find { std::vector<uint8_t> rank; public: - union_find(index_t n) : parent(n), rank(n, 0) { + union_find(const index_t n) : parent(n), rank(n, 0) { for (index_t i = 0; i < n; ++i) parent[i] = i; } @@ -307,6 +315,7 @@ public: } return z; } + void link(index_t x, index_t y) { if ((x = find(x)) == (y = find(y))) return; if (rank[x] > rank[y]) @@ -318,7 +327,7 @@ public: } }; -template <typename Heap> diameter_entry_t pop_pivot(Heap& column, coefficient_t modulus) { +template <typename Column> diameter_entry_t pop_pivot(Column& column, coefficient_t modulus) { #ifdef USE_COEFFICIENTS diameter_entry_t pivot(-1); while (!column.empty()) { @@ -350,60 +359,47 @@ template <typename Heap> diameter_entry_t pop_pivot(Heap& column, coefficient_t return pivot; } -template <typename Heap> diameter_entry_t get_pivot(Heap& column, coefficient_t modulus) { +template <typename Column> diameter_entry_t get_pivot(Column& column, const coefficient_t modulus) { diameter_entry_t result = pop_pivot(column, modulus); if (get_index(result) != -1) column.push(result); return result; } +template <typename T> T begin(std::pair<T, T>& p) { return p.first; } +template <typename T> T end(std::pair<T, T>& p) { return p.second; } + template <typename ValueType> class compressed_sparse_matrix { std::vector<size_t> bounds; std::vector<ValueType> entries; + typedef typename std::vector<ValueType>::iterator iterator; + typedef std::pair<iterator, iterator> iterator_pair; + public: size_t size() const { return bounds.size(); } - typename std::vector<ValueType>::const_iterator cbegin(size_t index) const { - assert(index < size()); - return index == 0 ? entries.cbegin() : entries.cbegin() + bounds[index - 1]; - } - - typename std::vector<ValueType>::const_iterator cend(size_t index) const { - assert(index < size()); - return entries.cbegin() + bounds[index]; - } - - template <typename Iterator> void append_column(Iterator begin, Iterator end) { - for (Iterator it = begin; it != end; ++it) { entries.push_back(*it); } - bounds.push_back(entries.size()); + iterator_pair subrange(const index_t index) { + return {entries.begin() + (index == 0 ? 0 : bounds[index - 1]), + entries.begin() + bounds[index]}; } void append_column() { bounds.push_back(entries.size()); } - void push_back(ValueType e) { + void push_back(const ValueType e) { assert(0 < size()); entries.push_back(e); ++bounds.back(); } - - void pop_back() { - assert(0 < size()); - entries.pop_back(); - --bounds.back(); - } - - template <typename Collection> void append_column(const Collection collection) { - append_column(collection.cbegin(), collection.cend()); - } }; -template <class Predicate> index_t upper_bound(index_t top, Predicate pred) { +template <class Predicate> +index_t get_max(index_t top, const index_t bottom, const Predicate pred) { if (!pred(top)) { - index_t count = top; + index_t count = top - bottom; while (count > 0) { - index_t step = count >> 1; - if (!pred(top - step)) { - top -= step + 1; + index_t step = count >> 1, mid = top - step; + if (!pred(mid)) { + top = mid - 1; count -= step + 1; } else count = step; @@ -413,16 +409,13 @@ template <class Predicate> index_t upper_bound(index_t top, Predicate pred) { } template <typename DistanceMatrix> class ripser { - DistanceMatrix dist; - index_t n, dim_max; - value_t threshold; - float ratio; - coefficient_t modulus; + const DistanceMatrix dist; + const index_t n, dim_max; + const value_t threshold; + const float ratio; + const coefficient_t modulus; const binomial_coeff_table binomial_coeff; - std::vector<coefficient_t> multiplicative_inverse; - mutable std::vector<index_t> vertices; - mutable std::vector<std::vector<index_diameter_t>::const_reverse_iterator> neighbor_it; - mutable std::vector<std::vector<index_diameter_t>::const_reverse_iterator> neighbor_end; + const std::vector<coefficient_t> multiplicative_inverse; mutable std::vector<diameter_entry_t> coface_entries; public: @@ -433,9 +426,8 @@ public: ratio(_ratio), modulus(_modulus), binomial_coeff(n, dim_max + 2), multiplicative_inverse(multiplicative_inverse_vector(_modulus)) {} - index_t get_next_vertex(index_t& v, const index_t idx, const index_t k) const { - return v = upper_bound( - v, [&](const index_t& w) -> bool { return (binomial_coeff(w, k) <= idx); }); + index_t get_max_vertex(const index_t idx, const index_t k, const index_t n) const { + return get_max(n, k - 1, [&](index_t w) -> bool { return (binomial_coeff(w, k) <= idx); }); } index_t get_edge_index(const index_t i, const index_t j) const { @@ -443,53 +435,90 @@ public: } template <typename OutputIterator> - OutputIterator get_simplex_vertices(index_t idx, const index_t dim, index_t v, + OutputIterator get_simplex_vertices(index_t idx, const index_t dim, index_t n, OutputIterator out) const { - --v; + --n; for (index_t k = dim + 1; k > 0; --k) { - get_next_vertex(v, idx, k); - *out++ = v; - idx -= binomial_coeff(v, k); + n = get_max_vertex(idx, k, n); + *out++ = n; + idx -= binomial_coeff(n, k); } return out; } - value_t compute_diameter(const index_t index, index_t dim) const { - value_t diam = -std::numeric_limits<value_t>::infinity(); - - vertices.clear(); - get_simplex_vertices(index, dim, dist.size(), std::back_inserter(vertices)); - - for (index_t i = 0; i <= dim; ++i) - for (index_t j = 0; j < i; ++j) { - diam = std::max(diam, dist(vertices[i], vertices[j])); - } - return diam; - } - class simplex_coboundary_enumerator; void assemble_columns_to_reduce(std::vector<diameter_index_t>& simplices, std::vector<diameter_index_t>& columns_to_reduce, - hash_map<index_t, index_t>& pivot_column_index, index_t dim); + entry_hash_map& pivot_column_index, index_t dim) { - void compute_dim_0_pairs(std::vector<diameter_index_t>& edges, - std::vector<diameter_index_t>& columns_to_reduce) { - union_find dset(n); +#ifdef INDICATE_PROGRESS + std::cerr << clear_line + << "assembling columns" + << std::flush; +#endif + std::chrono::steady_clock::time_point next = std::chrono::steady_clock::now() + time_step; - edges = get_edges(); + --dim; + columns_to_reduce.clear(); - std::sort(edges.rbegin(), edges.rend(), + std::vector<diameter_index_t> next_simplices; + + index_t i = 0; + + for (diameter_index_t& simplex : simplices) { + ++i; + + simplex_coboundary_enumerator cofaces(diameter_entry_t(simplex, 1), dim, *this); + + while (cofaces.has_next(false)) { +#ifdef INDICATE_PROGRESS + if (std::chrono::steady_clock::now() > next) { + std::cerr << clear_line + << "assembling " << next_simplices.size() << " columns (processing " + << std::distance(&simplices[0], &simplex) << "/" << simplices.size() << " simplices)" + << std::flush; + next = std::chrono::steady_clock::now() + time_step; + } +#endif + auto coface = cofaces.next(); + + next_simplices.push_back({get_diameter(coface), get_index(coface)}); + + if (pivot_column_index.find(get_entry(coface)) == pivot_column_index.end()) + columns_to_reduce.push_back({get_diameter(coface), get_index(coface)}); + } + } + + simplices.swap(next_simplices); + +#ifdef INDICATE_PROGRESS + std::cerr << clear_line + << "sorting " << columns_to_reduce.size() << " columns" + << std::flush; +#endif + + std::sort(columns_to_reduce.begin(), columns_to_reduce.end(), greater_diameter_or_smaller_index<diameter_index_t>()); +#ifdef INDICATE_PROGRESS + std::cerr << clear_line << std::flush; +#endif + } + void compute_dim_0_pairs(std::vector<diameter_index_t>& edges, + std::vector<diameter_index_t>& columns_to_reduce) { #ifdef PRINT_PERSISTENCE_PAIRS std::cout << "persistence intervals in dim 0:" << std::endl; #endif + union_find dset(n); + + edges = get_edges(); + std::sort(edges.rbegin(), edges.rend(), + greater_diameter_or_smaller_index<diameter_index_t>()); std::vector<index_t> vertices_of_edge(2); for (auto e : edges) { - vertices_of_edge.clear(); - get_simplex_vertices(get_index(e), 1, n, std::back_inserter(vertices_of_edge)); + get_simplex_vertices(get_index(e), 1, n, vertices_of_edge.rbegin()); index_t u = dset.find(vertices_of_edge[0]), v = dset.find(vertices_of_edge[1]); if (u != v) { @@ -505,149 +534,166 @@ public: #ifdef PRINT_PERSISTENCE_PAIRS for (index_t i = 0; i < n; ++i) - if (dset.find(i) == i) std::cout << " [0, )" << std::endl << std::flush; + if (dset.find(i) == i) std::cout << " [0, )" << std::endl; #endif } - template <typename Column, typename Iterator> - diameter_entry_t add_coboundary_and_get_pivot(Iterator column_begin, Iterator column_end, - coefficient_t factor_column_to_add, - Column& working_reduction_column, - Column& working_coboundary, const index_t& dim, - hash_map<index_t, index_t>& pivot_column_index, - bool& might_be_apparent_pair); + template <typename Column> + diameter_entry_t init_coboundary_and_get_pivot(const diameter_entry_t simplex, + Column& working_coboundary, const index_t& dim, + entry_hash_map& pivot_column_index) { + bool check_for_emergent_pair = true; + coface_entries.clear(); + simplex_coboundary_enumerator cofaces(simplex, dim, *this); + while (cofaces.has_next()) { + diameter_entry_t coface = cofaces.next(); + if (get_diameter(coface) <= threshold) { + coface_entries.push_back(coface); + if (check_for_emergent_pair && (get_diameter(simplex) == get_diameter(coface))) { + if (pivot_column_index.find(get_entry(coface)) == pivot_column_index.end()) + return coface; + check_for_emergent_pair = false; + } + } + } + for (auto coface : coface_entries) working_coboundary.push(coface); + return get_pivot(working_coboundary, modulus); + } - void compute_pairs(std::vector<diameter_index_t>& columns_to_reduce, - hash_map<index_t, index_t>& pivot_column_index, index_t dim) { + template <typename Column> + void add_coboundary(const diameter_entry_t simplex, Column& working_reduction_column, + Column& working_coboundary, const index_t& dim) { + working_reduction_column.push(simplex); + simplex_coboundary_enumerator cofaces(simplex, dim, *this); + while (cofaces.has_next()) { + diameter_entry_t coface = cofaces.next(); + if (get_diameter(coface) <= threshold) working_coboundary.push(coface); + } + } + + template <typename Column> + void add_coboundary(compressed_sparse_matrix<diameter_entry_t>& reduction_matrix, + const std::vector<diameter_index_t>& columns_to_reduce, + const index_t index_column_to_add, const coefficient_t factor, + Column& working_reduction_column, Column& working_coboundary, + const index_t& dim) { + diameter_entry_t column_to_add(columns_to_reduce[index_column_to_add], factor); + add_coboundary(column_to_add, working_reduction_column, working_coboundary, dim); + + for (auto simplex : reduction_matrix.subrange(index_column_to_add)) { + set_coefficient(simplex, get_coefficient(simplex) * factor % modulus); + working_reduction_column.push(simplex); + add_coboundary(simplex, working_reduction_column, working_coboundary, dim); + } + } + + void compute_pairs(const std::vector<diameter_index_t>& columns_to_reduce, + entry_hash_map& pivot_column_index, const index_t dim) { #ifdef PRINT_PERSISTENCE_PAIRS std::cout << "persistence intervals in dim " << dim << ":" << std::endl; #endif compressed_sparse_matrix<diameter_entry_t> reduction_matrix; + + std::chrono::steady_clock::time_point next = std::chrono::steady_clock::now() + time_step; for (index_t index_column_to_reduce = 0; index_column_to_reduce < columns_to_reduce.size(); ++index_column_to_reduce) { - auto column_to_reduce = columns_to_reduce[index_column_to_reduce]; - std::priority_queue<diameter_entry_t, std::vector<diameter_entry_t>, - greater_diameter_or_smaller_index<diameter_entry_t>> - working_reduction_column; + diameter_entry_t column_to_reduce(columns_to_reduce[index_column_to_reduce], 1); + value_t diameter = get_diameter(column_to_reduce); + + reduction_matrix.append_column(); std::priority_queue<diameter_entry_t, std::vector<diameter_entry_t>, greater_diameter_or_smaller_index<diameter_entry_t>> - working_coboundary; + working_reduction_column, working_coboundary; - value_t diameter = get_diameter(column_to_reduce); + diameter_entry_t pivot = init_coboundary_and_get_pivot( + column_to_reduce, working_coboundary, dim, pivot_column_index); + while (true) { #ifdef INDICATE_PROGRESS - if ((index_column_to_reduce + 1) % 1000000 == 0) - std::cout << "\033[K" - << "reducing column " << index_column_to_reduce + 1 << "/" - << columns_to_reduce.size() << " (diameter " << diameter << ")" - << std::flush << "\r"; + if (std::chrono::steady_clock::now() > next) { + std::cerr << clear_line + << "reducing column " << index_column_to_reduce + 1 << "/" + << columns_to_reduce.size() << " (diameter " << diameter << ")" + << std::flush; + next = std::chrono::steady_clock::now() + time_step; + } #endif - - index_t index_column_to_add = index_column_to_reduce; - - diameter_entry_t pivot; - - // start with factor 1 in order to initialize working_coboundary - // with the coboundary of the simplex with index column_to_reduce - coefficient_t factor_column_to_add = 1; - - // initialize reduction_matrix as identity matrix - reduction_matrix.append_column(); - reduction_matrix.push_back(diameter_entry_t(column_to_reduce, 1)); - - bool might_be_apparent_pair = (index_column_to_reduce == index_column_to_add); - - while (true) { - pivot = add_coboundary_and_get_pivot( - reduction_matrix.cbegin(index_column_to_add), - reduction_matrix.cend(index_column_to_add), - factor_column_to_add, working_reduction_column, - working_coboundary, dim, pivot_column_index, might_be_apparent_pair); - if (get_index(pivot) != -1) { - auto pair = pivot_column_index.find(get_index(pivot)); - + auto pair = pivot_column_index.find(get_entry(pivot)); if (pair != pivot_column_index.end()) { - index_column_to_add = pair->second; - factor_column_to_add = modulus - get_coefficient(pivot); + entry_t other_pivot = pair->first; + index_t index_column_to_add = pair->second; + coefficient_t factor = + modulus - get_coefficient(pivot) * + multiplicative_inverse[get_coefficient(other_pivot)] % + modulus; + + add_coboundary(reduction_matrix, columns_to_reduce, index_column_to_add, + factor, working_reduction_column, working_coboundary, dim); + + pivot = get_pivot(working_coboundary, modulus); } else { #ifdef PRINT_PERSISTENCE_PAIRS value_t death = get_diameter(pivot); if (death > diameter * ratio) { #ifdef INDICATE_PROGRESS - std::cout << "\033[K"; + std::cerr << clear_line << std::flush; #endif - std::cout << " [" << diameter << "," << death << ")" << std::endl - << std::flush; + std::cout << " [" << diameter << "," << death << ")" << std::endl; } #endif - pivot_column_index.insert( - std::make_pair(get_index(pivot), index_column_to_reduce)); - -#ifdef USE_COEFFICIENTS - const coefficient_t inverse = - multiplicative_inverse[get_coefficient(pivot)]; -#endif - - // replace current column of reduction_matrix (with a single diagonal 1 - // entry) by reduction_column (possibly with a different entry on the - // diagonal) - reduction_matrix.pop_back(); + pivot_column_index.insert({get_entry(pivot), index_column_to_reduce}); while (true) { diameter_entry_t e = pop_pivot(working_reduction_column, modulus); if (get_index(e) == -1) break; -#ifdef USE_COEFFICIENTS - set_coefficient(e, inverse * get_coefficient(e) % modulus); assert(get_coefficient(e) > 0); -#endif reduction_matrix.push_back(e); } break; } } else { #ifdef PRINT_PERSISTENCE_PAIRS - std::cout << " [" << diameter << ", )" << std::endl << std::flush; +#ifdef INDICATE_PROGRESS + std::cerr << clear_line << std::flush; +#endif + std::cout << " [" << diameter << ", )" << std::endl; #endif break; } } } - #ifdef INDICATE_PROGRESS - std::cout << "\033[K"; + std::cerr << clear_line << std::flush; #endif } std::vector<diameter_index_t> get_edges(); void compute_barcodes() { - std::vector<diameter_index_t> simplices, columns_to_reduce; compute_dim_0_pairs(simplices, columns_to_reduce); for (index_t dim = 1; dim <= dim_max; ++dim) { - hash_map<index_t, index_t> pivot_column_index; + entry_hash_map pivot_column_index; pivot_column_index.reserve(columns_to_reduce.size()); compute_pairs(columns_to_reduce, pivot_column_index, dim); - if (dim < dim_max) { + if (dim < dim_max) assemble_columns_to_reduce(simplices, columns_to_reduce, pivot_column_index, dim + 1); - } } } }; template <> class ripser<compressed_lower_distance_matrix>::simplex_coboundary_enumerator { -private: index_t idx_below, idx_above, v, k; std::vector<index_t> vertices; const diameter_entry_t simplex; @@ -656,16 +702,17 @@ private: const binomial_coeff_table& binomial_coeff; public: - simplex_coboundary_enumerator(const diameter_entry_t _simplex, index_t _dim, + simplex_coboundary_enumerator(const diameter_entry_t _simplex, const index_t _dim, const ripser& parent) : idx_below(get_index(_simplex)), idx_above(0), v(parent.n - 1), k(_dim + 1), vertices(_dim + 1), simplex(_simplex), modulus(parent.modulus), dist(parent.dist), binomial_coeff(parent.binomial_coeff) { - parent.get_simplex_vertices(get_index(_simplex), _dim, parent.n, vertices.begin()); + parent.get_simplex_vertices(get_index(_simplex), _dim, parent.n, vertices.rbegin()); } - bool has_next() { + bool has_next(bool all_cofaces = true) { while ((v != -1) && (binomial_coeff(v, k) <= idx_below)) { + if (!all_cofaces) return false; idx_below -= binomial_coeff(v, k); idx_above += binomial_coeff(v, k + 1); --v; @@ -680,72 +727,34 @@ public: for (index_t w : vertices) coface_diameter = std::max(coface_diameter, dist(v, w)); index_t coface_index = idx_above + binomial_coeff(v--, k + 1) + idx_below; coefficient_t coface_coefficient = - (k & 1 ? -1 + modulus : 1) * get_coefficient(simplex) % modulus; + (k & 1 ? modulus - 1 : 1) * get_coefficient(simplex) % modulus; return diameter_entry_t(coface_diameter, coface_index, coface_coefficient); } }; -template <typename DistanceMatrix> -template <typename Column, typename Iterator> -diameter_entry_t ripser<DistanceMatrix>::add_coboundary_and_get_pivot( - Iterator column_begin, Iterator column_end, coefficient_t factor_column_to_add, - Column& working_reduction_column, Column& working_coboundary, const index_t& dim, - hash_map<index_t, index_t>& pivot_column_index, bool& might_be_apparent_pair) { - for (auto it = column_begin; it != column_end; ++it) { - diameter_entry_t simplex = *it; - set_coefficient(simplex, get_coefficient(simplex) * factor_column_to_add % modulus); - - working_reduction_column.push(simplex); - - coface_entries.clear(); - simplex_coboundary_enumerator cofaces(simplex, dim, *this); - while (cofaces.has_next()) { - diameter_entry_t coface = cofaces.next(); - if (get_diameter(coface) <= threshold) { - coface_entries.push_back(coface); - if (might_be_apparent_pair && (get_diameter(simplex) == get_diameter(coface))) { - if (pivot_column_index.find(get_index(coface)) == pivot_column_index.end()) { - return coface; - } - might_be_apparent_pair = false; - } - } - } - for (auto coface : coface_entries) working_coboundary.push(coface); - } - - return get_pivot(working_coboundary, modulus); -} - template <> class ripser<sparse_distance_matrix>::simplex_coboundary_enumerator { -private: const ripser& parent; - index_t idx_below, idx_above, v, k, max_vertex_below; + std::vector<index_t> vertices; const diameter_entry_t simplex; const coefficient_t modulus; const sparse_distance_matrix& dist; const binomial_coeff_table& binomial_coeff; - - std::vector<index_t>& vertices; std::vector<std::vector<index_diameter_t>::const_reverse_iterator>& neighbor_it; std::vector<std::vector<index_diameter_t>::const_reverse_iterator>& neighbor_end; - index_diameter_t x; + index_diameter_t neighbor; public: - simplex_coboundary_enumerator(const diameter_entry_t _simplex, index_t _dim, + simplex_coboundary_enumerator(const diameter_entry_t _simplex, const index_t _dim, const ripser& _parent) : parent(_parent), idx_below(get_index(_simplex)), idx_above(0), v(parent.n - 1), k(_dim + 1), max_vertex_below(parent.n - 1), simplex(_simplex), modulus(parent.modulus), - dist(parent.dist), binomial_coeff(parent.binomial_coeff), vertices(parent.vertices), - neighbor_it(parent.neighbor_it), neighbor_end(parent.neighbor_end) { - + dist(parent.dist), binomial_coeff(parent.binomial_coeff), vertices(_dim + 1), + neighbor_it(dist.neighbor_it), neighbor_end(dist.neighbor_end) { neighbor_it.clear(); neighbor_end.clear(); - vertices.clear(); - - parent.get_simplex_vertices(idx_below, _dim, parent.n, std::back_inserter(vertices)); + parent.get_simplex_vertices(idx_below, _dim, parent.n, vertices.rbegin()); for (auto v : vertices) { neighbor_it.push_back(dist.neighbors[v].rbegin()); neighbor_end.push_back(dist.neighbors[v].rend()); @@ -754,19 +763,23 @@ public: bool has_next(bool all_cofaces = true) { for (auto &it0 = neighbor_it[0], &end0 = neighbor_end[0]; it0 != end0; ++it0) { - x = *it0; + neighbor = *it0; for (size_t idx = 1; idx < neighbor_it.size(); ++idx) { auto &it = neighbor_it[idx], end = neighbor_end[idx]; - while (get_index(*it) > get_index(x)) + while (get_index(*it) > get_index(neighbor)) if (++it == end) return false; - auto y = *it; - if (get_index(y) != get_index(x)) + if (get_index(*it) != get_index(neighbor)) goto continue_outer; else - x = std::max(x, y); + neighbor = std::max(neighbor, *it); + } + while (k > 0 && vertices[k - 1] > get_index(neighbor)) { + if (!all_cofaces) return false; + idx_below -= binomial_coeff(vertices[k - 1], k); + idx_above += binomial_coeff(vertices[k - 1], k + 1); + --k; } - return all_cofaces || !(k > 0 && parent.get_next_vertex(max_vertex_below, idx_below, - k) > get_index(x)); + return true; continue_outer:; } return false; @@ -774,29 +787,21 @@ public: diameter_entry_t next() { ++neighbor_it[0]; - - while (k > 0 && parent.get_next_vertex(max_vertex_below, idx_below, k) > get_index(x)) { - idx_below -= binomial_coeff(max_vertex_below, k); - idx_above += binomial_coeff(max_vertex_below, k + 1); - --k; - } - - value_t coface_diameter = std::max(get_diameter(simplex), get_diameter(x)); - + value_t coface_diameter = std::max(get_diameter(simplex), get_diameter(neighbor)); + index_t coface_index = idx_above + binomial_coeff(get_index(neighbor), k + 1) + idx_below; coefficient_t coface_coefficient = - (k & 1 ? -1 + modulus : 1) * get_coefficient(simplex) % modulus; - - return diameter_entry_t(coface_diameter, - idx_above + binomial_coeff(get_index(x), k + 1) + idx_below, - coface_coefficient); + (k & 1 ? modulus - 1 : 1) * get_coefficient(simplex) % modulus; + return diameter_entry_t(coface_diameter, coface_index, coface_coefficient); } }; template <> std::vector<diameter_index_t> ripser<compressed_lower_distance_matrix>::get_edges() { std::vector<diameter_index_t> edges; + std::vector<index_t> vertices(2); for (index_t index = binomial_coeff(n, 2); index-- > 0;) { - value_t diameter = compute_diameter(index, 1); - if (diameter <= threshold) edges.push_back(std::make_pair(diameter, index)); + get_simplex_vertices(index, 1, dist.size(), vertices.rbegin()); + value_t length = dist(vertices[0], vertices[1]); + if (length <= threshold) edges.push_back({length, index}); } return edges; } @@ -806,92 +811,11 @@ template <> std::vector<diameter_index_t> ripser<sparse_distance_matrix>::get_ed for (index_t i = 0; i < n; ++i) for (auto n : dist.neighbors[i]) { index_t j = get_index(n); - if (i > j) edges.push_back(std::make_pair(get_diameter(n), get_edge_index(i, j))); + if (i > j) edges.push_back({get_diameter(n), get_edge_index(i, j)}); } return edges; } -template <> -void ripser<compressed_lower_distance_matrix>::assemble_columns_to_reduce( - std::vector<diameter_index_t>& simplices, std::vector<diameter_index_t>& columns_to_reduce, - hash_map<index_t, index_t>& pivot_column_index, index_t dim) { - index_t num_simplices = binomial_coeff(n, dim + 1); - - columns_to_reduce.clear(); - -#ifdef INDICATE_PROGRESS - std::cout << "\033[K" - << "assembling " << num_simplices << " columns" << std::flush << "\r"; -#endif - - for (index_t index = 0; index < num_simplices; ++index) { - if (pivot_column_index.find(index) == pivot_column_index.end()) { - value_t diameter = compute_diameter(index, dim); - if (diameter <= threshold) columns_to_reduce.push_back(std::make_pair(diameter, index)); -#ifdef INDICATE_PROGRESS - if ((index + 1) % 1000000 == 0) - std::cout << "\033[K" - << "assembled " << columns_to_reduce.size() << " out of " << (index + 1) - << "/" << num_simplices << " columns" << std::flush << "\r"; -#endif - } - } - -#ifdef INDICATE_PROGRESS - std::cout << "\033[K" - << "sorting " << num_simplices << " columns" << std::flush << "\r"; -#endif - - std::sort(columns_to_reduce.begin(), columns_to_reduce.end(), - greater_diameter_or_smaller_index<diameter_index_t>()); -#ifdef INDICATE_PROGRESS - std::cout << "\033[K"; -#endif -} - -template <> -void ripser<sparse_distance_matrix>::assemble_columns_to_reduce( - std::vector<diameter_index_t>& simplices, std::vector<diameter_index_t>& columns_to_reduce, - hash_map<index_t, index_t>& pivot_column_index, index_t dim) { - -#ifdef INDICATE_PROGRESS - std::cout << "\033[K" - << "assembling columns" << std::flush << "\r"; -#endif - - --dim; - columns_to_reduce.clear(); - - std::vector<diameter_index_t> next_simplices; - - for (diameter_index_t simplex : simplices) { - simplex_coboundary_enumerator cofaces(diameter_entry_t(simplex, 1), dim, *this); - - while (cofaces.has_next(false)) { - auto coface = cofaces.next(); - - next_simplices.push_back(std::make_pair(get_diameter(coface), get_index(coface))); - - if (pivot_column_index.find(get_index(coface)) == pivot_column_index.end()) - columns_to_reduce.push_back( - std::make_pair(get_diameter(coface), get_index(coface))); - } - } - - simplices.swap(next_simplices); - -#ifdef INDICATE_PROGRESS - std::cout << "\033[K" - << "sorting " << columns_to_reduce.size() << " columns" << std::flush << "\r"; -#endif - - std::sort(columns_to_reduce.begin(), columns_to_reduce.end(), - greater_diameter_or_smaller_index<diameter_index_t>()); -#ifdef INDICATE_PROGRESS - std::cout << "\033[K"; -#endif -} - enum file_format { LOWER_DISTANCE_MATRIX, UPPER_DISTANCE_MATRIX, @@ -899,12 +823,12 @@ enum file_format { POINT_CLOUD, DIPHA, SPARSE, - RIPSER + BINARY }; -template <typename T> T read(std::istream& s) { +template <typename T> T read(std::istream& input_stream) { T result; - s.read(reinterpret_cast<char*>(&result), sizeof(T)); + input_stream.read(reinterpret_cast<char*>(&result), sizeof(T)); return result; // on little endian: boost::endian::little_to_native(result); } @@ -925,14 +849,11 @@ compressed_lower_distance_matrix read_point_cloud(std::istream& input_stream) { } euclidean_distance_matrix eucl_dist(std::move(points)); - index_t n = eucl_dist.size(); - std::cout << "point cloud with " << n << " points in dimension " << eucl_dist.points.front().size() << std::endl; std::vector<value_t> distances; - for (int i = 0; i < n; ++i) for (int j = 0; j < i; ++j) distances.push_back(eucl_dist(i, j)); @@ -940,9 +861,7 @@ compressed_lower_distance_matrix read_point_cloud(std::istream& input_stream) { } sparse_distance_matrix read_sparse_distance_matrix(std::istream& input_stream) { - std::vector<std::vector<index_diameter_t>> neighbors; - index_t num_edges = 0; std::string line; @@ -955,8 +874,8 @@ sparse_distance_matrix read_sparse_distance_matrix(std::istream& input_stream) { s >> value; if (i != j) { neighbors.resize(std::max({neighbors.size(), i + 1, j + 1})); - neighbors[i].push_back(std::make_pair(j, value)); - neighbors[j].push_back(std::make_pair(i, value)); + neighbors[i].push_back({j, value}); + neighbors[j].push_back({i, value}); ++num_edges; } } @@ -1030,13 +949,13 @@ compressed_lower_distance_matrix read_dipha(std::istream& input_stream) { return compressed_lower_distance_matrix(std::move(distances)); } -compressed_lower_distance_matrix read_ripser(std::istream& input_stream) { +compressed_lower_distance_matrix read_binary(std::istream& input_stream) { std::vector<value_t> distances; while (!input_stream.eof()) distances.push_back(read<value_t>(input_stream)); return compressed_lower_distance_matrix(std::move(distances)); } -compressed_lower_distance_matrix read_file(std::istream& input_stream, file_format format) { +compressed_lower_distance_matrix read_file(std::istream& input_stream, const file_format format) { switch (format) { case LOWER_DISTANCE_MATRIX: return read_lower_distance_matrix(input_stream); @@ -1049,7 +968,7 @@ compressed_lower_distance_matrix read_file(std::istream& input_stream, file_form case DIPHA: return read_dipha(input_stream); default: - return read_ripser(input_stream); + return read_binary(input_stream); } } @@ -1072,34 +991,27 @@ void print_usage_and_exit(int exit_code) { << " dipha (distance matrix in DIPHA file format)" << std::endl << " sparse (sparse distance matrix in Sparse Triplet format)" << std::endl - << " ripser (distance matrix in Ripser binary file format)" + << " binary (lower triangular distance matrix in binary format)" << std::endl - << " --dim <k> compute persistent homology up to dimension <k>" << std::endl - << " --threshold <t> compute Rips complexes up to diameter <t>" << std::endl + << " --dim <k> compute persistent homology up to dimension k" << std::endl + << " --threshold <t> compute Rips complexes up to diameter t" << std::endl #ifdef USE_COEFFICIENTS - << " --modulus <p> compute homology with coefficients in the prime field Z/<p>Z" + << " --modulus <p> compute homology with coefficients in the prime field Z/pZ" #endif - << std::endl; - + << std::endl + << " --ratio <r> only show persistence pairs with death/birth ratio > r" << std::endl; exit(exit_code); } int main(int argc, char** argv) { - const char* filename = nullptr; file_format format = DISTANCE_MATRIX; index_t dim_max = 1; value_t threshold = std::numeric_limits<value_t>::max(); - float ratio = 1; - -#ifdef USE_COEFFICIENTS coefficient_t modulus = 2; -#else - const coefficient_t modulus = 2; -#endif for (index_t i = 1; i < argc; ++i) { const std::string arg(argv[i]); @@ -1122,20 +1034,20 @@ int main(int argc, char** argv) { if (next_pos != parameter.size()) print_usage_and_exit(-1); } else if (arg == "--format") { std::string parameter = std::string(argv[++i]); - if (parameter == "lower-distance") + if (parameter.rfind("lower", 0) == 0) format = LOWER_DISTANCE_MATRIX; - else if (parameter == "upper-distance") + else if (parameter.rfind("upper", 0) == 0) format = UPPER_DISTANCE_MATRIX; - else if (parameter == "distance") + else if (parameter.rfind("dist", 0) == 0) format = DISTANCE_MATRIX; - else if (parameter == "point-cloud") + else if (parameter.rfind("point", 0) == 0) format = POINT_CLOUD; else if (parameter == "dipha") format = DIPHA; else if (parameter == "sparse") format = SPARSE; - else if (parameter == "ripser") - format = RIPSER; + else if (parameter == "binary") + format = BINARY; else print_usage_and_exit(-1); #ifdef USE_COEFFICIENTS @@ -1167,7 +1079,6 @@ int main(int argc, char** argv) { ripser<sparse_distance_matrix>(std::move(dist), dim_max, threshold, ratio, modulus) .compute_barcodes(); } else { - compressed_lower_distance_matrix dist = read_file(filename ? file_stream : std::cin, format); @@ -1182,7 +1093,6 @@ int main(int argc, char** argv) { for (index_t j = 0; j < dist.size(); ++j) r_i = std::max(r_i, dist(i, j)); enclosing_radius = std::min(enclosing_radius, r_i); } - threshold = enclosing_radius; } @@ -1193,7 +1103,6 @@ int main(int argc, char** argv) { d != std::numeric_limits<value_t>::infinity() ? std::max(max, d) : max_finite; if (d <= threshold) ++num_edges; } - std::cout << "value range: [" << min << "," << max_finite << "]" << std::endl; if (threshold >= max) { |