diff options
author | Ulrich Bauer <mail@ulrich-bauer.org> | 2018-08-08 08:32:53 -0500 |
---|---|---|
committer | Ulrich Bauer <mail@ulrich-bauer.org> | 2018-08-08 08:32:53 -0500 |
commit | 9ca3cf8c664721686d16ff8f4326bcbc02e7d42f (patch) | |
tree | 1e7d12ad2f35d806291837dc0a7bd50d140fbdfc | |
parent | 2dee30433bc1a09d2cc2240c5fbb92328cde3d81 (diff) | |
parent | 22017ba657b7068fed2f1ff4f2d76413aa8fa1e7 (diff) |
Merge branch 'sparse-distance-matrix'
# Conflicts:
# benchmarks/benchmarks.txt
# ripser.xcodeproj/project.pbxproj
-rw-r--r-- | .clang-format | 2 | ||||
-rw-r--r-- | Makefile | 7 | ||||
-rw-r--r-- | README.md | 17 | ||||
-rw-r--r-- | ripser.cpp | 1002 |
4 files changed, 634 insertions, 394 deletions
diff --git a/.clang-format b/.clang-format index 598d7c2..1975b19 100644 --- a/.clang-format +++ b/.clang-format @@ -1,7 +1,7 @@ BasedOnStyle: LLVM IndentWidth: 4 TabWidth: 4 -ColumnLimit: 120 +ColumnLimit: 100 AccessModifierOffset: -4 AllowShortIfStatementsOnASingleLine: true AllowShortLoopsOnASingleLine: true @@ -1,7 +1,7 @@ build: ripser -all: ripser ripser-coeff ripser-reduction ripser-debug +all: ripser ripser-coeff ripser-reduction ripser-coeff-reduction ripser-debug ripser: ripser.cpp @@ -13,9 +13,12 @@ ripser-coeff: ripser.cpp ripser-reduction: ripser.cpp c++ -std=c++11 ripser.cpp -o ripser-reduction -Ofast -D NDEBUG -D ASSEMBLE_REDUCTION_MATRIX +ripser-coeff-reduction: ripser.cpp + c++ -std=c++11 ripser.cpp -o ripser-coeff-reduction -Ofast -D NDEBUG -D USE_COEFFICIENTS -D ASSEMBLE_REDUCTION_MATRIX + ripser-debug: ripser.cpp c++ -std=c++11 ripser.cpp -o ripser-debug -g clean: - rm -f ripser ripser-coeff ripser-reduction ripser-debug + rm -f ripser ripser-coeff ripser-reduction ripser-coeff-reduction ripser-debug @@ -1,13 +1,13 @@ # Ripser -Copyright © 2015–2016 [Ulrich Bauer]. +Copyright © 2015–2018 [Ulrich Bauer]. ### Description Ripser is a lean C++ code for the computation of Vietoris–Rips persistence barcodes. It can do just this one thing, but does it extremely well. -To see a live demo of Ripser's capabilities, go to [live.ripser.org]. The computation happens inside the browser (using [PNaCl] on Chrome and JavaScript via [Emscripten] on other browsers). +To see a live demo of Ripser's capabilities, go to [live.ripser.org]. The computation happens inside the browser (using [PNaCl] on Chrome and JavaScript via [Emscripten] on other browsers). The main features of Ripser: @@ -26,13 +26,14 @@ Input formats currently supported by Ripser: - [DIPHA] distance matrix data - point cloud data -Ripser's efficiency is based on a few important concepts and principles: +Ripser's efficiency is based on a few important concepts and principles, building on key previous and concurrent developments by other researchers in computational topology: - - Compute persistent *co*homology + - Compute persistent *co*homology (as suggested by [Vin de Silva, Dmitriy Morozov, and Mikael Vejdemo-Johansson](https://doi.org/10.1088/0266-5611/27/12/124003)) - Don't compute information that is never needed - (for the experts: employ the *clearing* optimization, aka *persistence with a twist*) - - Don't store information that can be readily recomputed - - Take obvious shortcuts (*apparent persistence pairs*) + (for the experts: employ the *clearing* optimization, aka *persistence with a twist*, as suggested by [Chao Chen and Michael Kerber](http://www.geometrie.tugraz.at/kerber/kerber_papers/ck-phcwat-11.pdf)) + - Don't store information that can be readily recomputed (in particular, the boundary matrix and the reduced boundary matrix) + - Take computational shortcuts (*apparent* and *emergent persistence pairs*) + - If no threshold is specified, choose the *enclosing radius* as the threshold, from which on homology is guaranteed to be trivial (as suggested by [Greg Henselman-Petrusek](https://github.com/Eetion/Eirene.jl)) ### Version @@ -74,7 +75,7 @@ The input is given either in a file whose name is passed as an argument, or thro - `--format`: use the specified file format for the input. The following formats are supported: - `lower-distance` (default if no format is specified): lower triangular distance matrix; a comma (or whitespace, or other non-numerical character) separated list of the distance matrix entries below the diagonal, sorted lexicographically by row index, then column index - `upper-distance`: upper triangular distance matrix; similar to the previous, but for the entries above the diagonal; suitable for output from the MATLAB functions `pdist` or `seqpdist`, exported to a CSV file - - `distances`: full distance matrix; similar to the above, but for all entries of the distance matrix + - `distance`: full distance matrix; similar to the above, but for all entries of the distance matrix - `dipha`: DIPHA distance matrix as described on the [DIPHA] website - `point-cloud`: point cloud; a comma (or whitespace, or other non-numerical character) separated list of coordinates of the points in some Euclidean space, one point per line - `--dim k`: compute persistent homology up to dimension *k* @@ -47,35 +47,26 @@ template <class Key, class T> class hash_map : public std::unordered_map<Key, T> #endif typedef float value_t; -// typedef uint16_t value_t; - typedef int64_t index_t; typedef int16_t coefficient_t; class binomial_coeff_table { std::vector<std::vector<index_t>> B; - index_t n_max, k_max; public: - binomial_coeff_table(index_t n, index_t k) { - n_max = n; - k_max = k; - - B.resize(n + 1); + binomial_coeff_table(index_t n, index_t k) : B(n + 1) { for (index_t i = 0; i <= n; i++) { B[i].resize(k + 1); - for (index_t j = 0; j <= std::min(i, k); j++) { + for (index_t j = 0; j <= std::min(i, k); j++) if (j == 0 || j == i) B[i][j] = 1; else B[i][j] = B[i - 1][j - 1] + B[i - 1][j]; - } } } index_t operator()(index_t n, index_t k) const { - assert(n <= n_max); - assert(k <= k_max); + assert(n < B.size() && k < B[n].size()); return B[n][k]; } }; @@ -97,58 +88,23 @@ std::vector<coefficient_t> multiplicative_inverse_vector(const coefficient_t m) return inverse; } -index_t get_next_vertex(index_t& v, const index_t idx, const index_t k, const binomial_coeff_table& binomial_coeff) { - if (binomial_coeff(v, k) > idx) { - index_t count = v; - while (count > 0) { - index_t i = v; - index_t step = count >> 1; - i -= step; - if (binomial_coeff(i, k) > idx) { - v = --i; - count -= step + 1; - } else - count = step; - } - } - assert(binomial_coeff(v, k) <= idx); - assert(binomial_coeff(v + 1, k) > idx); - return v; -} - -template <typename OutputIterator> -OutputIterator get_simplex_vertices(index_t idx, const index_t dim, index_t v, - const binomial_coeff_table& binomial_coeff, OutputIterator out) { - --v; - for (index_t k = dim + 1; k > 0; --k) { - get_next_vertex(v, idx, k, binomial_coeff); - *out++ = v; - idx -= binomial_coeff(v, k); - } - return out; -} - -std::vector<index_t> vertices_of_simplex(const index_t simplex_index, const index_t dim, const index_t n, - const binomial_coeff_table& binomial_coeff) { - std::vector<index_t> vertices; - get_simplex_vertices(simplex_index, dim, n, binomial_coeff, std::back_inserter(vertices)); - return vertices; -} - #ifdef USE_COEFFICIENTS -struct entry_t { +struct __attribute__((packed)) entry_t { index_t index : 8 * (sizeof(index_t) - sizeof(coefficient_t)); coefficient_t coefficient; - entry_t(index_t _index, coefficient_t _coefficient) : index(_index), coefficient(_coefficient) {} + entry_t(index_t _index, coefficient_t _coefficient) + : index(_index), coefficient(_coefficient) {} entry_t(index_t _index) : index(_index), coefficient(1) {} entry_t() : index(0), coefficient(1) {} -} __attribute__((packed)); +}; static_assert(sizeof(entry_t) == sizeof(index_t), "size of entry_t is not the same as index_t"); -entry_t make_entry(index_t _index, coefficient_t _coefficient) { return entry_t(_index, _coefficient); } -index_t get_index(entry_t e) { return e.index; } -index_t get_coefficient(entry_t e) { return e.coefficient; } +entry_t make_entry(index_t _index, coefficient_t _coefficient) { + return entry_t(_index, _coefficient); +} +index_t get_index(const entry_t& e) { return e.index; } +index_t get_coefficient(const entry_t& e) { return e.coefficient; } void set_coefficient(entry_t& e, const coefficient_t c) { e.coefficient = c; } bool operator==(const entry_t& e1, const entry_t& e2) { @@ -163,46 +119,42 @@ std::ostream& operator<<(std::ostream& stream, const entry_t& e) { #else typedef index_t entry_t; -const index_t get_index(entry_t i) { return i; } -index_t get_coefficient(entry_t i) { return 1; } +const index_t get_index(const entry_t& i) { return i; } +index_t get_coefficient(const entry_t& i) { return 1; } entry_t make_entry(index_t _index, coefficient_t _value) { return entry_t(_index); } -void set_coefficient(index_t& e, const coefficient_t c) {} +void set_coefficient(entry_t& e, const coefficient_t c) {} #endif const entry_t& get_entry(const entry_t& e) { return e; } -template <typename Entry> struct smaller_index { - bool operator()(const Entry& a, const Entry& b) { return get_index(a) < get_index(b); } -}; - -class diameter_index_t : public std::pair<value_t, index_t> { -public: - diameter_index_t() : std::pair<value_t, index_t>() {} - diameter_index_t(std::pair<value_t, index_t> p) : std::pair<value_t, index_t>(p) {} -}; -value_t get_diameter(diameter_index_t i) { return i.first; } -index_t get_index(diameter_index_t i) { return i.second; } +typedef std::pair<value_t, index_t> diameter_index_t; +value_t get_diameter(const diameter_index_t& i) { return i.first; } +index_t get_index(const diameter_index_t& i) { return i.second; } class diameter_entry_t : public std::pair<value_t, entry_t> { public: - diameter_entry_t(std::pair<value_t, entry_t> p) : std::pair<value_t, entry_t>(p) {} - diameter_entry_t(entry_t e) : std::pair<value_t, entry_t>(0, e) {} - diameter_entry_t() : diameter_entry_t(0) {} + diameter_entry_t() {} + diameter_entry_t(const entry_t& e) : std::pair<value_t, entry_t>(0, e) {} diameter_entry_t(value_t _diameter, index_t _index, coefficient_t _coefficient) : std::pair<value_t, entry_t>(_diameter, make_entry(_index, _coefficient)) {} - diameter_entry_t(diameter_index_t _diameter_index, coefficient_t _coefficient) + diameter_entry_t(const diameter_index_t& _diameter_index, coefficient_t _coefficient) : std::pair<value_t, entry_t>(get_diameter(_diameter_index), make_entry(get_index(_diameter_index), _coefficient)) {} - diameter_entry_t(diameter_index_t _diameter_index) : diameter_entry_t(_diameter_index, 1) {} + diameter_entry_t(const diameter_index_t& _diameter_index) + : diameter_entry_t(_diameter_index, 1) {} }; const entry_t& get_entry(const diameter_entry_t& p) { return p.second; } entry_t& get_entry(diameter_entry_t& p) { return p.second; } const index_t get_index(const diameter_entry_t& p) { return get_index(get_entry(p)); } -const coefficient_t get_coefficient(const diameter_entry_t& p) { return get_coefficient(get_entry(p)); } +const coefficient_t get_coefficient(const diameter_entry_t& p) { + return get_coefficient(get_entry(p)); +} const value_t& get_diameter(const diameter_entry_t& p) { return p.first; } -void set_coefficient(diameter_entry_t& p, const coefficient_t c) { set_coefficient(get_entry(p), c); } +void set_coefficient(diameter_entry_t& p, const coefficient_t c) { + set_coefficient(get_entry(p), c); +} template <typename Entry> struct greater_diameter_or_smaller_index { bool operator()(const Entry& a, const Entry& b) { @@ -211,83 +163,6 @@ template <typename Entry> struct greater_diameter_or_smaller_index { } }; -template <typename DistanceMatrix> class rips_filtration_comparator { -public: - const DistanceMatrix& dist; - const index_t dim; - -private: - mutable std::vector<index_t> vertices; - const binomial_coeff_table& binomial_coeff; - -public: - rips_filtration_comparator(const DistanceMatrix& _dist, const index_t _dim, - const binomial_coeff_table& _binomial_coeff) - : dist(_dist), dim(_dim), vertices(_dim + 1), binomial_coeff(_binomial_coeff){}; - - value_t diameter(const index_t index) const { - value_t diam = 0; - get_simplex_vertices(index, dim, dist.size(), binomial_coeff, vertices.begin()); - - for (index_t i = 0; i <= dim; ++i) - for (index_t j = 0; j < i; ++j) { diam = std::max(diam, dist(vertices[i], vertices[j])); } - return diam; - } - - bool operator()(const index_t a, const index_t b) const { - assert(a < binomial_coeff(dist.size(), dim + 1)); - assert(b < binomial_coeff(dist.size(), dim + 1)); - - return greater_diameter_or_smaller_index<diameter_index_t>()(diameter_index_t(diameter(a), a), - diameter_index_t(diameter(b), b)); - } - - template <typename Entry> bool operator()(const Entry& a, const Entry& b) const { - return operator()(get_index(a), get_index(b)); - } -}; - -template <class DistanceMatrix> class simplex_coboundary_enumerator { -private: - index_t idx_below, idx_above, v, k; - std::vector<index_t> vertices; - const diameter_entry_t simplex; - const coefficient_t modulus; - const DistanceMatrix& dist; - const binomial_coeff_table& binomial_coeff; - -public: - simplex_coboundary_enumerator(const diameter_entry_t _simplex, index_t _dim, index_t _n, - const coefficient_t _modulus, const DistanceMatrix& _dist, - const binomial_coeff_table& _binomial_coeff) - : simplex(_simplex), idx_below(get_index(_simplex)), idx_above(0), v(_n - 1), k(_dim + 1), modulus(_modulus), - binomial_coeff(_binomial_coeff), dist(_dist), vertices(_dim + 1) { - get_simplex_vertices(get_index(_simplex), _dim, _n, binomial_coeff, vertices.begin()); - } - - bool has_next() { - while ((v != -1) && (binomial_coeff(v, k) <= idx_below)) { - idx_below -= binomial_coeff(v, k); - idx_above += binomial_coeff(v, k + 1); - - --v; - --k; - assert(k != -1); - } - return v != -1; - } - - index_t next_index() { return idx_above + binomial_coeff(v--, k + 1) + idx_below; } - - diameter_entry_t next() { - value_t coface_diameter = get_diameter(simplex); - for (index_t w : vertices) coface_diameter = std::max(coface_diameter, dist(v, w)); - coefficient_t coface_coefficient = (k & 1 ? -1 + modulus : 1) * get_coefficient(simplex) % modulus; - return diameter_entry_t(coface_diameter, idx_above + binomial_coeff(v--, k + 1) + idx_below, - coface_coefficient); - } -}; - enum compressed_matrix_layout { LOWER_TRIANGULAR, UPPER_TRIANGULAR }; template <compressed_matrix_layout Layout> class compressed_distance_matrix { @@ -298,7 +173,7 @@ public: void init_rows(); compressed_distance_matrix(std::vector<value_t>&& _distances) - : distances(_distances), rows((1 + std::sqrt(1 + 8 * distances.size())) / 2) { + : distances(std::move(_distances)), rows((1 + std::sqrt(1 + 8 * distances.size())) / 2) { assert(distances.size() == size() * (size() - 1) / 2); init_rows(); } @@ -317,6 +192,22 @@ public: size_t size() const { return rows.size(); } }; +class sparse_distance_matrix { +public: + std::vector<std::vector<diameter_index_t>> neighbors; + + template <typename DistanceMatrix> + sparse_distance_matrix(const DistanceMatrix& mat, value_t threshold) : neighbors(mat.size()) { + + for (index_t i = 0; i < size(); ++i) + for (index_t j = 0; j < size(); ++j) + if (i != j && mat(i, j) <= threshold) + neighbors[i].push_back(std::make_pair(mat(i, j), j)); + } + + size_t size() const { return neighbors.size(); } +}; + template <> void compressed_distance_matrix<LOWER_TRIANGULAR>::init_rows() { value_t* pointer = &distances[0]; for (index_t i = 1; i < size(); ++i) { @@ -333,14 +224,16 @@ template <> void compressed_distance_matrix<UPPER_TRIANGULAR>::init_rows() { } } -template <> value_t compressed_distance_matrix<UPPER_TRIANGULAR>::operator()(index_t i, index_t j) const { - if (i > j) std::swap(i, j); - return i == j ? 0 : rows[i][j]; +template <> +value_t compressed_distance_matrix<UPPER_TRIANGULAR>::operator()(const index_t i, + const index_t j) const { + return i == j ? 0 : i > j ? rows[j][i] : rows[i][j]; } -template <> value_t compressed_distance_matrix<LOWER_TRIANGULAR>::operator()(index_t i, index_t j) const { - if (i > j) std::swap(i, j); - return i == j ? 0 : rows[j][i]; +template <> +value_t compressed_distance_matrix<LOWER_TRIANGULAR>::operator()(const index_t i, + const index_t j) const { + return i == j ? 0 : i < j ? rows[j][i] : rows[i][j]; } typedef compressed_distance_matrix<LOWER_TRIANGULAR> compressed_lower_distance_matrix; @@ -350,12 +243,17 @@ class euclidean_distance_matrix { public: std::vector<std::vector<value_t>> points; - euclidean_distance_matrix(std::vector<std::vector<value_t>>&& _points) : points(_points) {} + euclidean_distance_matrix(std::vector<std::vector<value_t>>&& _points) + : points(std::move(_points)) { + for (auto p : points) { assert(p.size() == points.front().size()); } + } value_t operator()(const index_t i, const index_t j) const { - return std::sqrt(std::inner_product(points[i].begin(), points[i].end(), points[j].begin(), value_t(), - std::plus<value_t>(), - [](value_t u, value_t v) { return (u - v) * (u - v); })); + assert(i < points.size()); + assert(j < points.size()); + return std::sqrt(std::inner_product( + points[i].begin(), points[i].end(), points[j].begin(), value_t(), std::plus<value_t>(), + [](value_t u, value_t v) { return (u - v) * (u - v); })); } size_t size() const { return points.size(); } @@ -371,23 +269,16 @@ public: } index_t find(index_t x) { - index_t y = x, z = parent[y]; - while (z != y) { - y = z; - z = parent[y]; - } - y = parent[x]; - while (z != y) { - parent[x] = z; - x = y; - y = parent[x]; + index_t y = x, z; + while ((z = parent[y]) != y) y = z; + while ((z = parent[x]) != y) { + parent[x] = y; + x = z; } return z; } void link(index_t x, index_t y) { - x = find(x); - y = find(y); - if (x == y) return; + if ((x = find(x)) == (y = find(y))) return; if (rank[x] > rank[y]) parent[y] = x; else { @@ -480,213 +371,562 @@ public: } }; -template <typename Heap> void push_entry(Heap& column, index_t i, coefficient_t c, value_t diameter) { - entry_t e = make_entry(i, c); - column.push(std::make_pair(diameter, e)); -} +template <typename DistanceMatrix> class ripser { + DistanceMatrix dist; + index_t n, dim_max; + value_t threshold; + float ratio; + coefficient_t modulus; + const binomial_coeff_table binomial_coeff; + std::vector<coefficient_t> multiplicative_inverse; + mutable std::vector<index_t> vertices; + mutable std::vector<std::vector<diameter_index_t>::const_reverse_iterator> neighbor_it; + mutable std::vector<std::vector<diameter_index_t>::const_reverse_iterator> neighbor_end; + mutable std::vector<diameter_entry_t> coface_entries; + +public: + ripser(DistanceMatrix&& _dist, index_t _dim_max, value_t _threshold, float _ratio, + coefficient_t _modulus) + : dist(std::move(_dist)), n(dist.size()), + dim_max(std::min(_dim_max, index_t(dist.size() - 2))), threshold(_threshold), + ratio(_ratio), modulus(_modulus), binomial_coeff(n, dim_max + 2), + multiplicative_inverse(multiplicative_inverse_vector(_modulus)) {} + + index_t get_next_vertex(index_t& v, const index_t idx, const index_t k) const { + if (binomial_coeff(v, k) > idx) { + index_t count = v; + while (count > 0) { + index_t i = v; + index_t step = count >> 1; + i -= step; + if (binomial_coeff(i, k) > idx) { + v = --i; + count -= step + 1; + } else + count = step; + } + } + assert(binomial_coeff(v, k) <= idx && binomial_coeff(v + 1, k) > idx); + return v; + } -template <typename Comparator> -void assemble_columns_to_reduce(std::vector<diameter_index_t>& columns_to_reduce, - hash_map<index_t, index_t>& pivot_column_index, const Comparator& comp, index_t dim, - index_t n, value_t threshold, const binomial_coeff_table& binomial_coeff) { - index_t num_simplices = binomial_coeff(n, dim + 2); + index_t get_edge_index(const index_t i, const index_t j) const { + return binomial_coeff(i, 2) + j; + } - columns_to_reduce.clear(); + template <typename OutputIterator> + OutputIterator get_simplex_vertices(index_t idx, const index_t dim, index_t v, + OutputIterator out) const { + --v; + for (index_t k = dim + 1; k > 0; --k) { + get_next_vertex(v, idx, k); + *out++ = v; + idx -= binomial_coeff(v, k); + } + return out; + } -#ifdef INDICATE_PROGRESS - std::cout << "\033[K" - << "assembling " << num_simplices << " columns" << std::flush << "\r"; + value_t compute_diameter(const index_t index, index_t dim) const { + value_t diam = -std::numeric_limits<value_t>::infinity(); + + vertices.clear(); + get_simplex_vertices(index, dim, dist.size(), std::back_inserter(vertices)); + + for (index_t i = 0; i <= dim; ++i) + for (index_t j = 0; j < i; ++j) { + diam = std::max(diam, dist(vertices[i], vertices[j])); + } + return diam; + } + + class simplex_coboundary_enumerator; + + void assemble_columns_to_reduce(std::vector<diameter_index_t>& simplices, + std::vector<diameter_index_t>& columns_to_reduce, + hash_map<index_t, index_t>& pivot_column_index, index_t dim); + + void compute_dim_0_pairs(std::vector<diameter_index_t>& edges, + std::vector<diameter_index_t>& columns_to_reduce) { + union_find dset(n); + + edges = get_edges(); + + std::sort(edges.rbegin(), edges.rend(), + greater_diameter_or_smaller_index<diameter_index_t>()); + +#ifdef PRINT_PERSISTENCE_PAIRS + std::cout << "persistence intervals in dim 0:" << std::endl; #endif - for (index_t index = 0; index < num_simplices; ++index) { - if (pivot_column_index.find(index) == pivot_column_index.end()) { - value_t diameter = comp.diameter(index); - if (diameter <= threshold) columns_to_reduce.push_back(std::make_pair(diameter, index)); -#ifdef INDICATE_PROGRESS - if ((index + 1) % 1000 == 0) - std::cout << "\033[K" - << "assembled " << columns_to_reduce.size() << " out of " << (index + 1) << "/" - << num_simplices << " columns" << std::flush << "\r"; + std::vector<index_t> vertices_of_edge(2); + for (auto e : edges) { + vertices_of_edge.clear(); + get_simplex_vertices(get_index(e), 1, n, std::back_inserter(vertices_of_edge)); + index_t u = dset.find(vertices_of_edge[0]), v = dset.find(vertices_of_edge[1]); + + if (u != v) { +#ifdef PRINT_PERSISTENCE_PAIRS + if (get_diameter(e) != 0) + std::cout << " [0," << get_diameter(e) << ")" << std::endl; #endif + dset.link(u, v); + } else + columns_to_reduce.push_back(e); } - } + std::reverse(columns_to_reduce.begin(), columns_to_reduce.end()); -#ifdef INDICATE_PROGRESS - std::cout << "\033[K" - << "sorting " << num_simplices << " columns" << std::flush << "\r"; +#ifdef PRINT_PERSISTENCE_PAIRS + for (index_t i = 0; i < n; ++i) + if (dset.find(i) == i) std::cout << " [0, )" << std::endl << std::flush; #endif + } - std::sort(columns_to_reduce.begin(), columns_to_reduce.end(), - greater_diameter_or_smaller_index<diameter_index_t>()); -#ifdef INDICATE_PROGRESS - std::cout << "\033[K"; + template <typename Column, typename Iterator> + diameter_entry_t add_coboundary_and_get_pivot(Iterator column_begin, Iterator column_end, + coefficient_t factor_column_to_add, +#ifdef ASSEMBLE_REDUCTION_MATRIX + Column& working_reduction_column, #endif -} + Column& working_coboundary, const index_t& dim, + hash_map<index_t, index_t>& pivot_column_index, + bool& might_be_apparent_pair); -template <typename DistanceMatrix, typename ComparatorCofaces, typename Comparator> -void compute_pairs(std::vector<diameter_index_t>& columns_to_reduce, hash_map<index_t, index_t>& pivot_column_index, - index_t dim, index_t n, value_t threshold, coefficient_t modulus, - const std::vector<coefficient_t>& multiplicative_inverse, const DistanceMatrix& dist, - const ComparatorCofaces& comp, const Comparator& comp_prev, - const binomial_coeff_table& binomial_coeff) { + void compute_pairs(std::vector<diameter_index_t>& columns_to_reduce, + hash_map<index_t, index_t>& pivot_column_index, index_t dim) { #ifdef PRINT_PERSISTENCE_PAIRS - std::cout << "persistence intervals in dim " << dim << ":" << std::endl; + std::cout << "persistence intervals in dim " << dim << ":" << std::endl; #endif #ifdef ASSEMBLE_REDUCTION_MATRIX - compressed_sparse_matrix<diameter_entry_t> reduction_coefficients; + compressed_sparse_matrix<diameter_entry_t> reduction_matrix; #else #ifdef USE_COEFFICIENTS - std::vector<diameter_entry_t> reduction_coefficients; + std::vector<diameter_entry_t> reduction_matrix; #endif #endif - std::vector<diameter_entry_t> coface_entries; + std::vector<diameter_entry_t> coface_entries; - for (index_t i = 0; i < columns_to_reduce.size(); ++i) { - auto column_to_reduce = columns_to_reduce[i]; + for (index_t index_column_to_reduce = 0; index_column_to_reduce < columns_to_reduce.size(); + ++index_column_to_reduce) { + auto column_to_reduce = columns_to_reduce[index_column_to_reduce]; #ifdef ASSEMBLE_REDUCTION_MATRIX - std::priority_queue<diameter_entry_t, std::vector<diameter_entry_t>, smaller_index<diameter_entry_t>> - reduction_column; + std::priority_queue<diameter_entry_t, std::vector<diameter_entry_t>, + greater_diameter_or_smaller_index<diameter_entry_t>> + working_reduction_column; #endif - std::priority_queue<diameter_entry_t, std::vector<diameter_entry_t>, - greater_diameter_or_smaller_index<diameter_entry_t>> - working_coboundary; + std::priority_queue<diameter_entry_t, std::vector<diameter_entry_t>, + greater_diameter_or_smaller_index<diameter_entry_t>> + working_coboundary; - value_t diameter = get_diameter(column_to_reduce); + value_t diameter = get_diameter(column_to_reduce); #ifdef INDICATE_PROGRESS - if ((i + 1) % 1000 == 0) - std::cout << "\033[K" - << "reducing column " << i + 1 << "/" << columns_to_reduce.size() << " (diameter " << diameter - << ")" << std::flush << "\r"; + if ((index_column_to_reduce + 1) % 1000000 == 0) + std::cout << "\033[K" + << "reducing column " << index_column_to_reduce + 1 << "/" + << columns_to_reduce.size() << " (diameter " << diameter << ")" + << std::flush << "\r"; #endif - index_t j = i; + index_t index_column_to_add = index_column_to_reduce; + + diameter_entry_t pivot; - // start with a dummy pivot entry with coefficient -1 in order to initialize - // working_coboundary with the coboundary of the simplex with index column_to_reduce - diameter_entry_t pivot(0, -1, -1 + modulus); + // start with factor 1 in order to initialize working_coboundary + // with the coboundary of the simplex with index column_to_reduce + coefficient_t factor_column_to_add = 1; #ifdef ASSEMBLE_REDUCTION_MATRIX - // initialize reduction_coefficients as identity matrix - reduction_coefficients.append_column(); - reduction_coefficients.push_back(diameter_entry_t(column_to_reduce, 1)); + // initialize reduction_matrix as identity matrix + reduction_matrix.append_column(); +#endif +#ifdef USE_COEFFICIENTS + reduction_matrix.push_back(diameter_entry_t(column_to_reduce, 1)); +#endif + + bool might_be_apparent_pair = (index_column_to_reduce == index_column_to_add); + + while (true) { +#ifdef ASSEMBLE_REDUCTION_MATRIX +#ifdef USE_COEFFICIENTS + auto reduction_column_begin = reduction_matrix.cbegin(index_column_to_add), + reduction_column_end = reduction_matrix.cend(index_column_to_add); +#else + std::vector<diameter_entry_t> coeffs; + coeffs.push_back(columns_to_reduce[index_column_to_add]); + for (auto it = reduction_matrix.cbegin(index_column_to_add); + it != reduction_matrix.cend(index_column_to_add); ++it) + coeffs.push_back(*it); + auto reduction_column_begin = coeffs.begin(), reduction_column_end = coeffs.end(); +#endif #else #ifdef USE_COEFFICIENTS - reduction_coefficients.push_back(diameter_entry_t(column_to_reduce, 1)); + auto reduction_column_begin = &reduction_matrix[index_column_to_add], + reduction_column_end = &reduction_matrix[index_column_to_add] + 1; +#else + auto reduction_column_begin = &columns_to_reduce[index_column_to_add], + reduction_column_end = &columns_to_reduce[index_column_to_add] + 1; #endif #endif - bool might_be_apparent_pair = (i == j); + pivot = add_coboundary_and_get_pivot( + reduction_column_begin, reduction_column_end, factor_column_to_add, +#ifdef ASSEMBLE_REDUCTION_MATRIX + working_reduction_column, +#endif + working_coboundary, dim, pivot_column_index, might_be_apparent_pair); - do { - const coefficient_t factor = modulus - get_coefficient(pivot); + if (get_index(pivot) != -1) { + auto pair = pivot_column_index.find(get_index(pivot)); + + if (pair != pivot_column_index.end()) { + index_column_to_add = pair->second; + factor_column_to_add = modulus - get_coefficient(pivot); + } else { +#ifdef PRINT_PERSISTENCE_PAIRS + value_t death = get_diameter(pivot); + if (death > diameter * ratio) { +#ifdef INDICATE_PROGRESS + std::cout << "\033[K"; +#endif + std::cout << " [" << diameter << "," << death << ")" << std::endl + << std::flush; + } +#endif + pivot_column_index.insert( + std::make_pair(get_index(pivot), index_column_to_reduce)); + +#ifdef USE_COEFFICIENTS + const coefficient_t inverse = + multiplicative_inverse[get_coefficient(pivot)]; +#endif #ifdef ASSEMBLE_REDUCTION_MATRIX - auto coeffs_begin = reduction_coefficients.cbegin(j), coeffs_end = reduction_coefficients.cend(j); + // replace current column of reduction_matrix (with a single diagonal 1 + // entry) by reduction_column (possibly with a different entry on the + // diagonal) +#ifdef USE_COEFFICIENTS + reduction_matrix.pop_back(); #else + pop_pivot(working_reduction_column, modulus); +#endif + + while (true) { + diameter_entry_t e = pop_pivot(working_reduction_column, modulus); + if (get_index(e) == -1) break; #ifdef USE_COEFFICIENTS - auto coeffs_begin = &reduction_coefficients[j], coeffs_end = &reduction_coefficients[j] + 1; + set_coefficient(e, inverse * get_coefficient(e) % modulus); + assert(get_coefficient(e) > 0); +#endif + reduction_matrix.push_back(e); + } #else - auto coeffs_begin = &columns_to_reduce[j], coeffs_end = &columns_to_reduce[j] + 1; +#ifdef USE_COEFFICIENTS + reduction_matrix.pop_back(); + reduction_matrix.push_back(diameter_entry_t(column_to_reduce, inverse)); #endif #endif + break; + } + } else { +#ifdef PRINT_PERSISTENCE_PAIRS + std::cout << " [" << diameter << ", )" << std::endl << std::flush; +#endif + break; + } + } + } + +#ifdef INDICATE_PROGRESS + std::cout << "\033[K"; +#endif + } + + std::vector<diameter_index_t> get_edges(); + + void compute_barcodes() { + + std::vector<diameter_index_t> simplices, columns_to_reduce; + + compute_dim_0_pairs(simplices, columns_to_reduce); + + for (index_t dim = 1; dim <= dim_max; ++dim) { + hash_map<index_t, index_t> pivot_column_index; + pivot_column_index.reserve(columns_to_reduce.size()); + + compute_pairs(columns_to_reduce, pivot_column_index, dim); + + if (dim < dim_max) { + assemble_columns_to_reduce(simplices, columns_to_reduce, pivot_column_index, + dim + 1); + } + } + } +}; + +template <> class ripser<compressed_lower_distance_matrix>::simplex_coboundary_enumerator { +private: + index_t idx_below, idx_above, v, k; + std::vector<index_t> vertices; + const diameter_entry_t simplex; + const coefficient_t modulus; + const compressed_lower_distance_matrix& dist; + const binomial_coeff_table& binomial_coeff; + +public: + simplex_coboundary_enumerator(const diameter_entry_t _simplex, index_t _dim, + const ripser& parent) + : idx_below(get_index(_simplex)), idx_above(0), v(parent.n - 1), k(_dim + 1), + vertices(_dim + 1), simplex(_simplex), modulus(parent.modulus), dist(parent.dist), + binomial_coeff(parent.binomial_coeff) { + parent.get_simplex_vertices(get_index(_simplex), _dim, parent.n, vertices.begin()); + } + + bool has_next() { + while ((v != -1) && (binomial_coeff(v, k) <= idx_below)) { + idx_below -= binomial_coeff(v, k); + idx_above += binomial_coeff(v, k + 1); + --v; + --k; + assert(k != -1); + } + return v != -1; + } - for (auto it = coeffs_begin; it != coeffs_end; ++it) { - diameter_entry_t simplex = *it; - set_coefficient(simplex, get_coefficient(simplex) * factor % modulus); + diameter_entry_t next() { + value_t coface_diameter = get_diameter(simplex); + for (index_t w : vertices) coface_diameter = std::max(coface_diameter, dist(v, w)); + index_t coface_index = idx_above + binomial_coeff(v--, k + 1) + idx_below; + coefficient_t coface_coefficient = + (k & 1 ? -1 + modulus : 1) * get_coefficient(simplex) % modulus; + return diameter_entry_t(coface_diameter, coface_index, coface_coefficient); + } +}; +template <typename DistanceMatrix> +template <typename Column, typename Iterator> +diameter_entry_t ripser<DistanceMatrix>::add_coboundary_and_get_pivot( + Iterator column_begin, Iterator column_end, coefficient_t factor_column_to_add, #ifdef ASSEMBLE_REDUCTION_MATRIX - reduction_column.push(simplex); -#endif - - coface_entries.clear(); - simplex_coboundary_enumerator<decltype(dist)> cofaces(simplex, dim, n, modulus, dist, binomial_coeff); - while (cofaces.has_next()) { - diameter_entry_t coface = cofaces.next(); - if (get_diameter(coface) <= threshold) { - coface_entries.push_back(coface); - if (might_be_apparent_pair && (get_diameter(simplex) == get_diameter(coface))) { - if (pivot_column_index.find(get_index(coface)) == pivot_column_index.end()) { - pivot = coface; - goto found_persistence_pair; - } - might_be_apparent_pair = false; - } + Column& working_reduction_column, +#endif + Column& working_coboundary, const index_t& dim, hash_map<index_t, index_t>& pivot_column_index, + bool& might_be_apparent_pair) { + for (auto it = column_begin; it != column_end; ++it) { + diameter_entry_t simplex = *it; + set_coefficient(simplex, get_coefficient(simplex) * factor_column_to_add % modulus); + +#ifdef ASSEMBLE_REDUCTION_MATRIX + working_reduction_column.push(simplex); +#endif + + coface_entries.clear(); + simplex_coboundary_enumerator cofaces(simplex, dim, *this); + while (cofaces.has_next()) { + diameter_entry_t coface = cofaces.next(); + if (get_diameter(coface) <= threshold) { + coface_entries.push_back(coface); + if (might_be_apparent_pair && (get_diameter(simplex) == get_diameter(coface))) { + if (pivot_column_index.find(get_index(coface)) == pivot_column_index.end()) { + return coface; } + might_be_apparent_pair = false; } - for (auto e : coface_entries) working_coboundary.push(e); } + } + for (auto coface : coface_entries) working_coboundary.push(coface); + } - pivot = get_pivot(working_coboundary, modulus); + return get_pivot(working_coboundary, modulus); +} - if (get_index(pivot) != -1) { - auto pair = pivot_column_index.find(get_index(pivot)); +template <> class ripser<sparse_distance_matrix>::simplex_coboundary_enumerator { +private: + const ripser& parent; + + index_t idx_below, idx_above, v, k, max_vertex_below; + const diameter_entry_t simplex; + const coefficient_t modulus; + const sparse_distance_matrix& dist; + const binomial_coeff_table& binomial_coeff; + + std::vector<index_t>& vertices; + std::vector<std::vector<diameter_index_t>::const_reverse_iterator>& neighbor_it; + std::vector<std::vector<diameter_index_t>::const_reverse_iterator>& neighbor_end; + diameter_index_t x; + +public: + simplex_coboundary_enumerator(const diameter_entry_t _simplex, index_t _dim, + const ripser& _parent) + : parent(_parent), idx_below(get_index(_simplex)), idx_above(0), v(parent.n - 1), + k(_dim + 1), max_vertex_below(parent.n - 1), simplex(_simplex), modulus(parent.modulus), + dist(parent.dist), binomial_coeff(parent.binomial_coeff), vertices(parent.vertices), + neighbor_it(parent.neighbor_it), neighbor_end(parent.neighbor_end) { + + neighbor_it.clear(); + neighbor_end.clear(); + vertices.clear(); + + parent.get_simplex_vertices(idx_below, _dim, parent.n, std::back_inserter(vertices)); + + for (auto v : vertices) { + neighbor_it.push_back(dist.neighbors[v].rbegin()); + neighbor_end.push_back(dist.neighbors[v].rend()); + } + } + + bool has_next(bool all_cofaces = true) { + for (auto &it0 = neighbor_it[0], &end0 = neighbor_end[0]; it0 != end0; ++it0) { + x = *it0; + for (size_t idx = 1; idx < neighbor_it.size(); ++idx) { + auto &it = neighbor_it[idx], end = neighbor_end[idx]; + while (get_index(*it) > get_index(x)) + if (++it == end) return false; + auto y = *it; + if (get_index(y) != get_index(x)) + goto continue_outer; + else + x = std::max(x, y); + } + return all_cofaces || !(k > 0 && parent.get_next_vertex(max_vertex_below, idx_below, + k) > get_index(x)); + continue_outer:; + } + return false; + } + + diameter_entry_t next() { + ++neighbor_it[0]; + + while (k > 0 && parent.get_next_vertex(max_vertex_below, idx_below, k) > get_index(x)) { + idx_below -= binomial_coeff(max_vertex_below, k); + idx_above += binomial_coeff(max_vertex_below, k + 1); + --k; + } + + value_t coface_diameter = std::max(get_diameter(simplex), get_diameter(x)); + + coefficient_t coface_coefficient = + (k & 1 ? -1 + modulus : 1) * get_coefficient(simplex) % modulus; + + return diameter_entry_t(coface_diameter, + idx_above + binomial_coeff(get_index(x), k + 1) + idx_below, + coface_coefficient); + } +}; + +template <> std::vector<diameter_index_t> ripser<compressed_lower_distance_matrix>::get_edges() { + std::vector<diameter_index_t> edges; + for (index_t index = binomial_coeff(n, 2); index-- > 0;) { + value_t diameter = compute_diameter(index, 1); + if (diameter <= threshold) edges.push_back(std::make_pair(diameter, index)); + } + return edges; +} + +template <> std::vector<diameter_index_t> ripser<sparse_distance_matrix>::get_edges() { + std::vector<diameter_index_t> edges; + for (index_t i = 0; i < n; ++i) + for (auto n : dist.neighbors[i]) { + index_t j = get_index(n); + if (i > j) edges.push_back(std::make_pair(get_diameter(n), get_edge_index(i, j))); + } + return edges; +} + +template <> +void ripser<compressed_lower_distance_matrix>::assemble_columns_to_reduce( + std::vector<diameter_index_t>& simplices, std::vector<diameter_index_t>& columns_to_reduce, + hash_map<index_t, index_t>& pivot_column_index, index_t dim) { + index_t num_simplices = binomial_coeff(n, dim + 1); + + columns_to_reduce.clear(); - if (pair != pivot_column_index.end()) { - j = pair->second; - continue; - } - } else { -#ifdef PRINT_PERSISTENCE_PAIRS #ifdef INDICATE_PROGRESS - std::cout << "\033[K"; + std::cout << "\033[K" + << "assembling " << num_simplices << " columns" << std::flush << "\r"; #endif - std::cout << " [" << diameter << ", )" << std::endl << std::flush; + + for (index_t index = 0; index < num_simplices; ++index) { + if (pivot_column_index.find(index) == pivot_column_index.end()) { + value_t diameter = compute_diameter(index, dim); + if (diameter <= threshold) columns_to_reduce.push_back(std::make_pair(diameter, index)); +#ifdef INDICATE_PROGRESS + if ((index + 1) % 1000000 == 0) + std::cout << "\033[K" + << "assembled " << columns_to_reduce.size() << " out of " << (index + 1) + << "/" << num_simplices << " columns" << std::flush << "\r"; #endif - break; - } + } + } - found_persistence_pair: -#ifdef PRINT_PERSISTENCE_PAIRS - value_t death = get_diameter(pivot); - if (diameter != death) { #ifdef INDICATE_PROGRESS - std::cout << "\033[K"; + std::cout << "\033[K" + << "sorting " << num_simplices << " columns" << std::flush << "\r"; #endif - std::cout << " [" << diameter << "," << death << ")" << std::endl << std::flush; - } + + std::sort(columns_to_reduce.begin(), columns_to_reduce.end(), + greater_diameter_or_smaller_index<diameter_index_t>()); +#ifdef INDICATE_PROGRESS + std::cout << "\033[K"; #endif +} - pivot_column_index.insert(std::make_pair(get_index(pivot), i)); +template <> +void ripser<sparse_distance_matrix>::assemble_columns_to_reduce( + std::vector<diameter_index_t>& simplices, std::vector<diameter_index_t>& columns_to_reduce, + hash_map<index_t, index_t>& pivot_column_index, index_t dim) { -#ifdef USE_COEFFICIENTS - const coefficient_t inverse = multiplicative_inverse[get_coefficient(pivot)]; +#ifdef INDICATE_PROGRESS + std::cout << "\033[K" + << "assembling columns" << std::flush << "\r"; #endif -#ifdef ASSEMBLE_REDUCTION_MATRIX - // replace current column of reduction_coefficients (with a single diagonal 1 entry) - // by reduction_column (possibly with a different entry on the diagonal) - reduction_coefficients.pop_back(); - while (true) { - diameter_entry_t e = pop_pivot(reduction_column, modulus); - if (get_index(e) == -1) break; -#ifdef USE_COEFFICIENTS - set_coefficient(e, inverse * get_coefficient(e) % modulus); - assert(get_coefficient(e) > 0); -#endif - reduction_coefficients.push_back(e); - } -#else -#ifdef USE_COEFFICIENTS - reduction_coefficients.pop_back(); - reduction_coefficients.push_back(diameter_entry_t(column_to_reduce, inverse)); -#endif -#endif - break; - } while (true); + --dim; + columns_to_reduce.clear(); + + std::vector<diameter_index_t> next_simplices; + + for (diameter_index_t simplex : simplices) { + simplex_coboundary_enumerator cofaces(simplex, dim, *this); + + while (cofaces.has_next(false)) { + auto coface = cofaces.next(); + + next_simplices.push_back(std::make_pair(get_diameter(coface), get_index(coface))); + + if (pivot_column_index.find(get_index(coface)) == pivot_column_index.end()) + columns_to_reduce.push_back( + std::make_pair(get_diameter(coface), get_index(coface))); + } } + simplices.swap(next_simplices); + +#ifdef INDICATE_PROGRESS + std::cout << "\033[K" + << "sorting " << columns_to_reduce.size() << " columns" << std::flush << "\r"; +#endif + + std::sort(columns_to_reduce.begin(), columns_to_reduce.end(), + greater_diameter_or_smaller_index<diameter_index_t>()); #ifdef INDICATE_PROGRESS std::cout << "\033[K"; #endif } -enum file_format { LOWER_DISTANCE_MATRIX, UPPER_DISTANCE_MATRIX, DISTANCE_MATRIX, POINT_CLOUD, DIPHA }; +enum file_format { + LOWER_DISTANCE_MATRIX, + UPPER_DISTANCE_MATRIX, + DISTANCE_MATRIX, + POINT_CLOUD, + DIPHA, + RIPSER +}; template <typename T> T read(std::istream& s) { T result; @@ -714,7 +954,8 @@ compressed_lower_distance_matrix read_point_cloud(std::istream& input_stream) { index_t n = eucl_dist.size(); - std::cout << "point cloud with " << n << " points in dimension " << eucl_dist.points.front().size() << std::endl; + std::cout << "point cloud with " << n << " points in dimension " + << eucl_dist.points.front().size() << std::endl; std::vector<value_t> distances; @@ -787,6 +1028,12 @@ compressed_lower_distance_matrix read_dipha(std::istream& input_stream) { return compressed_lower_distance_matrix(std::move(distances)); } +compressed_lower_distance_matrix read_ripser(std::istream& input_stream) { + std::vector<value_t> distances; + while (!input_stream.eof()) distances.push_back(read<value_t>(input_stream)); + return compressed_lower_distance_matrix(std::move(distances)); +} + compressed_lower_distance_matrix read_file(std::istream& input_stream, file_format format) { switch (format) { case LOWER_DISTANCE_MATRIX: @@ -799,29 +1046,36 @@ compressed_lower_distance_matrix read_file(std::istream& input_stream, file_form return read_point_cloud(input_stream); case DIPHA: return read_dipha(input_stream); + case RIPSER: + return read_ripser(input_stream); } } void print_usage_and_exit(int exit_code) { - std::cerr << "Usage: " - << "ripser " - << "[options] [filename]" << std::endl - << std::endl - << "Options:" << std::endl - << std::endl - << " --help print this screen" << std::endl - << " --format use the specified file format for the input. Options are:" << std::endl - << " lower-distance (lower triangular distance matrix; default)" << std::endl - << " upper-distance (upper triangular distance matrix)" << std::endl - << " distance (full distance matrix)" << std::endl - << " point-cloud (point cloud in Euclidean space)" << std::endl - << " dipha (distance matrix in DIPHA file format)" << std::endl - << " --dim <k> compute persistent homology up to dimension <k>" << std::endl - << " --threshold <t> compute Rips complexes up to diameter <t>" << std::endl + std::cerr + << "Usage: " + << "ripser " + << "[options] [filename]" << std::endl + << std::endl + << "Options:" << std::endl + << std::endl + << " --help print this screen" << std::endl + << " --format use the specified file format for the input. Options are:" + << std::endl + << " lower-distance (lower triangular distance matrix; default)" + << std::endl + << " upper-distance (upper triangular distance matrix)" << std::endl + << " distance (full distance matrix)" << std::endl + << " point-cloud (point cloud in Euclidean space)" << std::endl + << " dipha (distance matrix in DIPHA file format)" << std::endl + << " ripser (distance matrix in Ripser binary file format)" + << std::endl + << " --dim <k> compute persistent homology up to dimension <k>" << std::endl + << " --threshold <t> compute Rips complexes up to diameter <t>" << std::endl #ifdef USE_COEFFICIENTS - << " --modulus <p> compute homology with coefficients in the prime field Z/<p>Z" + << " --modulus <p> compute homology with coefficients in the prime field Z/<p>Z" #endif - << std::endl; + << std::endl; exit(exit_code); } @@ -835,6 +1089,8 @@ int main(int argc, char** argv) { index_t dim_max = 1; value_t threshold = std::numeric_limits<value_t>::max(); + float ratio = 1; + #ifdef USE_COEFFICIENTS coefficient_t modulus = 2; #else @@ -855,6 +1111,11 @@ int main(int argc, char** argv) { size_t next_pos; threshold = std::stof(parameter, &next_pos); if (next_pos != parameter.size()) print_usage_and_exit(-1); + } else if (arg == "--ratio") { + std::string parameter = std::string(argv[++i]); + size_t next_pos; + ratio = std::stof(parameter, &next_pos); + if (next_pos != parameter.size()) print_usage_and_exit(-1); } else if (arg == "--format") { std::string parameter = std::string(argv[++i]); if (parameter == "lower-distance") @@ -867,6 +1128,8 @@ int main(int argc, char** argv) { format = POINT_CLOUD; else if (parameter == "dipha") format = DIPHA; + else if (parameter == "ripser") + format = RIPSER; else print_usage_and_exit(-1); #ifdef USE_COEFFICIENTS @@ -890,68 +1153,41 @@ int main(int argc, char** argv) { compressed_lower_distance_matrix dist = read_file(filename ? file_stream : std::cin, format); - index_t n = dist.size(); - - std::cout << "distance matrix with " << n << " points" << std::endl; - - auto value_range = std::minmax_element(dist.distances.begin(), dist.distances.end()); - std::cout << "value range: [" << *value_range.first << "," << *value_range.second << "]" << std::endl; - - dim_max = std::min(dim_max, n - 2); - - binomial_coeff_table binomial_coeff(n, dim_max + 2); - std::vector<coefficient_t> multiplicative_inverse(multiplicative_inverse_vector(modulus)); + value_t min = std::numeric_limits<value_t>::infinity(), + max = -std::numeric_limits<value_t>::infinity(), max_finite = max; + int num_edges = 0; - std::vector<diameter_index_t> columns_to_reduce; - - { - union_find dset(n); - std::vector<diameter_index_t> edges; - rips_filtration_comparator<decltype(dist)> comp(dist, 1, binomial_coeff); - for (index_t index = binomial_coeff(n, 2); index-- > 0;) { - value_t diameter = comp.diameter(index); - if (diameter <= threshold) edges.push_back(std::make_pair(diameter, index)); + if (threshold == std::numeric_limits<value_t>::max()) { + value_t enclosing_radius = std::numeric_limits<value_t>::infinity(); + for (index_t i = 0; i < dist.size(); ++i) { + value_t r_i = -std::numeric_limits<value_t>::infinity(); + for (index_t j = 0; j < dist.size(); ++j) r_i = std::max(r_i, dist(i, j)); + enclosing_radius = std::min(enclosing_radius, r_i); } - std::sort(edges.rbegin(), edges.rend(), greater_diameter_or_smaller_index<diameter_index_t>()); -#ifdef PRINT_PERSISTENCE_PAIRS - std::cout << "persistence intervals in dim 0:" << std::endl; -#endif - - std::vector<index_t> vertices_of_edge(2); - for (auto e : edges) { - vertices_of_edge.clear(); - get_simplex_vertices(get_index(e), 1, n, binomial_coeff, std::back_inserter(vertices_of_edge)); - index_t u = dset.find(vertices_of_edge[0]), v = dset.find(vertices_of_edge[1]); - - if (u != v) { -#ifdef PRINT_PERSISTENCE_PAIRS - if (get_diameter(e) > 0) std::cout << " [0," << get_diameter(e) << ")" << std::endl; -#endif - dset.link(u, v); - } else - columns_to_reduce.push_back(e); - } - std::reverse(columns_to_reduce.begin(), columns_to_reduce.end()); - -#ifdef PRINT_PERSISTENCE_PAIRS - for (index_t i = 0; i < n; ++i) - if (dset.find(i) == i) std::cout << " [0, )" << std::endl << std::flush; -#endif + threshold = enclosing_radius; } - for (index_t dim = 1; dim <= dim_max; ++dim) { - rips_filtration_comparator<decltype(dist)> comp(dist, dim + 1, binomial_coeff); - rips_filtration_comparator<decltype(dist)> comp_prev(dist, dim, binomial_coeff); + for (auto d : dist.distances) { + min = std::min(min, d); + max = std::max(max, d); + max_finite = d != std::numeric_limits<value_t>::infinity() ? std::max(max, d) : max_finite; + if (d <= threshold) ++num_edges; + } - hash_map<index_t, index_t> pivot_column_index; - pivot_column_index.reserve(columns_to_reduce.size()); + std::cout << "value range: [" << min << "," << max_finite << "]" << std::endl; - compute_pairs(columns_to_reduce, pivot_column_index, dim, n, threshold, modulus, multiplicative_inverse, dist, - comp, comp_prev, binomial_coeff); + if (threshold >= max) { + std::cout << "distance matrix with " << dist.size() << " points" << std::endl; + ripser<compressed_lower_distance_matrix>(std::move(dist), dim_max, threshold, ratio, + modulus) + .compute_barcodes(); + } else { + std::cout << "sparse distance matrix with " << dist.size() << " points and " << num_edges + << "/" << (dist.size()*dist.size()-1)/2 << " entries" << std::endl; - if (dim < dim_max) { - assemble_columns_to_reduce(columns_to_reduce, pivot_column_index, comp, dim, n, threshold, binomial_coeff); - } + ripser<sparse_distance_matrix>(sparse_distance_matrix(std::move(dist), threshold), dim_max, + threshold, ratio, modulus) + .compute_barcodes(); } } |