summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUlrich Bauer <mail@ulrich-bauer.org>2018-08-08 08:32:53 -0500
committerUlrich Bauer <mail@ulrich-bauer.org>2018-08-08 08:32:53 -0500
commit9ca3cf8c664721686d16ff8f4326bcbc02e7d42f (patch)
tree1e7d12ad2f35d806291837dc0a7bd50d140fbdfc
parent2dee30433bc1a09d2cc2240c5fbb92328cde3d81 (diff)
parent22017ba657b7068fed2f1ff4f2d76413aa8fa1e7 (diff)
Merge branch 'sparse-distance-matrix'
# Conflicts: # benchmarks/benchmarks.txt # ripser.xcodeproj/project.pbxproj
-rw-r--r--.clang-format2
-rw-r--r--Makefile7
-rw-r--r--README.md17
-rw-r--r--ripser.cpp1002
4 files changed, 634 insertions, 394 deletions
diff --git a/.clang-format b/.clang-format
index 598d7c2..1975b19 100644
--- a/.clang-format
+++ b/.clang-format
@@ -1,7 +1,7 @@
BasedOnStyle: LLVM
IndentWidth: 4
TabWidth: 4
-ColumnLimit: 120
+ColumnLimit: 100
AccessModifierOffset: -4
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: true
diff --git a/Makefile b/Makefile
index 73e09fb..893d776 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
build: ripser
-all: ripser ripser-coeff ripser-reduction ripser-debug
+all: ripser ripser-coeff ripser-reduction ripser-coeff-reduction ripser-debug
ripser: ripser.cpp
@@ -13,9 +13,12 @@ ripser-coeff: ripser.cpp
ripser-reduction: ripser.cpp
c++ -std=c++11 ripser.cpp -o ripser-reduction -Ofast -D NDEBUG -D ASSEMBLE_REDUCTION_MATRIX
+ripser-coeff-reduction: ripser.cpp
+ c++ -std=c++11 ripser.cpp -o ripser-coeff-reduction -Ofast -D NDEBUG -D USE_COEFFICIENTS -D ASSEMBLE_REDUCTION_MATRIX
+
ripser-debug: ripser.cpp
c++ -std=c++11 ripser.cpp -o ripser-debug -g
clean:
- rm -f ripser ripser-coeff ripser-reduction ripser-debug
+ rm -f ripser ripser-coeff ripser-reduction ripser-coeff-reduction ripser-debug
diff --git a/README.md b/README.md
index 0c5a2e6..73bbe59 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,13 @@
# Ripser
-Copyright © 2015–2016 [Ulrich Bauer].
+Copyright © 2015–2018 [Ulrich Bauer].
### Description
Ripser is a lean C++ code for the computation of Vietoris–Rips persistence barcodes. It can do just this one thing, but does it extremely well.
-To see a live demo of Ripser's capabilities, go to [live.ripser.org]. The computation happens inside the browser (using [PNaCl] on Chrome and JavaScript via [Emscripten] on other browsers).
+To see a live demo of Ripser's capabilities, go to [live.ripser.org]. The computation happens inside the browser (using [PNaCl] on Chrome and JavaScript via [Emscripten] on other browsers).
The main features of Ripser:
@@ -26,13 +26,14 @@ Input formats currently supported by Ripser:
- [DIPHA] distance matrix data
- point cloud data
-Ripser's efficiency is based on a few important concepts and principles:
+Ripser's efficiency is based on a few important concepts and principles, building on key previous and concurrent developments by other researchers in computational topology:
- - Compute persistent *co*homology
+ - Compute persistent *co*homology (as suggested by [Vin de Silva, Dmitriy Morozov, and Mikael Vejdemo-Johansson](https://doi.org/10.1088/0266-5611/27/12/124003))
- Don't compute information that is never needed
- (for the experts: employ the *clearing* optimization, aka *persistence with a twist*)
- - Don't store information that can be readily recomputed
- - Take obvious shortcuts (*apparent persistence pairs*)
+ (for the experts: employ the *clearing* optimization, aka *persistence with a twist*, as suggested by [Chao Chen and Michael Kerber](http://www.geometrie.tugraz.at/kerber/kerber_papers/ck-phcwat-11.pdf))
+ - Don't store information that can be readily recomputed (in particular, the boundary matrix and the reduced boundary matrix)
+ - Take computational shortcuts (*apparent* and *emergent persistence pairs*)
+ - If no threshold is specified, choose the *enclosing radius* as the threshold, from which on homology is guaranteed to be trivial (as suggested by [Greg Henselman-Petrusek](https://github.com/Eetion/Eirene.jl))
### Version
@@ -74,7 +75,7 @@ The input is given either in a file whose name is passed as an argument, or thro
- `--format`: use the specified file format for the input. The following formats are supported:
- `lower-distance` (default if no format is specified): lower triangular distance matrix; a comma (or whitespace, or other non-numerical character) separated list of the distance matrix entries below the diagonal, sorted lexicographically by row index, then column index
- `upper-distance`: upper triangular distance matrix; similar to the previous, but for the entries above the diagonal; suitable for output from the MATLAB functions `pdist` or `seqpdist`, exported to a CSV file
- - `distances`: full distance matrix; similar to the above, but for all entries of the distance matrix
+ - `distance`: full distance matrix; similar to the above, but for all entries of the distance matrix
- `dipha`: DIPHA distance matrix as described on the [DIPHA] website
- `point-cloud`: point cloud; a comma (or whitespace, or other non-numerical character) separated list of coordinates of the points in some Euclidean space, one point per line
- `--dim k`: compute persistent homology up to dimension *k*
diff --git a/ripser.cpp b/ripser.cpp
index 9d57fa9..f012591 100644
--- a/ripser.cpp
+++ b/ripser.cpp
@@ -47,35 +47,26 @@ template <class Key, class T> class hash_map : public std::unordered_map<Key, T>
#endif
typedef float value_t;
-// typedef uint16_t value_t;
-
typedef int64_t index_t;
typedef int16_t coefficient_t;
class binomial_coeff_table {
std::vector<std::vector<index_t>> B;
- index_t n_max, k_max;
public:
- binomial_coeff_table(index_t n, index_t k) {
- n_max = n;
- k_max = k;
-
- B.resize(n + 1);
+ binomial_coeff_table(index_t n, index_t k) : B(n + 1) {
for (index_t i = 0; i <= n; i++) {
B[i].resize(k + 1);
- for (index_t j = 0; j <= std::min(i, k); j++) {
+ for (index_t j = 0; j <= std::min(i, k); j++)
if (j == 0 || j == i)
B[i][j] = 1;
else
B[i][j] = B[i - 1][j - 1] + B[i - 1][j];
- }
}
}
index_t operator()(index_t n, index_t k) const {
- assert(n <= n_max);
- assert(k <= k_max);
+ assert(n < B.size() && k < B[n].size());
return B[n][k];
}
};
@@ -97,58 +88,23 @@ std::vector<coefficient_t> multiplicative_inverse_vector(const coefficient_t m)
return inverse;
}
-index_t get_next_vertex(index_t& v, const index_t idx, const index_t k, const binomial_coeff_table& binomial_coeff) {
- if (binomial_coeff(v, k) > idx) {
- index_t count = v;
- while (count > 0) {
- index_t i = v;
- index_t step = count >> 1;
- i -= step;
- if (binomial_coeff(i, k) > idx) {
- v = --i;
- count -= step + 1;
- } else
- count = step;
- }
- }
- assert(binomial_coeff(v, k) <= idx);
- assert(binomial_coeff(v + 1, k) > idx);
- return v;
-}
-
-template <typename OutputIterator>
-OutputIterator get_simplex_vertices(index_t idx, const index_t dim, index_t v,
- const binomial_coeff_table& binomial_coeff, OutputIterator out) {
- --v;
- for (index_t k = dim + 1; k > 0; --k) {
- get_next_vertex(v, idx, k, binomial_coeff);
- *out++ = v;
- idx -= binomial_coeff(v, k);
- }
- return out;
-}
-
-std::vector<index_t> vertices_of_simplex(const index_t simplex_index, const index_t dim, const index_t n,
- const binomial_coeff_table& binomial_coeff) {
- std::vector<index_t> vertices;
- get_simplex_vertices(simplex_index, dim, n, binomial_coeff, std::back_inserter(vertices));
- return vertices;
-}
-
#ifdef USE_COEFFICIENTS
-struct entry_t {
+struct __attribute__((packed)) entry_t {
index_t index : 8 * (sizeof(index_t) - sizeof(coefficient_t));
coefficient_t coefficient;
- entry_t(index_t _index, coefficient_t _coefficient) : index(_index), coefficient(_coefficient) {}
+ entry_t(index_t _index, coefficient_t _coefficient)
+ : index(_index), coefficient(_coefficient) {}
entry_t(index_t _index) : index(_index), coefficient(1) {}
entry_t() : index(0), coefficient(1) {}
-} __attribute__((packed));
+};
static_assert(sizeof(entry_t) == sizeof(index_t), "size of entry_t is not the same as index_t");
-entry_t make_entry(index_t _index, coefficient_t _coefficient) { return entry_t(_index, _coefficient); }
-index_t get_index(entry_t e) { return e.index; }
-index_t get_coefficient(entry_t e) { return e.coefficient; }
+entry_t make_entry(index_t _index, coefficient_t _coefficient) {
+ return entry_t(_index, _coefficient);
+}
+index_t get_index(const entry_t& e) { return e.index; }
+index_t get_coefficient(const entry_t& e) { return e.coefficient; }
void set_coefficient(entry_t& e, const coefficient_t c) { e.coefficient = c; }
bool operator==(const entry_t& e1, const entry_t& e2) {
@@ -163,46 +119,42 @@ std::ostream& operator<<(std::ostream& stream, const entry_t& e) {
#else
typedef index_t entry_t;
-const index_t get_index(entry_t i) { return i; }
-index_t get_coefficient(entry_t i) { return 1; }
+const index_t get_index(const entry_t& i) { return i; }
+index_t get_coefficient(const entry_t& i) { return 1; }
entry_t make_entry(index_t _index, coefficient_t _value) { return entry_t(_index); }
-void set_coefficient(index_t& e, const coefficient_t c) {}
+void set_coefficient(entry_t& e, const coefficient_t c) {}
#endif
const entry_t& get_entry(const entry_t& e) { return e; }
-template <typename Entry> struct smaller_index {
- bool operator()(const Entry& a, const Entry& b) { return get_index(a) < get_index(b); }
-};
-
-class diameter_index_t : public std::pair<value_t, index_t> {
-public:
- diameter_index_t() : std::pair<value_t, index_t>() {}
- diameter_index_t(std::pair<value_t, index_t> p) : std::pair<value_t, index_t>(p) {}
-};
-value_t get_diameter(diameter_index_t i) { return i.first; }
-index_t get_index(diameter_index_t i) { return i.second; }
+typedef std::pair<value_t, index_t> diameter_index_t;
+value_t get_diameter(const diameter_index_t& i) { return i.first; }
+index_t get_index(const diameter_index_t& i) { return i.second; }
class diameter_entry_t : public std::pair<value_t, entry_t> {
public:
- diameter_entry_t(std::pair<value_t, entry_t> p) : std::pair<value_t, entry_t>(p) {}
- diameter_entry_t(entry_t e) : std::pair<value_t, entry_t>(0, e) {}
- diameter_entry_t() : diameter_entry_t(0) {}
+ diameter_entry_t() {}
+ diameter_entry_t(const entry_t& e) : std::pair<value_t, entry_t>(0, e) {}
diameter_entry_t(value_t _diameter, index_t _index, coefficient_t _coefficient)
: std::pair<value_t, entry_t>(_diameter, make_entry(_index, _coefficient)) {}
- diameter_entry_t(diameter_index_t _diameter_index, coefficient_t _coefficient)
+ diameter_entry_t(const diameter_index_t& _diameter_index, coefficient_t _coefficient)
: std::pair<value_t, entry_t>(get_diameter(_diameter_index),
make_entry(get_index(_diameter_index), _coefficient)) {}
- diameter_entry_t(diameter_index_t _diameter_index) : diameter_entry_t(_diameter_index, 1) {}
+ diameter_entry_t(const diameter_index_t& _diameter_index)
+ : diameter_entry_t(_diameter_index, 1) {}
};
const entry_t& get_entry(const diameter_entry_t& p) { return p.second; }
entry_t& get_entry(diameter_entry_t& p) { return p.second; }
const index_t get_index(const diameter_entry_t& p) { return get_index(get_entry(p)); }
-const coefficient_t get_coefficient(const diameter_entry_t& p) { return get_coefficient(get_entry(p)); }
+const coefficient_t get_coefficient(const diameter_entry_t& p) {
+ return get_coefficient(get_entry(p));
+}
const value_t& get_diameter(const diameter_entry_t& p) { return p.first; }
-void set_coefficient(diameter_entry_t& p, const coefficient_t c) { set_coefficient(get_entry(p), c); }
+void set_coefficient(diameter_entry_t& p, const coefficient_t c) {
+ set_coefficient(get_entry(p), c);
+}
template <typename Entry> struct greater_diameter_or_smaller_index {
bool operator()(const Entry& a, const Entry& b) {
@@ -211,83 +163,6 @@ template <typename Entry> struct greater_diameter_or_smaller_index {
}
};
-template <typename DistanceMatrix> class rips_filtration_comparator {
-public:
- const DistanceMatrix& dist;
- const index_t dim;
-
-private:
- mutable std::vector<index_t> vertices;
- const binomial_coeff_table& binomial_coeff;
-
-public:
- rips_filtration_comparator(const DistanceMatrix& _dist, const index_t _dim,
- const binomial_coeff_table& _binomial_coeff)
- : dist(_dist), dim(_dim), vertices(_dim + 1), binomial_coeff(_binomial_coeff){};
-
- value_t diameter(const index_t index) const {
- value_t diam = 0;
- get_simplex_vertices(index, dim, dist.size(), binomial_coeff, vertices.begin());
-
- for (index_t i = 0; i <= dim; ++i)
- for (index_t j = 0; j < i; ++j) { diam = std::max(diam, dist(vertices[i], vertices[j])); }
- return diam;
- }
-
- bool operator()(const index_t a, const index_t b) const {
- assert(a < binomial_coeff(dist.size(), dim + 1));
- assert(b < binomial_coeff(dist.size(), dim + 1));
-
- return greater_diameter_or_smaller_index<diameter_index_t>()(diameter_index_t(diameter(a), a),
- diameter_index_t(diameter(b), b));
- }
-
- template <typename Entry> bool operator()(const Entry& a, const Entry& b) const {
- return operator()(get_index(a), get_index(b));
- }
-};
-
-template <class DistanceMatrix> class simplex_coboundary_enumerator {
-private:
- index_t idx_below, idx_above, v, k;
- std::vector<index_t> vertices;
- const diameter_entry_t simplex;
- const coefficient_t modulus;
- const DistanceMatrix& dist;
- const binomial_coeff_table& binomial_coeff;
-
-public:
- simplex_coboundary_enumerator(const diameter_entry_t _simplex, index_t _dim, index_t _n,
- const coefficient_t _modulus, const DistanceMatrix& _dist,
- const binomial_coeff_table& _binomial_coeff)
- : simplex(_simplex), idx_below(get_index(_simplex)), idx_above(0), v(_n - 1), k(_dim + 1), modulus(_modulus),
- binomial_coeff(_binomial_coeff), dist(_dist), vertices(_dim + 1) {
- get_simplex_vertices(get_index(_simplex), _dim, _n, binomial_coeff, vertices.begin());
- }
-
- bool has_next() {
- while ((v != -1) && (binomial_coeff(v, k) <= idx_below)) {
- idx_below -= binomial_coeff(v, k);
- idx_above += binomial_coeff(v, k + 1);
-
- --v;
- --k;
- assert(k != -1);
- }
- return v != -1;
- }
-
- index_t next_index() { return idx_above + binomial_coeff(v--, k + 1) + idx_below; }
-
- diameter_entry_t next() {
- value_t coface_diameter = get_diameter(simplex);
- for (index_t w : vertices) coface_diameter = std::max(coface_diameter, dist(v, w));
- coefficient_t coface_coefficient = (k & 1 ? -1 + modulus : 1) * get_coefficient(simplex) % modulus;
- return diameter_entry_t(coface_diameter, idx_above + binomial_coeff(v--, k + 1) + idx_below,
- coface_coefficient);
- }
-};
-
enum compressed_matrix_layout { LOWER_TRIANGULAR, UPPER_TRIANGULAR };
template <compressed_matrix_layout Layout> class compressed_distance_matrix {
@@ -298,7 +173,7 @@ public:
void init_rows();
compressed_distance_matrix(std::vector<value_t>&& _distances)
- : distances(_distances), rows((1 + std::sqrt(1 + 8 * distances.size())) / 2) {
+ : distances(std::move(_distances)), rows((1 + std::sqrt(1 + 8 * distances.size())) / 2) {
assert(distances.size() == size() * (size() - 1) / 2);
init_rows();
}
@@ -317,6 +192,22 @@ public:
size_t size() const { return rows.size(); }
};
+class sparse_distance_matrix {
+public:
+ std::vector<std::vector<diameter_index_t>> neighbors;
+
+ template <typename DistanceMatrix>
+ sparse_distance_matrix(const DistanceMatrix& mat, value_t threshold) : neighbors(mat.size()) {
+
+ for (index_t i = 0; i < size(); ++i)
+ for (index_t j = 0; j < size(); ++j)
+ if (i != j && mat(i, j) <= threshold)
+ neighbors[i].push_back(std::make_pair(mat(i, j), j));
+ }
+
+ size_t size() const { return neighbors.size(); }
+};
+
template <> void compressed_distance_matrix<LOWER_TRIANGULAR>::init_rows() {
value_t* pointer = &distances[0];
for (index_t i = 1; i < size(); ++i) {
@@ -333,14 +224,16 @@ template <> void compressed_distance_matrix<UPPER_TRIANGULAR>::init_rows() {
}
}
-template <> value_t compressed_distance_matrix<UPPER_TRIANGULAR>::operator()(index_t i, index_t j) const {
- if (i > j) std::swap(i, j);
- return i == j ? 0 : rows[i][j];
+template <>
+value_t compressed_distance_matrix<UPPER_TRIANGULAR>::operator()(const index_t i,
+ const index_t j) const {
+ return i == j ? 0 : i > j ? rows[j][i] : rows[i][j];
}
-template <> value_t compressed_distance_matrix<LOWER_TRIANGULAR>::operator()(index_t i, index_t j) const {
- if (i > j) std::swap(i, j);
- return i == j ? 0 : rows[j][i];
+template <>
+value_t compressed_distance_matrix<LOWER_TRIANGULAR>::operator()(const index_t i,
+ const index_t j) const {
+ return i == j ? 0 : i < j ? rows[j][i] : rows[i][j];
}
typedef compressed_distance_matrix<LOWER_TRIANGULAR> compressed_lower_distance_matrix;
@@ -350,12 +243,17 @@ class euclidean_distance_matrix {
public:
std::vector<std::vector<value_t>> points;
- euclidean_distance_matrix(std::vector<std::vector<value_t>>&& _points) : points(_points) {}
+ euclidean_distance_matrix(std::vector<std::vector<value_t>>&& _points)
+ : points(std::move(_points)) {
+ for (auto p : points) { assert(p.size() == points.front().size()); }
+ }
value_t operator()(const index_t i, const index_t j) const {
- return std::sqrt(std::inner_product(points[i].begin(), points[i].end(), points[j].begin(), value_t(),
- std::plus<value_t>(),
- [](value_t u, value_t v) { return (u - v) * (u - v); }));
+ assert(i < points.size());
+ assert(j < points.size());
+ return std::sqrt(std::inner_product(
+ points[i].begin(), points[i].end(), points[j].begin(), value_t(), std::plus<value_t>(),
+ [](value_t u, value_t v) { return (u - v) * (u - v); }));
}
size_t size() const { return points.size(); }
@@ -371,23 +269,16 @@ public:
}
index_t find(index_t x) {
- index_t y = x, z = parent[y];
- while (z != y) {
- y = z;
- z = parent[y];
- }
- y = parent[x];
- while (z != y) {
- parent[x] = z;
- x = y;
- y = parent[x];
+ index_t y = x, z;
+ while ((z = parent[y]) != y) y = z;
+ while ((z = parent[x]) != y) {
+ parent[x] = y;
+ x = z;
}
return z;
}
void link(index_t x, index_t y) {
- x = find(x);
- y = find(y);
- if (x == y) return;
+ if ((x = find(x)) == (y = find(y))) return;
if (rank[x] > rank[y])
parent[y] = x;
else {
@@ -480,213 +371,562 @@ public:
}
};
-template <typename Heap> void push_entry(Heap& column, index_t i, coefficient_t c, value_t diameter) {
- entry_t e = make_entry(i, c);
- column.push(std::make_pair(diameter, e));
-}
+template <typename DistanceMatrix> class ripser {
+ DistanceMatrix dist;
+ index_t n, dim_max;
+ value_t threshold;
+ float ratio;
+ coefficient_t modulus;
+ const binomial_coeff_table binomial_coeff;
+ std::vector<coefficient_t> multiplicative_inverse;
+ mutable std::vector<index_t> vertices;
+ mutable std::vector<std::vector<diameter_index_t>::const_reverse_iterator> neighbor_it;
+ mutable std::vector<std::vector<diameter_index_t>::const_reverse_iterator> neighbor_end;
+ mutable std::vector<diameter_entry_t> coface_entries;
+
+public:
+ ripser(DistanceMatrix&& _dist, index_t _dim_max, value_t _threshold, float _ratio,
+ coefficient_t _modulus)
+ : dist(std::move(_dist)), n(dist.size()),
+ dim_max(std::min(_dim_max, index_t(dist.size() - 2))), threshold(_threshold),
+ ratio(_ratio), modulus(_modulus), binomial_coeff(n, dim_max + 2),
+ multiplicative_inverse(multiplicative_inverse_vector(_modulus)) {}
+
+ index_t get_next_vertex(index_t& v, const index_t idx, const index_t k) const {
+ if (binomial_coeff(v, k) > idx) {
+ index_t count = v;
+ while (count > 0) {
+ index_t i = v;
+ index_t step = count >> 1;
+ i -= step;
+ if (binomial_coeff(i, k) > idx) {
+ v = --i;
+ count -= step + 1;
+ } else
+ count = step;
+ }
+ }
+ assert(binomial_coeff(v, k) <= idx && binomial_coeff(v + 1, k) > idx);
+ return v;
+ }
-template <typename Comparator>
-void assemble_columns_to_reduce(std::vector<diameter_index_t>& columns_to_reduce,
- hash_map<index_t, index_t>& pivot_column_index, const Comparator& comp, index_t dim,
- index_t n, value_t threshold, const binomial_coeff_table& binomial_coeff) {
- index_t num_simplices = binomial_coeff(n, dim + 2);
+ index_t get_edge_index(const index_t i, const index_t j) const {
+ return binomial_coeff(i, 2) + j;
+ }
- columns_to_reduce.clear();
+ template <typename OutputIterator>
+ OutputIterator get_simplex_vertices(index_t idx, const index_t dim, index_t v,
+ OutputIterator out) const {
+ --v;
+ for (index_t k = dim + 1; k > 0; --k) {
+ get_next_vertex(v, idx, k);
+ *out++ = v;
+ idx -= binomial_coeff(v, k);
+ }
+ return out;
+ }
-#ifdef INDICATE_PROGRESS
- std::cout << "\033[K"
- << "assembling " << num_simplices << " columns" << std::flush << "\r";
+ value_t compute_diameter(const index_t index, index_t dim) const {
+ value_t diam = -std::numeric_limits<value_t>::infinity();
+
+ vertices.clear();
+ get_simplex_vertices(index, dim, dist.size(), std::back_inserter(vertices));
+
+ for (index_t i = 0; i <= dim; ++i)
+ for (index_t j = 0; j < i; ++j) {
+ diam = std::max(diam, dist(vertices[i], vertices[j]));
+ }
+ return diam;
+ }
+
+ class simplex_coboundary_enumerator;
+
+ void assemble_columns_to_reduce(std::vector<diameter_index_t>& simplices,
+ std::vector<diameter_index_t>& columns_to_reduce,
+ hash_map<index_t, index_t>& pivot_column_index, index_t dim);
+
+ void compute_dim_0_pairs(std::vector<diameter_index_t>& edges,
+ std::vector<diameter_index_t>& columns_to_reduce) {
+ union_find dset(n);
+
+ edges = get_edges();
+
+ std::sort(edges.rbegin(), edges.rend(),
+ greater_diameter_or_smaller_index<diameter_index_t>());
+
+#ifdef PRINT_PERSISTENCE_PAIRS
+ std::cout << "persistence intervals in dim 0:" << std::endl;
#endif
- for (index_t index = 0; index < num_simplices; ++index) {
- if (pivot_column_index.find(index) == pivot_column_index.end()) {
- value_t diameter = comp.diameter(index);
- if (diameter <= threshold) columns_to_reduce.push_back(std::make_pair(diameter, index));
-#ifdef INDICATE_PROGRESS
- if ((index + 1) % 1000 == 0)
- std::cout << "\033[K"
- << "assembled " << columns_to_reduce.size() << " out of " << (index + 1) << "/"
- << num_simplices << " columns" << std::flush << "\r";
+ std::vector<index_t> vertices_of_edge(2);
+ for (auto e : edges) {
+ vertices_of_edge.clear();
+ get_simplex_vertices(get_index(e), 1, n, std::back_inserter(vertices_of_edge));
+ index_t u = dset.find(vertices_of_edge[0]), v = dset.find(vertices_of_edge[1]);
+
+ if (u != v) {
+#ifdef PRINT_PERSISTENCE_PAIRS
+ if (get_diameter(e) != 0)
+ std::cout << " [0," << get_diameter(e) << ")" << std::endl;
#endif
+ dset.link(u, v);
+ } else
+ columns_to_reduce.push_back(e);
}
- }
+ std::reverse(columns_to_reduce.begin(), columns_to_reduce.end());
-#ifdef INDICATE_PROGRESS
- std::cout << "\033[K"
- << "sorting " << num_simplices << " columns" << std::flush << "\r";
+#ifdef PRINT_PERSISTENCE_PAIRS
+ for (index_t i = 0; i < n; ++i)
+ if (dset.find(i) == i) std::cout << " [0, )" << std::endl << std::flush;
#endif
+ }
- std::sort(columns_to_reduce.begin(), columns_to_reduce.end(),
- greater_diameter_or_smaller_index<diameter_index_t>());
-#ifdef INDICATE_PROGRESS
- std::cout << "\033[K";
+ template <typename Column, typename Iterator>
+ diameter_entry_t add_coboundary_and_get_pivot(Iterator column_begin, Iterator column_end,
+ coefficient_t factor_column_to_add,
+#ifdef ASSEMBLE_REDUCTION_MATRIX
+ Column& working_reduction_column,
#endif
-}
+ Column& working_coboundary, const index_t& dim,
+ hash_map<index_t, index_t>& pivot_column_index,
+ bool& might_be_apparent_pair);
-template <typename DistanceMatrix, typename ComparatorCofaces, typename Comparator>
-void compute_pairs(std::vector<diameter_index_t>& columns_to_reduce, hash_map<index_t, index_t>& pivot_column_index,
- index_t dim, index_t n, value_t threshold, coefficient_t modulus,
- const std::vector<coefficient_t>& multiplicative_inverse, const DistanceMatrix& dist,
- const ComparatorCofaces& comp, const Comparator& comp_prev,
- const binomial_coeff_table& binomial_coeff) {
+ void compute_pairs(std::vector<diameter_index_t>& columns_to_reduce,
+ hash_map<index_t, index_t>& pivot_column_index, index_t dim) {
#ifdef PRINT_PERSISTENCE_PAIRS
- std::cout << "persistence intervals in dim " << dim << ":" << std::endl;
+ std::cout << "persistence intervals in dim " << dim << ":" << std::endl;
#endif
#ifdef ASSEMBLE_REDUCTION_MATRIX
- compressed_sparse_matrix<diameter_entry_t> reduction_coefficients;
+ compressed_sparse_matrix<diameter_entry_t> reduction_matrix;
#else
#ifdef USE_COEFFICIENTS
- std::vector<diameter_entry_t> reduction_coefficients;
+ std::vector<diameter_entry_t> reduction_matrix;
#endif
#endif
- std::vector<diameter_entry_t> coface_entries;
+ std::vector<diameter_entry_t> coface_entries;
- for (index_t i = 0; i < columns_to_reduce.size(); ++i) {
- auto column_to_reduce = columns_to_reduce[i];
+ for (index_t index_column_to_reduce = 0; index_column_to_reduce < columns_to_reduce.size();
+ ++index_column_to_reduce) {
+ auto column_to_reduce = columns_to_reduce[index_column_to_reduce];
#ifdef ASSEMBLE_REDUCTION_MATRIX
- std::priority_queue<diameter_entry_t, std::vector<diameter_entry_t>, smaller_index<diameter_entry_t>>
- reduction_column;
+ std::priority_queue<diameter_entry_t, std::vector<diameter_entry_t>,
+ greater_diameter_or_smaller_index<diameter_entry_t>>
+ working_reduction_column;
#endif
- std::priority_queue<diameter_entry_t, std::vector<diameter_entry_t>,
- greater_diameter_or_smaller_index<diameter_entry_t>>
- working_coboundary;
+ std::priority_queue<diameter_entry_t, std::vector<diameter_entry_t>,
+ greater_diameter_or_smaller_index<diameter_entry_t>>
+ working_coboundary;
- value_t diameter = get_diameter(column_to_reduce);
+ value_t diameter = get_diameter(column_to_reduce);
#ifdef INDICATE_PROGRESS
- if ((i + 1) % 1000 == 0)
- std::cout << "\033[K"
- << "reducing column " << i + 1 << "/" << columns_to_reduce.size() << " (diameter " << diameter
- << ")" << std::flush << "\r";
+ if ((index_column_to_reduce + 1) % 1000000 == 0)
+ std::cout << "\033[K"
+ << "reducing column " << index_column_to_reduce + 1 << "/"
+ << columns_to_reduce.size() << " (diameter " << diameter << ")"
+ << std::flush << "\r";
#endif
- index_t j = i;
+ index_t index_column_to_add = index_column_to_reduce;
+
+ diameter_entry_t pivot;
- // start with a dummy pivot entry with coefficient -1 in order to initialize
- // working_coboundary with the coboundary of the simplex with index column_to_reduce
- diameter_entry_t pivot(0, -1, -1 + modulus);
+ // start with factor 1 in order to initialize working_coboundary
+ // with the coboundary of the simplex with index column_to_reduce
+ coefficient_t factor_column_to_add = 1;
#ifdef ASSEMBLE_REDUCTION_MATRIX
- // initialize reduction_coefficients as identity matrix
- reduction_coefficients.append_column();
- reduction_coefficients.push_back(diameter_entry_t(column_to_reduce, 1));
+ // initialize reduction_matrix as identity matrix
+ reduction_matrix.append_column();
+#endif
+#ifdef USE_COEFFICIENTS
+ reduction_matrix.push_back(diameter_entry_t(column_to_reduce, 1));
+#endif
+
+ bool might_be_apparent_pair = (index_column_to_reduce == index_column_to_add);
+
+ while (true) {
+#ifdef ASSEMBLE_REDUCTION_MATRIX
+#ifdef USE_COEFFICIENTS
+ auto reduction_column_begin = reduction_matrix.cbegin(index_column_to_add),
+ reduction_column_end = reduction_matrix.cend(index_column_to_add);
+#else
+ std::vector<diameter_entry_t> coeffs;
+ coeffs.push_back(columns_to_reduce[index_column_to_add]);
+ for (auto it = reduction_matrix.cbegin(index_column_to_add);
+ it != reduction_matrix.cend(index_column_to_add); ++it)
+ coeffs.push_back(*it);
+ auto reduction_column_begin = coeffs.begin(), reduction_column_end = coeffs.end();
+#endif
#else
#ifdef USE_COEFFICIENTS
- reduction_coefficients.push_back(diameter_entry_t(column_to_reduce, 1));
+ auto reduction_column_begin = &reduction_matrix[index_column_to_add],
+ reduction_column_end = &reduction_matrix[index_column_to_add] + 1;
+#else
+ auto reduction_column_begin = &columns_to_reduce[index_column_to_add],
+ reduction_column_end = &columns_to_reduce[index_column_to_add] + 1;
#endif
#endif
- bool might_be_apparent_pair = (i == j);
+ pivot = add_coboundary_and_get_pivot(
+ reduction_column_begin, reduction_column_end, factor_column_to_add,
+#ifdef ASSEMBLE_REDUCTION_MATRIX
+ working_reduction_column,
+#endif
+ working_coboundary, dim, pivot_column_index, might_be_apparent_pair);
- do {
- const coefficient_t factor = modulus - get_coefficient(pivot);
+ if (get_index(pivot) != -1) {
+ auto pair = pivot_column_index.find(get_index(pivot));
+
+ if (pair != pivot_column_index.end()) {
+ index_column_to_add = pair->second;
+ factor_column_to_add = modulus - get_coefficient(pivot);
+ } else {
+#ifdef PRINT_PERSISTENCE_PAIRS
+ value_t death = get_diameter(pivot);
+ if (death > diameter * ratio) {
+#ifdef INDICATE_PROGRESS
+ std::cout << "\033[K";
+#endif
+ std::cout << " [" << diameter << "," << death << ")" << std::endl
+ << std::flush;
+ }
+#endif
+ pivot_column_index.insert(
+ std::make_pair(get_index(pivot), index_column_to_reduce));
+
+#ifdef USE_COEFFICIENTS
+ const coefficient_t inverse =
+ multiplicative_inverse[get_coefficient(pivot)];
+#endif
#ifdef ASSEMBLE_REDUCTION_MATRIX
- auto coeffs_begin = reduction_coefficients.cbegin(j), coeffs_end = reduction_coefficients.cend(j);
+ // replace current column of reduction_matrix (with a single diagonal 1
+ // entry) by reduction_column (possibly with a different entry on the
+ // diagonal)
+#ifdef USE_COEFFICIENTS
+ reduction_matrix.pop_back();
#else
+ pop_pivot(working_reduction_column, modulus);
+#endif
+
+ while (true) {
+ diameter_entry_t e = pop_pivot(working_reduction_column, modulus);
+ if (get_index(e) == -1) break;
#ifdef USE_COEFFICIENTS
- auto coeffs_begin = &reduction_coefficients[j], coeffs_end = &reduction_coefficients[j] + 1;
+ set_coefficient(e, inverse * get_coefficient(e) % modulus);
+ assert(get_coefficient(e) > 0);
+#endif
+ reduction_matrix.push_back(e);
+ }
#else
- auto coeffs_begin = &columns_to_reduce[j], coeffs_end = &columns_to_reduce[j] + 1;
+#ifdef USE_COEFFICIENTS
+ reduction_matrix.pop_back();
+ reduction_matrix.push_back(diameter_entry_t(column_to_reduce, inverse));
#endif
#endif
+ break;
+ }
+ } else {
+#ifdef PRINT_PERSISTENCE_PAIRS
+ std::cout << " [" << diameter << ", )" << std::endl << std::flush;
+#endif
+ break;
+ }
+ }
+ }
+
+#ifdef INDICATE_PROGRESS
+ std::cout << "\033[K";
+#endif
+ }
+
+ std::vector<diameter_index_t> get_edges();
+
+ void compute_barcodes() {
+
+ std::vector<diameter_index_t> simplices, columns_to_reduce;
+
+ compute_dim_0_pairs(simplices, columns_to_reduce);
+
+ for (index_t dim = 1; dim <= dim_max; ++dim) {
+ hash_map<index_t, index_t> pivot_column_index;
+ pivot_column_index.reserve(columns_to_reduce.size());
+
+ compute_pairs(columns_to_reduce, pivot_column_index, dim);
+
+ if (dim < dim_max) {
+ assemble_columns_to_reduce(simplices, columns_to_reduce, pivot_column_index,
+ dim + 1);
+ }
+ }
+ }
+};
+
+template <> class ripser<compressed_lower_distance_matrix>::simplex_coboundary_enumerator {
+private:
+ index_t idx_below, idx_above, v, k;
+ std::vector<index_t> vertices;
+ const diameter_entry_t simplex;
+ const coefficient_t modulus;
+ const compressed_lower_distance_matrix& dist;
+ const binomial_coeff_table& binomial_coeff;
+
+public:
+ simplex_coboundary_enumerator(const diameter_entry_t _simplex, index_t _dim,
+ const ripser& parent)
+ : idx_below(get_index(_simplex)), idx_above(0), v(parent.n - 1), k(_dim + 1),
+ vertices(_dim + 1), simplex(_simplex), modulus(parent.modulus), dist(parent.dist),
+ binomial_coeff(parent.binomial_coeff) {
+ parent.get_simplex_vertices(get_index(_simplex), _dim, parent.n, vertices.begin());
+ }
+
+ bool has_next() {
+ while ((v != -1) && (binomial_coeff(v, k) <= idx_below)) {
+ idx_below -= binomial_coeff(v, k);
+ idx_above += binomial_coeff(v, k + 1);
+ --v;
+ --k;
+ assert(k != -1);
+ }
+ return v != -1;
+ }
- for (auto it = coeffs_begin; it != coeffs_end; ++it) {
- diameter_entry_t simplex = *it;
- set_coefficient(simplex, get_coefficient(simplex) * factor % modulus);
+ diameter_entry_t next() {
+ value_t coface_diameter = get_diameter(simplex);
+ for (index_t w : vertices) coface_diameter = std::max(coface_diameter, dist(v, w));
+ index_t coface_index = idx_above + binomial_coeff(v--, k + 1) + idx_below;
+ coefficient_t coface_coefficient =
+ (k & 1 ? -1 + modulus : 1) * get_coefficient(simplex) % modulus;
+ return diameter_entry_t(coface_diameter, coface_index, coface_coefficient);
+ }
+};
+template <typename DistanceMatrix>
+template <typename Column, typename Iterator>
+diameter_entry_t ripser<DistanceMatrix>::add_coboundary_and_get_pivot(
+ Iterator column_begin, Iterator column_end, coefficient_t factor_column_to_add,
#ifdef ASSEMBLE_REDUCTION_MATRIX
- reduction_column.push(simplex);
-#endif
-
- coface_entries.clear();
- simplex_coboundary_enumerator<decltype(dist)> cofaces(simplex, dim, n, modulus, dist, binomial_coeff);
- while (cofaces.has_next()) {
- diameter_entry_t coface = cofaces.next();
- if (get_diameter(coface) <= threshold) {
- coface_entries.push_back(coface);
- if (might_be_apparent_pair && (get_diameter(simplex) == get_diameter(coface))) {
- if (pivot_column_index.find(get_index(coface)) == pivot_column_index.end()) {
- pivot = coface;
- goto found_persistence_pair;
- }
- might_be_apparent_pair = false;
- }
+ Column& working_reduction_column,
+#endif
+ Column& working_coboundary, const index_t& dim, hash_map<index_t, index_t>& pivot_column_index,
+ bool& might_be_apparent_pair) {
+ for (auto it = column_begin; it != column_end; ++it) {
+ diameter_entry_t simplex = *it;
+ set_coefficient(simplex, get_coefficient(simplex) * factor_column_to_add % modulus);
+
+#ifdef ASSEMBLE_REDUCTION_MATRIX
+ working_reduction_column.push(simplex);
+#endif
+
+ coface_entries.clear();
+ simplex_coboundary_enumerator cofaces(simplex, dim, *this);
+ while (cofaces.has_next()) {
+ diameter_entry_t coface = cofaces.next();
+ if (get_diameter(coface) <= threshold) {
+ coface_entries.push_back(coface);
+ if (might_be_apparent_pair && (get_diameter(simplex) == get_diameter(coface))) {
+ if (pivot_column_index.find(get_index(coface)) == pivot_column_index.end()) {
+ return coface;
}
+ might_be_apparent_pair = false;
}
- for (auto e : coface_entries) working_coboundary.push(e);
}
+ }
+ for (auto coface : coface_entries) working_coboundary.push(coface);
+ }
- pivot = get_pivot(working_coboundary, modulus);
+ return get_pivot(working_coboundary, modulus);
+}
- if (get_index(pivot) != -1) {
- auto pair = pivot_column_index.find(get_index(pivot));
+template <> class ripser<sparse_distance_matrix>::simplex_coboundary_enumerator {
+private:
+ const ripser& parent;
+
+ index_t idx_below, idx_above, v, k, max_vertex_below;
+ const diameter_entry_t simplex;
+ const coefficient_t modulus;
+ const sparse_distance_matrix& dist;
+ const binomial_coeff_table& binomial_coeff;
+
+ std::vector<index_t>& vertices;
+ std::vector<std::vector<diameter_index_t>::const_reverse_iterator>& neighbor_it;
+ std::vector<std::vector<diameter_index_t>::const_reverse_iterator>& neighbor_end;
+ diameter_index_t x;
+
+public:
+ simplex_coboundary_enumerator(const diameter_entry_t _simplex, index_t _dim,
+ const ripser& _parent)
+ : parent(_parent), idx_below(get_index(_simplex)), idx_above(0), v(parent.n - 1),
+ k(_dim + 1), max_vertex_below(parent.n - 1), simplex(_simplex), modulus(parent.modulus),
+ dist(parent.dist), binomial_coeff(parent.binomial_coeff), vertices(parent.vertices),
+ neighbor_it(parent.neighbor_it), neighbor_end(parent.neighbor_end) {
+
+ neighbor_it.clear();
+ neighbor_end.clear();
+ vertices.clear();
+
+ parent.get_simplex_vertices(idx_below, _dim, parent.n, std::back_inserter(vertices));
+
+ for (auto v : vertices) {
+ neighbor_it.push_back(dist.neighbors[v].rbegin());
+ neighbor_end.push_back(dist.neighbors[v].rend());
+ }
+ }
+
+ bool has_next(bool all_cofaces = true) {
+ for (auto &it0 = neighbor_it[0], &end0 = neighbor_end[0]; it0 != end0; ++it0) {
+ x = *it0;
+ for (size_t idx = 1; idx < neighbor_it.size(); ++idx) {
+ auto &it = neighbor_it[idx], end = neighbor_end[idx];
+ while (get_index(*it) > get_index(x))
+ if (++it == end) return false;
+ auto y = *it;
+ if (get_index(y) != get_index(x))
+ goto continue_outer;
+ else
+ x = std::max(x, y);
+ }
+ return all_cofaces || !(k > 0 && parent.get_next_vertex(max_vertex_below, idx_below,
+ k) > get_index(x));
+ continue_outer:;
+ }
+ return false;
+ }
+
+ diameter_entry_t next() {
+ ++neighbor_it[0];
+
+ while (k > 0 && parent.get_next_vertex(max_vertex_below, idx_below, k) > get_index(x)) {
+ idx_below -= binomial_coeff(max_vertex_below, k);
+ idx_above += binomial_coeff(max_vertex_below, k + 1);
+ --k;
+ }
+
+ value_t coface_diameter = std::max(get_diameter(simplex), get_diameter(x));
+
+ coefficient_t coface_coefficient =
+ (k & 1 ? -1 + modulus : 1) * get_coefficient(simplex) % modulus;
+
+ return diameter_entry_t(coface_diameter,
+ idx_above + binomial_coeff(get_index(x), k + 1) + idx_below,
+ coface_coefficient);
+ }
+};
+
+template <> std::vector<diameter_index_t> ripser<compressed_lower_distance_matrix>::get_edges() {
+ std::vector<diameter_index_t> edges;
+ for (index_t index = binomial_coeff(n, 2); index-- > 0;) {
+ value_t diameter = compute_diameter(index, 1);
+ if (diameter <= threshold) edges.push_back(std::make_pair(diameter, index));
+ }
+ return edges;
+}
+
+template <> std::vector<diameter_index_t> ripser<sparse_distance_matrix>::get_edges() {
+ std::vector<diameter_index_t> edges;
+ for (index_t i = 0; i < n; ++i)
+ for (auto n : dist.neighbors[i]) {
+ index_t j = get_index(n);
+ if (i > j) edges.push_back(std::make_pair(get_diameter(n), get_edge_index(i, j)));
+ }
+ return edges;
+}
+
+template <>
+void ripser<compressed_lower_distance_matrix>::assemble_columns_to_reduce(
+ std::vector<diameter_index_t>& simplices, std::vector<diameter_index_t>& columns_to_reduce,
+ hash_map<index_t, index_t>& pivot_column_index, index_t dim) {
+ index_t num_simplices = binomial_coeff(n, dim + 1);
+
+ columns_to_reduce.clear();
- if (pair != pivot_column_index.end()) {
- j = pair->second;
- continue;
- }
- } else {
-#ifdef PRINT_PERSISTENCE_PAIRS
#ifdef INDICATE_PROGRESS
- std::cout << "\033[K";
+ std::cout << "\033[K"
+ << "assembling " << num_simplices << " columns" << std::flush << "\r";
#endif
- std::cout << " [" << diameter << ", )" << std::endl << std::flush;
+
+ for (index_t index = 0; index < num_simplices; ++index) {
+ if (pivot_column_index.find(index) == pivot_column_index.end()) {
+ value_t diameter = compute_diameter(index, dim);
+ if (diameter <= threshold) columns_to_reduce.push_back(std::make_pair(diameter, index));
+#ifdef INDICATE_PROGRESS
+ if ((index + 1) % 1000000 == 0)
+ std::cout << "\033[K"
+ << "assembled " << columns_to_reduce.size() << " out of " << (index + 1)
+ << "/" << num_simplices << " columns" << std::flush << "\r";
#endif
- break;
- }
+ }
+ }
- found_persistence_pair:
-#ifdef PRINT_PERSISTENCE_PAIRS
- value_t death = get_diameter(pivot);
- if (diameter != death) {
#ifdef INDICATE_PROGRESS
- std::cout << "\033[K";
+ std::cout << "\033[K"
+ << "sorting " << num_simplices << " columns" << std::flush << "\r";
#endif
- std::cout << " [" << diameter << "," << death << ")" << std::endl << std::flush;
- }
+
+ std::sort(columns_to_reduce.begin(), columns_to_reduce.end(),
+ greater_diameter_or_smaller_index<diameter_index_t>());
+#ifdef INDICATE_PROGRESS
+ std::cout << "\033[K";
#endif
+}
- pivot_column_index.insert(std::make_pair(get_index(pivot), i));
+template <>
+void ripser<sparse_distance_matrix>::assemble_columns_to_reduce(
+ std::vector<diameter_index_t>& simplices, std::vector<diameter_index_t>& columns_to_reduce,
+ hash_map<index_t, index_t>& pivot_column_index, index_t dim) {
-#ifdef USE_COEFFICIENTS
- const coefficient_t inverse = multiplicative_inverse[get_coefficient(pivot)];
+#ifdef INDICATE_PROGRESS
+ std::cout << "\033[K"
+ << "assembling columns" << std::flush << "\r";
#endif
-#ifdef ASSEMBLE_REDUCTION_MATRIX
- // replace current column of reduction_coefficients (with a single diagonal 1 entry)
- // by reduction_column (possibly with a different entry on the diagonal)
- reduction_coefficients.pop_back();
- while (true) {
- diameter_entry_t e = pop_pivot(reduction_column, modulus);
- if (get_index(e) == -1) break;
-#ifdef USE_COEFFICIENTS
- set_coefficient(e, inverse * get_coefficient(e) % modulus);
- assert(get_coefficient(e) > 0);
-#endif
- reduction_coefficients.push_back(e);
- }
-#else
-#ifdef USE_COEFFICIENTS
- reduction_coefficients.pop_back();
- reduction_coefficients.push_back(diameter_entry_t(column_to_reduce, inverse));
-#endif
-#endif
- break;
- } while (true);
+ --dim;
+ columns_to_reduce.clear();
+
+ std::vector<diameter_index_t> next_simplices;
+
+ for (diameter_index_t simplex : simplices) {
+ simplex_coboundary_enumerator cofaces(simplex, dim, *this);
+
+ while (cofaces.has_next(false)) {
+ auto coface = cofaces.next();
+
+ next_simplices.push_back(std::make_pair(get_diameter(coface), get_index(coface)));
+
+ if (pivot_column_index.find(get_index(coface)) == pivot_column_index.end())
+ columns_to_reduce.push_back(
+ std::make_pair(get_diameter(coface), get_index(coface)));
+ }
}
+ simplices.swap(next_simplices);
+
+#ifdef INDICATE_PROGRESS
+ std::cout << "\033[K"
+ << "sorting " << columns_to_reduce.size() << " columns" << std::flush << "\r";
+#endif
+
+ std::sort(columns_to_reduce.begin(), columns_to_reduce.end(),
+ greater_diameter_or_smaller_index<diameter_index_t>());
#ifdef INDICATE_PROGRESS
std::cout << "\033[K";
#endif
}
-enum file_format { LOWER_DISTANCE_MATRIX, UPPER_DISTANCE_MATRIX, DISTANCE_MATRIX, POINT_CLOUD, DIPHA };
+enum file_format {
+ LOWER_DISTANCE_MATRIX,
+ UPPER_DISTANCE_MATRIX,
+ DISTANCE_MATRIX,
+ POINT_CLOUD,
+ DIPHA,
+ RIPSER
+};
template <typename T> T read(std::istream& s) {
T result;
@@ -714,7 +954,8 @@ compressed_lower_distance_matrix read_point_cloud(std::istream& input_stream) {
index_t n = eucl_dist.size();
- std::cout << "point cloud with " << n << " points in dimension " << eucl_dist.points.front().size() << std::endl;
+ std::cout << "point cloud with " << n << " points in dimension "
+ << eucl_dist.points.front().size() << std::endl;
std::vector<value_t> distances;
@@ -787,6 +1028,12 @@ compressed_lower_distance_matrix read_dipha(std::istream& input_stream) {
return compressed_lower_distance_matrix(std::move(distances));
}
+compressed_lower_distance_matrix read_ripser(std::istream& input_stream) {
+ std::vector<value_t> distances;
+ while (!input_stream.eof()) distances.push_back(read<value_t>(input_stream));
+ return compressed_lower_distance_matrix(std::move(distances));
+}
+
compressed_lower_distance_matrix read_file(std::istream& input_stream, file_format format) {
switch (format) {
case LOWER_DISTANCE_MATRIX:
@@ -799,29 +1046,36 @@ compressed_lower_distance_matrix read_file(std::istream& input_stream, file_form
return read_point_cloud(input_stream);
case DIPHA:
return read_dipha(input_stream);
+ case RIPSER:
+ return read_ripser(input_stream);
}
}
void print_usage_and_exit(int exit_code) {
- std::cerr << "Usage: "
- << "ripser "
- << "[options] [filename]" << std::endl
- << std::endl
- << "Options:" << std::endl
- << std::endl
- << " --help print this screen" << std::endl
- << " --format use the specified file format for the input. Options are:" << std::endl
- << " lower-distance (lower triangular distance matrix; default)" << std::endl
- << " upper-distance (upper triangular distance matrix)" << std::endl
- << " distance (full distance matrix)" << std::endl
- << " point-cloud (point cloud in Euclidean space)" << std::endl
- << " dipha (distance matrix in DIPHA file format)" << std::endl
- << " --dim <k> compute persistent homology up to dimension <k>" << std::endl
- << " --threshold <t> compute Rips complexes up to diameter <t>" << std::endl
+ std::cerr
+ << "Usage: "
+ << "ripser "
+ << "[options] [filename]" << std::endl
+ << std::endl
+ << "Options:" << std::endl
+ << std::endl
+ << " --help print this screen" << std::endl
+ << " --format use the specified file format for the input. Options are:"
+ << std::endl
+ << " lower-distance (lower triangular distance matrix; default)"
+ << std::endl
+ << " upper-distance (upper triangular distance matrix)" << std::endl
+ << " distance (full distance matrix)" << std::endl
+ << " point-cloud (point cloud in Euclidean space)" << std::endl
+ << " dipha (distance matrix in DIPHA file format)" << std::endl
+ << " ripser (distance matrix in Ripser binary file format)"
+ << std::endl
+ << " --dim <k> compute persistent homology up to dimension <k>" << std::endl
+ << " --threshold <t> compute Rips complexes up to diameter <t>" << std::endl
#ifdef USE_COEFFICIENTS
- << " --modulus <p> compute homology with coefficients in the prime field Z/<p>Z"
+ << " --modulus <p> compute homology with coefficients in the prime field Z/<p>Z"
#endif
- << std::endl;
+ << std::endl;
exit(exit_code);
}
@@ -835,6 +1089,8 @@ int main(int argc, char** argv) {
index_t dim_max = 1;
value_t threshold = std::numeric_limits<value_t>::max();
+ float ratio = 1;
+
#ifdef USE_COEFFICIENTS
coefficient_t modulus = 2;
#else
@@ -855,6 +1111,11 @@ int main(int argc, char** argv) {
size_t next_pos;
threshold = std::stof(parameter, &next_pos);
if (next_pos != parameter.size()) print_usage_and_exit(-1);
+ } else if (arg == "--ratio") {
+ std::string parameter = std::string(argv[++i]);
+ size_t next_pos;
+ ratio = std::stof(parameter, &next_pos);
+ if (next_pos != parameter.size()) print_usage_and_exit(-1);
} else if (arg == "--format") {
std::string parameter = std::string(argv[++i]);
if (parameter == "lower-distance")
@@ -867,6 +1128,8 @@ int main(int argc, char** argv) {
format = POINT_CLOUD;
else if (parameter == "dipha")
format = DIPHA;
+ else if (parameter == "ripser")
+ format = RIPSER;
else
print_usage_and_exit(-1);
#ifdef USE_COEFFICIENTS
@@ -890,68 +1153,41 @@ int main(int argc, char** argv) {
compressed_lower_distance_matrix dist = read_file(filename ? file_stream : std::cin, format);
- index_t n = dist.size();
-
- std::cout << "distance matrix with " << n << " points" << std::endl;
-
- auto value_range = std::minmax_element(dist.distances.begin(), dist.distances.end());
- std::cout << "value range: [" << *value_range.first << "," << *value_range.second << "]" << std::endl;
-
- dim_max = std::min(dim_max, n - 2);
-
- binomial_coeff_table binomial_coeff(n, dim_max + 2);
- std::vector<coefficient_t> multiplicative_inverse(multiplicative_inverse_vector(modulus));
+ value_t min = std::numeric_limits<value_t>::infinity(),
+ max = -std::numeric_limits<value_t>::infinity(), max_finite = max;
+ int num_edges = 0;
- std::vector<diameter_index_t> columns_to_reduce;
-
- {
- union_find dset(n);
- std::vector<diameter_index_t> edges;
- rips_filtration_comparator<decltype(dist)> comp(dist, 1, binomial_coeff);
- for (index_t index = binomial_coeff(n, 2); index-- > 0;) {
- value_t diameter = comp.diameter(index);
- if (diameter <= threshold) edges.push_back(std::make_pair(diameter, index));
+ if (threshold == std::numeric_limits<value_t>::max()) {
+ value_t enclosing_radius = std::numeric_limits<value_t>::infinity();
+ for (index_t i = 0; i < dist.size(); ++i) {
+ value_t r_i = -std::numeric_limits<value_t>::infinity();
+ for (index_t j = 0; j < dist.size(); ++j) r_i = std::max(r_i, dist(i, j));
+ enclosing_radius = std::min(enclosing_radius, r_i);
}
- std::sort(edges.rbegin(), edges.rend(), greater_diameter_or_smaller_index<diameter_index_t>());
-#ifdef PRINT_PERSISTENCE_PAIRS
- std::cout << "persistence intervals in dim 0:" << std::endl;
-#endif
-
- std::vector<index_t> vertices_of_edge(2);
- for (auto e : edges) {
- vertices_of_edge.clear();
- get_simplex_vertices(get_index(e), 1, n, binomial_coeff, std::back_inserter(vertices_of_edge));
- index_t u = dset.find(vertices_of_edge[0]), v = dset.find(vertices_of_edge[1]);
-
- if (u != v) {
-#ifdef PRINT_PERSISTENCE_PAIRS
- if (get_diameter(e) > 0) std::cout << " [0," << get_diameter(e) << ")" << std::endl;
-#endif
- dset.link(u, v);
- } else
- columns_to_reduce.push_back(e);
- }
- std::reverse(columns_to_reduce.begin(), columns_to_reduce.end());
-
-#ifdef PRINT_PERSISTENCE_PAIRS
- for (index_t i = 0; i < n; ++i)
- if (dset.find(i) == i) std::cout << " [0, )" << std::endl << std::flush;
-#endif
+ threshold = enclosing_radius;
}
- for (index_t dim = 1; dim <= dim_max; ++dim) {
- rips_filtration_comparator<decltype(dist)> comp(dist, dim + 1, binomial_coeff);
- rips_filtration_comparator<decltype(dist)> comp_prev(dist, dim, binomial_coeff);
+ for (auto d : dist.distances) {
+ min = std::min(min, d);
+ max = std::max(max, d);
+ max_finite = d != std::numeric_limits<value_t>::infinity() ? std::max(max, d) : max_finite;
+ if (d <= threshold) ++num_edges;
+ }
- hash_map<index_t, index_t> pivot_column_index;
- pivot_column_index.reserve(columns_to_reduce.size());
+ std::cout << "value range: [" << min << "," << max_finite << "]" << std::endl;
- compute_pairs(columns_to_reduce, pivot_column_index, dim, n, threshold, modulus, multiplicative_inverse, dist,
- comp, comp_prev, binomial_coeff);
+ if (threshold >= max) {
+ std::cout << "distance matrix with " << dist.size() << " points" << std::endl;
+ ripser<compressed_lower_distance_matrix>(std::move(dist), dim_max, threshold, ratio,
+ modulus)
+ .compute_barcodes();
+ } else {
+ std::cout << "sparse distance matrix with " << dist.size() << " points and " << num_edges
+ << "/" << (dist.size()*dist.size()-1)/2 << " entries" << std::endl;
- if (dim < dim_max) {
- assemble_columns_to_reduce(columns_to_reduce, pivot_column_index, comp, dim, n, threshold, binomial_coeff);
- }
+ ripser<sparse_distance_matrix>(sparse_distance_matrix(std::move(dist), threshold), dim_max,
+ threshold, ratio, modulus)
+ .compute_barcodes();
}
}