diff options
-rw-r--r-- | ripser.cpp | 242 |
1 files changed, 228 insertions, 14 deletions
@@ -24,6 +24,8 @@ with this program. If not, see <http://www.gnu.org/licenses/>. //#define INDICATE_PROGRESS #define PRINT_PERSISTENCE_PAIRS +#define SPARSE_DISTANCE_MATRIX + //#define USE_GOOGLE_HASHMAP #include <algorithm> @@ -196,6 +198,22 @@ public: size_t size() const { return rows.size(); } }; +class sparse_distance_matrix { +public: + std::vector<std::vector<diameter_index_t>> neighbors; + + template <typename DistanceMatrix> + sparse_distance_matrix(const DistanceMatrix& mat, value_t threshold) : neighbors(mat.size()) { + + for (index_t i = 0; i < size(); ++i) + for (index_t j = 0; j < size(); ++j) + if (i != j && mat(i, j) <= threshold) + neighbors[i].push_back(std::make_pair(mat(i, j), j)); + } + + size_t size() const { return neighbors.size(); } +}; + template <> void compressed_distance_matrix<LOWER_TRIANGULAR>::init_rows() { value_t* pointer = &distances[0]; for (index_t i = 1; i < size(); ++i) { @@ -213,12 +231,14 @@ template <> void compressed_distance_matrix<UPPER_TRIANGULAR>::init_rows() { } template <> -value_t compressed_distance_matrix<UPPER_TRIANGULAR>::operator()(index_t i, index_t j) const { +value_t compressed_distance_matrix<UPPER_TRIANGULAR>::operator()(const index_t i, + const index_t j) const { return i == j ? 0 : i > j ? rows[j][i] : rows[i][j]; } template <> -value_t compressed_distance_matrix<LOWER_TRIANGULAR>::operator()(index_t i, index_t j) const { +value_t compressed_distance_matrix<LOWER_TRIANGULAR>::operator()(const index_t i, + const index_t j) const { return i == j ? 0 : i < j ? rows[j][i] : rows[i][j]; } @@ -366,20 +386,22 @@ void push_entry(Heap& column, index_t i, coefficient_t c, value_t diameter) { column.push(std::make_pair(diameter, e)); } -class ripser { - compressed_lower_distance_matrix dist; - index_t dim_max, n; +template <typename DistanceMatrix> class ripser { + DistanceMatrix dist; + index_t n, dim_max; value_t threshold; coefficient_t modulus; const binomial_coeff_table binomial_coeff; std::vector<coefficient_t> multiplicative_inverse; mutable std::vector<index_t> vertices; + mutable std::vector<std::vector<diameter_index_t>::const_reverse_iterator> neighbor_it; + mutable std::vector<std::vector<diameter_index_t>::const_reverse_iterator> neighbor_end; public: - ripser(compressed_lower_distance_matrix&& _dist, index_t _dim_max, value_t _threshold, - coefficient_t _modulus) - : dist(std::move(_dist)), dim_max(std::min(_dim_max, index_t(dist.size() - 2))), - n(dist.size()), threshold(_threshold), modulus(_modulus), binomial_coeff(n, dim_max + 2), + ripser(DistanceMatrix&& _dist, index_t _dim_max, value_t _threshold, coefficient_t _modulus) + : dist(std::move(_dist)), n(dist.size()), + dim_max(std::min(_dim_max, index_t(dist.size() - 2))), threshold(_threshold), + modulus(_modulus), binomial_coeff(n, dim_max + 2), multiplicative_inverse(multiplicative_inverse_vector(_modulus)) {} index_t get_next_vertex(index_t& v, const index_t idx, const index_t k) const { @@ -400,6 +422,10 @@ public: return v; } + index_t get_edge_index(const index_t i, const index_t j) const { + return binomial_coeff(i, 2) + j; + } + template <typename OutputIterator> OutputIterator get_simplex_vertices(index_t idx, const index_t dim, index_t v, OutputIterator out) const { @@ -464,7 +490,81 @@ public: } }; - void compute_barcodes(); + class simplex_sparse_coboundary_enumerator { + private: + const ripser& parent; + + index_t idx_below, idx_above, v, k, max_vertex_below; + const diameter_entry_t simplex; + const coefficient_t modulus; + const DistanceMatrix& dist; + const binomial_coeff_table& binomial_coeff; + + std::vector<index_t>& vertices; + std::vector<std::vector<diameter_index_t>::const_reverse_iterator>& neighbor_it; + std::vector<std::vector<diameter_index_t>::const_reverse_iterator>& neighbor_end; + diameter_index_t x; + + public: + simplex_sparse_coboundary_enumerator(const diameter_entry_t _simplex, index_t _dim, + const ripser& _parent) + : parent(_parent), simplex(_simplex), idx_below(get_index(_simplex)), idx_above(0), + v(parent.n - 1), k(_dim + 1), max_vertex_below(parent.n - 1), modulus(parent.modulus), + dist(parent.dist), binomial_coeff(parent.binomial_coeff), vertices(parent.vertices), + neighbor_it(parent.neighbor_it), neighbor_end(parent.neighbor_end) { + + neighbor_it.clear(); + neighbor_end.clear(); + vertices.clear(); + + parent.get_simplex_vertices(idx_below, _dim, parent.n, std::back_inserter(vertices)); + + for (auto v : vertices) { + neighbor_it.push_back(dist.neighbors[v].rbegin()); + neighbor_end.push_back(dist.neighbors[v].rend()); + } + } + + bool has_next(bool all_cofaces = true) { + for (auto &it0 = neighbor_it[0], &end0 = neighbor_end[0]; it0 != end0; ++it0) { + x = *it0; + for (size_t idx = 1; idx < neighbor_it.size(); ++idx) { + auto &it = neighbor_it[idx], end = neighbor_end[idx]; + while (get_index(*it) > get_index(x)) + if (++it == end) return false; + auto y = *it; + if (get_index(y) != get_index(x)) + goto continue_outer; + else + x = std::max(x, y); + } + return all_cofaces || + !(k > 0 && + parent.get_next_vertex(max_vertex_below, idx_below, k) > get_index(x)); + continue_outer:; + } + return false; + } + + diameter_entry_t next() { + ++neighbor_it[0]; + + while (k > 0 && parent.get_next_vertex(max_vertex_below, idx_below, k) > get_index(x)) { + idx_below -= binomial_coeff(max_vertex_below, k); + idx_above += binomial_coeff(max_vertex_below, k + 1); + --k; + } + + value_t coface_diameter = std::max(get_diameter(simplex), get_diameter(x)); + + coefficient_t coface_coefficient = + (k & 1 ? -1 + modulus : 1) * get_coefficient(simplex) % modulus; + + return diameter_entry_t(coface_diameter, + idx_above + binomial_coeff(get_index(x), k + 1) + idx_below, + coface_coefficient); + } + }; void assemble_columns_to_reduce(std::vector<diameter_index_t>& columns_to_reduce, hash_map<index_t, index_t>& pivot_column_index, index_t dim) { @@ -504,6 +604,50 @@ public: #endif } + void assemble_sparse_columns_to_reduce(std::vector<diameter_index_t>& simplices, + std::vector<diameter_index_t>& columns_to_reduce, + hash_map<index_t, index_t>& pivot_column_index, + index_t dim) { + +#ifdef INDICATE_PROGRESS + std::cout << "\033[K" + << "assembling columns" << std::flush << "\r"; +#endif + + --dim; + columns_to_reduce.clear(); + + std::vector<diameter_index_t> next_simplices; + + for (diameter_index_t simplex : simplices) { + simplex_sparse_coboundary_enumerator cofaces(simplex, dim, *this); + + while (cofaces.has_next(false)) { + auto coface = cofaces.next(); + + next_simplices.push_back(std::make_pair(get_diameter(coface), get_index(coface))); + + if (pivot_column_index.find(get_index(coface)) == pivot_column_index.end()) + columns_to_reduce.push_back( + std::make_pair(get_diameter(coface), get_index(coface))); + } + } + + simplices.swap(next_simplices); + +#ifdef INDICATE_PROGRESS + std::cout << "\033[K" + << "sorting " << columns_to_reduce.size() << " columns" << std::flush << "\r"; +#endif + + std::sort(columns_to_reduce.begin(), columns_to_reduce.end(), + greater_diameter_or_smaller_index<diameter_index_t>()); +#ifdef INDICATE_PROGRESS + std::cout << "\033[K"; +#endif + } + + template <typename BoundaryEnumerator> void compute_pairs(std::vector<diameter_index_t>& columns_to_reduce, hash_map<index_t, index_t>& pivot_column_index, index_t dim) { @@ -592,7 +736,7 @@ public: #endif coface_entries.clear(); - simplex_coboundary_enumerator cofaces(simplex, dim, *this); + BoundaryEnumerator cofaces(simplex, dim, *this); while (cofaces.has_next()) { diameter_entry_t coface = cofaces.next(); if (get_diameter(coface) <= threshold) { @@ -679,8 +823,13 @@ public: std::cout << "\033[K"; #endif } + + void compute_barcodes(); }; +template <> void ripser<compressed_lower_distance_matrix>::compute_barcodes(); +template <> void ripser<sparse_distance_matrix>::compute_barcodes(); + enum file_format { LOWER_DISTANCE_MATRIX, UPPER_DISTANCE_MATRIX, @@ -914,10 +1063,17 @@ int main(int argc, char** argv) { std::cout << "value range: [" << *value_range.first << "," << *value_range.second << "]" << std::endl; - ripser(std::move(dist), dim_max, threshold, modulus).compute_barcodes(); +#ifdef SPARSE_DISTANCE_MATRIX + ripser<compressed_lower_distance_matrix>(std::move(dist), dim_max, threshold, modulus) + .compute_barcodes(); +#else + ripser<sparse_distance_matrix>(sparse_distance_matrix(dist, threshold), dim_max, threshold, + modulus) + .compute_barcodes(); +#endif } -void ripser::compute_barcodes() { +template <> void ripser<compressed_lower_distance_matrix>::compute_barcodes() { std::vector<diameter_index_t> columns_to_reduce; @@ -962,10 +1118,68 @@ void ripser::compute_barcodes() { hash_map<index_t, index_t> pivot_column_index; pivot_column_index.reserve(columns_to_reduce.size()); - compute_pairs(columns_to_reduce, pivot_column_index, dim); + compute_pairs<simplex_coboundary_enumerator>(columns_to_reduce, pivot_column_index, dim); if (dim < dim_max) { assemble_columns_to_reduce(columns_to_reduce, pivot_column_index, dim + 1); } } } + +template <> void ripser<sparse_distance_matrix>::compute_barcodes() { + + std::vector<diameter_index_t> columns_to_reduce; + + std::vector<diameter_index_t> simplices; + + { + union_find dset(n); + std::vector<diameter_index_t>& edges = simplices; + for (index_t i = 0; i < n; ++i) + for (auto n : dist.neighbors[i]) { + index_t j = get_index(n); + if (i > j) edges.push_back(std::make_pair(get_diameter(n), get_edge_index(i, j))); + } + std::sort(edges.rbegin(), edges.rend(), + greater_diameter_or_smaller_index<diameter_index_t>()); + +#ifdef PRINT_PERSISTENCE_PAIRS + std::cout << "persistence intervals in dim 0:" << std::endl; +#endif + + std::vector<index_t> vertices_of_edge(2); + for (auto e : edges) { + vertices_of_edge.clear(); + get_simplex_vertices(get_index(e), 1, n, std::back_inserter(vertices_of_edge)); + index_t u = dset.find(vertices_of_edge[0]), v = dset.find(vertices_of_edge[1]); + + if (u != v) { +#ifdef PRINT_PERSISTENCE_PAIRS + if (get_diameter(e) != 0) + std::cout << " [0," << get_diameter(e) << ")" << std::endl; +#endif + dset.link(u, v); + } else + columns_to_reduce.push_back(e); + } + std::reverse(columns_to_reduce.begin(), columns_to_reduce.end()); + +#ifdef PRINT_PERSISTENCE_PAIRS + for (index_t i = 0; i < n; ++i) + if (dset.find(i) == i) std::cout << " [0, )" << std::endl << std::flush; +#endif + } + + for (index_t dim = 1; dim <= dim_max; ++dim) { + hash_map<index_t, index_t> pivot_column_index; + pivot_column_index.reserve(columns_to_reduce.size()); + + compute_pairs<simplex_sparse_coboundary_enumerator>(columns_to_reduce, pivot_column_index, + dim); + + if (dim < dim_max) { + assemble_sparse_columns_to_reduce(simplices, columns_to_reduce, pivot_column_index, + dim + 1); + } + } +} |