/* Ripser: a lean C++ code for computation of Vietoris-Rips persistence barcodes Copyright 2015-2016 Ulrich Bauer. This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this program. If not, see . */ //#define ASSEMBLE_REDUCTION_MATRIX //#define USE_COEFFICIENTS //#define INDICATE_PROGRESS #define PRINT_PERSISTENCE_PAIRS //#define USE_GOOGLE_HASHMAP #include #include #include #include #include #include #include #include #include #ifdef USE_GOOGLE_HASHMAP #include template class hash_map : public google::sparse_hash_map { public: inline void reserve(size_t hint) { this->resize(hint); } }; #else template class hash_map : public std::unordered_map {}; #endif static const int64_t DIPHA_MAGIC = 8067171840; static const int64_t DIPHA_TYPE_DISTANCE_MATRIX = 7; static const int64_t DIPHA_TYPE_PERSISTENCE_DIAGRAM = 2; typedef float value_t; // typedef uint16_t value_t; typedef int64_t index_t; typedef int16_t coefficient_t; enum input_file_format { LOWER_DISTANCE_MATRIX, UPPER_DISTANCE_MATRIX, DISTANCE_MATRIX, POINT_CLOUD, DIPHA_DISTANCE_MATRIX }; enum output_file_format { TEXT_PD, DIPHA_PD }; class binomial_coeff_table { std::vector> B; index_t n_max, k_max; public: binomial_coeff_table(index_t n, index_t k) { n_max = n; k_max = k; B.resize(n + 1); for (index_t i = 0; i <= n; i++) { B[i].resize(k + 1); for (index_t j = 0; j <= std::min(i, k); j++) { if (j == 0 || j == i) B[i][j] = 1; else B[i][j] = B[i - 1][j - 1] + B[i - 1][j]; } } } index_t operator()(index_t n, index_t k) const { assert(n <= n_max); assert(k <= k_max); return B[n][k]; } }; bool is_prime(const coefficient_t n) { if (!(n & 1) || n < 2) return n == 2; for (coefficient_t p = 3, q = n / p, r = n % p; p <= q; p += 2, q = n / p, r = n % p) if (!r) return false; return true; } std::vector multiplicative_inverse_vector(const coefficient_t m) { std::vector inverse(m); inverse[1] = 1; // m = a * (m / a) + m % a // Multipying with inverse(a) * inverse(m % a): // 0 = inverse(m % a) * (m / a) + inverse(a) (mod m) for (coefficient_t a = 2; a < m; ++a) inverse[a] = m - (inverse[m % a] * (m / a)) % m; return inverse; } template inline void reverse_endianness(T & x) { uint8_t * p = reinterpret_cast(&x); std::reverse(p, p + sizeof(T)); } template T read(std::istream& s) { T result; s.read(reinterpret_cast(&result), sizeof(T)); #ifdef BIGENDIAN reverse_endianness(result); #endif return result; } template void write(std::ostream& s, T value) { #ifdef BIGENDIAN reverse_endianness(value); #endif s.write(reinterpret_cast(&value), sizeof(T)); } template OutputIterator get_simplex_vertices(index_t idx, const index_t dim, index_t n, const binomial_coeff_table& binomial_coeff, OutputIterator out) { --n; for (index_t k = dim + 1; k > 0; --k) { if (binomial_coeff(n, k) > idx) { index_t count = n; while (count > 0) { index_t i = n; index_t step = count >> 1; i -= step; if (binomial_coeff(i, k) > idx) { n = --i; count -= step + 1; } else count = step; } } assert(binomial_coeff(n, k) <= idx); assert(binomial_coeff(n + 1, k) > idx); *out++ = n; idx -= binomial_coeff(n, k); } return out; } std::vector vertices_of_simplex(const index_t simplex_index, const index_t dim, const index_t n, const binomial_coeff_table& binomial_coeff) { std::vector vertices; get_simplex_vertices(simplex_index, dim, n, binomial_coeff, std::back_inserter(vertices)); return vertices; } #ifdef USE_COEFFICIENTS struct entry_t { index_t index : 8 * (sizeof(index_t) - sizeof(coefficient_t)); coefficient_t coefficient; entry_t(index_t _index, coefficient_t _coefficient) : index(_index), coefficient(_coefficient) {} entry_t(index_t _index) : index(_index), coefficient(1) {} entry_t() : index(0), coefficient(1) {} } __attribute__((packed)); static_assert(sizeof(entry_t) == sizeof(index_t), "size of entry_t is not the same as index_t"); entry_t make_entry(index_t _index, coefficient_t _coefficient) { return entry_t(_index, _coefficient); } index_t get_index(entry_t e) { return e.index; } index_t get_coefficient(entry_t e) { return e.coefficient; } void set_coefficient(entry_t& e, const coefficient_t c) { e.coefficient = c; } bool operator==(const entry_t& e1, const entry_t& e2) { return get_index(e1) == get_index(e2) && get_coefficient(e1) == get_coefficient(e2); } std::ostream& operator<<(std::ostream& stream, const entry_t& e) { stream << get_index(e) << ":" << get_coefficient(e); return stream; } #else typedef index_t entry_t; const index_t get_index(entry_t i) { return i; } index_t get_coefficient(entry_t i) { return 1; } entry_t make_entry(index_t _index, coefficient_t _value) { return entry_t(_index); } void set_coefficient(index_t& e, const coefficient_t c) { e = c; } #endif const entry_t& get_entry(const entry_t& e) { return e; } template struct smaller_index { bool operator()(const Entry& a, const Entry& b) { return get_index(a) < get_index(b); } }; typedef std::pair diameter_index_t; value_t get_diameter(diameter_index_t i) { return i.first; } index_t get_index(diameter_index_t i) { return i.second; } class diameter_entry_t : public std::pair { public: diameter_entry_t(std::pair p) : std::pair(p) {} diameter_entry_t(entry_t e) : std::pair(0, e) {} diameter_entry_t() : diameter_entry_t(0) {} }; const entry_t& get_entry(const diameter_entry_t& p) { return p.second; } entry_t& get_entry(diameter_entry_t& p) { return p.second; } const index_t get_index(const diameter_entry_t& p) { return get_index(get_entry(p)); } const coefficient_t get_coefficient(const diameter_entry_t& p) { return get_coefficient(get_entry(p)); } const value_t& get_diameter(const diameter_entry_t& p) { return p.first; } void set_coefficient(diameter_entry_t& p, const coefficient_t c) { set_coefficient(get_entry(p), c); } diameter_entry_t make_diameter_entry(value_t _diameter, index_t _index, coefficient_t _coefficient) { return std::make_pair(_diameter, make_entry(_index, _coefficient)); } diameter_entry_t make_diameter_entry(diameter_index_t _diameter_index, coefficient_t _coefficient) { return std::make_pair(get_diameter(_diameter_index), make_entry(get_index(_diameter_index), _coefficient)); } template struct greater_diameter_or_smaller_index { bool operator()(const Entry& a, const Entry& b) { return (get_diameter(a) > get_diameter(b)) || ((get_diameter(a) == get_diameter(b)) && (get_index(a) < get_index(b))); } }; template class rips_filtration_comparator { public: const DistanceMatrix& dist; const index_t dim; private: mutable std::vector vertices; const binomial_coeff_table& binomial_coeff; public: rips_filtration_comparator(const DistanceMatrix& _dist, const index_t _dim, const binomial_coeff_table& _binomial_coeff) : dist(_dist), dim(_dim), vertices(_dim + 1), binomial_coeff(_binomial_coeff){}; value_t diameter(const index_t index) const { value_t diam = 0; get_simplex_vertices(index, dim, dist.size(), binomial_coeff, vertices.begin()); for (index_t i = 0; i <= dim; ++i) for (index_t j = 0; j < i; ++j) { diam = std::max(diam, dist(vertices[i], vertices[j])); } return diam; } bool operator()(const index_t a, const index_t b) const { assert(a < binomial_coeff(dist.size(), dim + 1)); assert(b < binomial_coeff(dist.size(), dim + 1)); return greater_diameter_or_smaller_index()(diameter_index_t(diameter(a), a), diameter_index_t(diameter(b), b)); } template bool operator()(const Entry& a, const Entry& b) const { return operator()(get_index(a), get_index(b)); } }; class simplex_coboundary_enumerator { private: index_t idx_below, idx_above, v, k; const binomial_coeff_table& binomial_coeff; public: simplex_coboundary_enumerator(index_t _idx, index_t _dim, index_t _n, const binomial_coeff_table& _binomial_coeff) : idx_below(_idx), idx_above(0), v(_n - 1), k(_dim + 1), binomial_coeff(_binomial_coeff) {} bool has_next() { while ((v != -1) && (binomial_coeff(v, k) <= idx_below)) { idx_below -= binomial_coeff(v, k); idx_above += binomial_coeff(v, k + 1); --v; --k; assert(k != -1); } return v != -1; } std::pair next() { auto result = std::make_pair(make_entry(idx_above + binomial_coeff(v, k + 1) + idx_below, k & 1 ? -1 : 1), v); --v; return result; } }; enum compressed_matrix_layout { LOWER_TRIANGULAR, UPPER_TRIANGULAR }; template class compressed_distance_matrix { public: std::vector distances; std::vector rows; void init_rows(); compressed_distance_matrix(std::vector&& _distances) : distances(_distances), rows((1 + std::sqrt(1 + 8 * distances.size())) / 2) { assert(distances.size() == size() * (size() - 1) / 2); init_rows(); } template compressed_distance_matrix(const DistanceMatrix& mat) : distances(mat.size() * (mat.size() - 1) / 2), rows(mat.size()) { init_rows(); for (index_t i = 1; i < size(); ++i) for (index_t j = 0; j < i; ++j) rows[i][j] = mat(i, j); } value_t operator()(const index_t i, const index_t j) const; size_t size() const { return rows.size(); } }; template <> void compressed_distance_matrix::init_rows() { value_t* pointer = &distances[0]; for (index_t i = 1; i < size(); ++i) { rows[i] = pointer; pointer += i; } } template <> void compressed_distance_matrix::init_rows() { value_t* pointer = &distances[0] - 1; for (index_t i = 0; i < size() - 1; ++i) { rows[i] = pointer; pointer += size() - i - 2; } } template <> value_t compressed_distance_matrix::operator()(index_t i, index_t j) const { if (i > j) std::swap(i, j); return i == j ? 0 : rows[i][j]; } template <> value_t compressed_distance_matrix::operator()(index_t i, index_t j) const { if (i > j) std::swap(i, j); return i == j ? 0 : rows[j][i]; } typedef compressed_distance_matrix compressed_lower_distance_matrix; typedef compressed_distance_matrix compressed_upper_distance_matrix; class euclidean_distance_matrix { public: std::vector> points; euclidean_distance_matrix(std::vector>&& _points) : points(_points) {} value_t operator()(const index_t i, const index_t j) const { return std::sqrt(std::inner_product(points[i].begin(), points[i].end(), points[j].begin(), value_t(), std::plus(), [](value_t u, value_t v) { return (u - v) * (u - v); })); } size_t size() const { return points.size(); } }; class union_find { std::vector parent; std::vector rank; public: union_find(index_t n) : parent(n), rank(n, 0) { for (index_t i = 0; i < n; ++i) parent[i] = i; } index_t find(index_t x) { index_t y = x, z = parent[y]; while (z != y) { y = z; z = parent[y]; } y = parent[x]; while (z != y) { parent[x] = z; x = y; y = parent[x]; } return z; } void link(index_t x, index_t y) { x = find(x); y = find(y); if (x == y) return; if (rank[x] > rank[y]) parent[y] = x; else { parent[x] = y; if (rank[x] == rank[y]) ++rank[y]; } } }; template diameter_entry_t pop_pivot(Heap& column, coefficient_t modulus) { if (column.empty()) return diameter_entry_t(-1); else { auto pivot = column.top(); #ifdef USE_COEFFICIENTS coefficient_t coefficient = 0; do { coefficient = (coefficient + get_coefficient(column.top())) % modulus; column.pop(); if (coefficient == 0) { if (column.empty()) return diameter_entry_t(-1); else pivot = column.top(); } } while (!column.empty() && get_index(column.top()) == get_index(pivot)); if (get_index(pivot) != -1) { set_coefficient(pivot, coefficient); } #else column.pop(); while (!column.empty() && get_index(column.top()) == get_index(pivot)) { column.pop(); if (column.empty()) return diameter_entry_t(-1); else { pivot = column.top(); column.pop(); } } #endif return pivot; } } template diameter_entry_t get_pivot(Heap& column, coefficient_t modulus) { diameter_entry_t result = pop_pivot(column, modulus); if (get_index(result) != -1) column.push(result); return result; } template class compressed_sparse_matrix { std::vector bounds; std::vector entries; public: size_t size() const { return bounds.size(); } typename std::vector::const_iterator cbegin(size_t index) const { assert(index < size()); return index == 0 ? entries.cbegin() : entries.cbegin() + bounds[index - 1]; } typename std::vector::const_iterator cend(size_t index) const { assert(index < size()); return entries.cbegin() + bounds[index]; } template void append_column(Iterator begin, Iterator end) { for (Iterator it = begin; it != end; ++it) { entries.push_back(*it); } bounds.push_back(entries.size()); } void append_column() { bounds.push_back(entries.size()); } void push_back(ValueType e) { assert(0 < size()); entries.push_back(e); ++bounds.back(); } void pop_back() { assert(0 < size()); entries.pop_back(); --bounds.back(); } template void append_column(const Collection collection) { append_column(collection.cbegin(), collection.cend()); } }; template void push_entry(Heap& column, index_t i, coefficient_t c, value_t diameter) { entry_t e = make_entry(i, c); column.push(std::make_pair(diameter, e)); } template void assemble_columns_to_reduce(std::vector& columns_to_reduce, hash_map& pivot_column_index, const Comparator& comp, index_t dim, index_t n, value_t threshold, const binomial_coeff_table& binomial_coeff) { index_t num_simplices = binomial_coeff(n, dim + 2); columns_to_reduce.clear(); #ifdef INDICATE_PROGRESS std::cout << "\033[K" << "assembling " << num_simplices << " columns" << std::flush << "\r"; #endif for (index_t index = 0; index < num_simplices; ++index) { if (pivot_column_index.find(index) == pivot_column_index.end()) { value_t diameter = comp.diameter(index); if (diameter <= threshold) columns_to_reduce.push_back(std::make_pair(diameter, index)); } } #ifdef INDICATE_PROGRESS std::cout << "\033[K" << "sorting " << num_simplices << " columns" << std::flush << "\r"; #endif std::sort(columns_to_reduce.begin(), columns_to_reduce.end(), greater_diameter_or_smaller_index()); #ifdef INDICATE_PROGRESS std::cout << "\033[K"; #endif } template void compute_pairs(std::vector& columns_to_reduce, hash_map& pivot_column_index, const DistanceMatrix& dist, const ComparatorCofaces& comp, const Comparator& comp_prev, index_t dim, index_t n, value_t threshold, coefficient_t modulus, const std::vector& multiplicative_inverse, const binomial_coeff_table& binomial_coeff, std::ostream& output_stream, output_file_format oformat, int64_t & pair_count) { if (oformat == TEXT_PD) { output_stream << "persistence intervals in dim " << dim << ":" << std::endl; } #ifdef ASSEMBLE_REDUCTION_MATRIX compressed_sparse_matrix reduction_matrix; #else #ifdef USE_COEFFICIENTS std::vector reduction_coefficients; #endif #endif std::vector coface_entries; std::vector vertices; for (index_t i = 0; i < columns_to_reduce.size(); ++i) { auto column_to_reduce = columns_to_reduce[i]; #ifdef ASSEMBLE_REDUCTION_MATRIX std::priority_queue, smaller_index> reduction_column; #endif std::priority_queue, greater_diameter_or_smaller_index> working_coboundary; value_t diameter = get_diameter(column_to_reduce); #ifdef INDICATE_PROGRESS if ((i + 1) % 1000 == 0) std::cout << "\033[K" << "reducing column " << i + 1 << "/" << columns_to_reduce.size() << " (diameter " << diameter << ")" << std::flush << "\r"; #endif index_t j = i; // start with a dummy pivot entry with coefficient -1 in order to initialize // working_coboundary with the coboundary of the simplex with index column_to_reduce diameter_entry_t pivot = make_diameter_entry(0, -1, -1 + modulus); #ifdef ASSEMBLE_REDUCTION_MATRIX // initialize reduction_matrix as identity matrix reduction_matrix.append_column(); reduction_matrix.push_back(make_diameter_entry(column_to_reduce, 1)); #else #ifdef USE_COEFFICIENTS reduction_coefficients.push_back(make_diameter_entry(column_to_reduce, 1)); #endif #endif bool might_be_apparent_pair = (i == j); do { const coefficient_t factor = modulus - get_coefficient(pivot); #ifdef ASSEMBLE_REDUCTION_MATRIX for (auto it = reduction_matrix.cbegin(j); it != reduction_matrix.cend(j); ++it) #endif { #ifdef ASSEMBLE_REDUCTION_MATRIX const auto& simplex = *it; #else #ifdef USE_COEFFICIENTS const auto& simplex = reduction_coefficients[j]; #else const auto& simplex = columns_to_reduce[j]; #endif #endif coefficient_t simplex_coefficient = get_coefficient(simplex) * factor % modulus; #ifdef ASSEMBLE_REDUCTION_MATRIX reduction_column.push( make_diameter_entry(get_diameter(simplex), get_index(simplex), simplex_coefficient)); #endif vertices.clear(); get_simplex_vertices(get_index(simplex), dim, n, binomial_coeff, std::back_inserter(vertices)); coface_entries.clear(); simplex_coboundary_enumerator cofaces(get_index(simplex), dim, n, binomial_coeff); while (cofaces.has_next()) { auto coface_descriptor = cofaces.next(); entry_t coface = coface_descriptor.first; index_t covertex = coface_descriptor.second; index_t coface_index = get_index(coface); value_t coface_diameter = get_diameter(simplex); for (index_t v : vertices) { coface_diameter = std::max(coface_diameter, dist(v, covertex)); } assert(comp.diameter(coface_index) == coface_diameter); if (coface_diameter <= threshold) { coefficient_t coface_coefficient = (get_coefficient(coface) + modulus) * simplex_coefficient % modulus; assert(coface_coefficient >= 0); diameter_entry_t coface_entry = make_diameter_entry(coface_diameter, coface_index, coface_coefficient); coface_entries.push_back(coface_entry); if (might_be_apparent_pair && (get_diameter(simplex) == coface_diameter)) { if (pivot_column_index.find(coface_index) == pivot_column_index.end()) { pivot = coface_entry; goto found_persistence_pair; } might_be_apparent_pair = false; } } } for (auto e : coface_entries) working_coboundary.push(e); } pivot = get_pivot(working_coboundary, modulus); if (get_index(pivot) != -1) { auto pair = pivot_column_index.find(get_index(pivot)); if (pair != pivot_column_index.end()) { j = pair->second; continue; } } else { #ifdef INDICATE_PROGRESS std::cout << "\033[K"; #endif if (oformat == TEXT_PD) { output_stream << " [" << diameter << ", )" << std::endl << std::flush; } else if (oformat == DIPHA_PD) { write(output_stream, -dim - 1); // Essential class in dimension dim write(output_stream, diameter); write(output_stream, NAN); // Undefined/unused for essential classes. pair_count++; } break; } found_persistence_pair: value_t death = get_diameter(pivot); if (diameter != death) { #ifdef INDICATE_PROGRESS std::cout << "\033[K"; #endif if (oformat == TEXT_PD) { output_stream << " [" << diameter << "," << death << ")" << std::endl << std::flush; } else if (oformat == DIPHA_PD) { write(output_stream, dim); write(output_stream, diameter); write(output_stream, death); pair_count++; } } pivot_column_index.insert(std::make_pair(get_index(pivot), i)); #ifdef USE_COEFFICIENTS const coefficient_t inverse = multiplicative_inverse[get_coefficient(pivot)]; #endif #ifdef ASSEMBLE_REDUCTION_MATRIX // replace current column of reduction_matrix (with a single diagonal 1 entry) // by reduction_column (possibly with a different entry on the diagonal) reduction_matrix.pop_back(); while (true) { diameter_entry_t e = pop_pivot(reduction_column, modulus); index_t index = get_index(e); if (index == -1) break; #ifdef USE_COEFFICIENTS const coefficient_t coefficient = inverse * get_coefficient(e) % modulus; assert(coefficient > 0); #else const coefficient_t coefficient = 1; #endif reduction_matrix.push_back(make_diameter_entry(get_diameter(e), index, coefficient)); } #else #ifdef USE_COEFFICIENTS reduction_coefficients.pop_back(); reduction_coefficients.push_back(make_diameter_entry(column_to_reduce, inverse)); #endif #endif break; } while (true); } #ifdef INDICATE_PROGRESS std::cout << "\033[K"; #endif } compressed_lower_distance_matrix read_point_cloud(std::istream& input_stream) { std::vector> points; std::string line; value_t value; while (std::getline(input_stream, line)) { std::vector point; std::istringstream s(line); while (s >> value) { point.push_back(value); s.ignore(); } if (!point.empty()) points.push_back(point); assert(point.size() == points.front().size()); } euclidean_distance_matrix eucl_dist(std::move(points)); index_t n = eucl_dist.size(); std::cout << "point cloud with " << n << " points in dimension " << eucl_dist.points.front().size() << std::endl; std::vector distances; for (int i = 0; i < n; ++i) for (int j = 0; j < i; ++j) distances.push_back(eucl_dist(i, j)); return compressed_lower_distance_matrix(std::move(distances)); } compressed_lower_distance_matrix read_lower_distance_matrix(std::istream& input_stream) { std::vector distances; value_t value; while (input_stream >> value) { distances.push_back(value); input_stream.ignore(); } return compressed_lower_distance_matrix(std::move(distances)); } compressed_lower_distance_matrix read_upper_distance_matrix(std::istream& input_stream) { std::vector distances; value_t value; while (input_stream >> value) { distances.push_back(value); input_stream.ignore(); } return compressed_lower_distance_matrix(compressed_upper_distance_matrix(std::move(distances))); } compressed_lower_distance_matrix read_distance_matrix(std::istream& input_stream) { std::vector distances; std::string line; value_t value; for (int i = 0; std::getline(input_stream, line); ++i) { std::istringstream s(line); for (int j = 0; j < i && s >> value; ++j) { distances.push_back(value); s.ignore(); } } return compressed_lower_distance_matrix(std::move(distances)); } compressed_lower_distance_matrix read_dipha(std::istream& input_stream) { if (read(input_stream) != DIPHA_MAGIC) { std::cerr << "input is not a Dipha file (magic number: " << DIPHA_MAGIC << ")" << std::endl; exit(-1); } if (read(input_stream) != DIPHA_TYPE_DISTANCE_MATRIX) { std::cerr << "input is not a Dipha distance matrix (file type: " << DIPHA_TYPE_DISTANCE_MATRIX << ")" << std::endl; exit(-1); } index_t n = read(input_stream); std::vector distances; for (int i = 0; i < n; ++i) for (int j = 0; j < n; ++j) if (i > j) distances.push_back(read(input_stream)); else read(input_stream); return compressed_lower_distance_matrix(std::move(distances)); } compressed_lower_distance_matrix read_file(std::istream& input_stream, input_file_format format) { switch (format) { case LOWER_DISTANCE_MATRIX: return read_lower_distance_matrix(input_stream); case UPPER_DISTANCE_MATRIX: return read_upper_distance_matrix(input_stream); case DISTANCE_MATRIX: return read_distance_matrix(input_stream); case POINT_CLOUD: return read_point_cloud(input_stream); case DIPHA_DISTANCE_MATRIX: return read_dipha(input_stream); } } void print_usage_and_exit(int exit_code) { std::cerr << "Usage: " << "ripser " << "[options] [input filename] [output filename]" << std::endl << std::endl << "Options:" << std::endl << std::endl << " --help print this screen" << std::endl << " --iformat use the specified file format for the input. Options are:" << std::endl << " lower-distance (lower triangular distance matrix; default)" << std::endl << " upper-distance (upper triangular distance matrix)" << std::endl << " distance (full distance matrix)" << std::endl << " point-cloud (point cloud in Euclidean space)" << std::endl << " dipha (distance matrix in DIPHA file format)" << std::endl << " --oformat use the specified file format for the output. Options are:" << std::endl << " text (plain text output; default)" << std::endl << " dipha (barcode in DIPHA file format)" << std::endl << " --dim compute persistent homology up to dimension " << std::endl << " --threshold compute Rips complexes up to diameter " << std::endl #ifdef USE_COEFFICIENTS << " --modulus

compute homology with coefficients in the prime field Z/

Z" #endif << std::endl; exit(exit_code); } int main(int argc, char** argv) { const char* ifilename = nullptr; const char* ofilename = nullptr; input_file_format iformat = DISTANCE_MATRIX; output_file_format oformat = TEXT_PD; index_t dim_max = 1; value_t threshold = std::numeric_limits::max(); #ifdef USE_COEFFICIENTS coefficient_t modulus = 2; #else const coefficient_t modulus = 2; #endif for (index_t i = 1; i < argc; ++i) { const std::string arg(argv[i]); if (arg == "--help") { print_usage_and_exit(0); } else if (arg == "--dim") { std::string parameter = std::string(argv[++i]); size_t next_pos; dim_max = std::stol(parameter, &next_pos); if (next_pos != parameter.size()) print_usage_and_exit(-1); } else if (arg == "--threshold") { std::string parameter = std::string(argv[++i]); size_t next_pos; threshold = std::stof(parameter, &next_pos); if (next_pos != parameter.size()) print_usage_and_exit(-1); } else if (arg == "--iformat") { std::string parameter = std::string(argv[++i]); if (parameter == "lower-distance") iformat = LOWER_DISTANCE_MATRIX; else if (parameter == "upper-distance") iformat = UPPER_DISTANCE_MATRIX; else if (parameter == "distance") iformat = DISTANCE_MATRIX; else if (parameter == "point-cloud") iformat = POINT_CLOUD; else if (parameter == "dipha") iformat = DIPHA_DISTANCE_MATRIX; else print_usage_and_exit(-1); } else if (arg == "--oformat") { std::string parameter = std::string(argv[++i]); if (parameter == "text") oformat = TEXT_PD; else if (parameter == "dipha") oformat = DIPHA_PD; else print_usage_and_exit(-1); #ifdef USE_COEFFICIENTS } else if (arg == "--modulus") { std::string parameter = std::string(argv[++i]); size_t next_pos; modulus = std::stol(parameter, &next_pos); if (next_pos != parameter.size() || !is_prime(modulus)) print_usage_and_exit(-1); #endif } else if (i == argc - 2) { ifilename = argv[i]; } else if (i == argc - 1) { ofilename = argv[i]; } else { print_usage_and_exit(-1); } } if (!ifilename || !ofilename) { std::cerr << "You need to specify input and output files." << std::endl; print_usage_and_exit(-1); } if (std::string(ifilename) == std::string("-")) ifilename = nullptr; if (std::string(ofilename) == std::string("-")) ofilename = nullptr; std::ifstream input_file_stream(ifilename); if (ifilename && input_file_stream.fail()) { std::cerr << "couldn't open file " << ifilename << std::endl; exit(-1); } if (!ofilename && oformat == DIPHA_PD) { std::cerr << "Warning: Will not write a DIPHA persistence diagram to stdout. Changing output format to text." << std::endl; oformat = TEXT_PD; } std::ofstream output_file_stream; if (oformat == TEXT_PD) { output_file_stream.open(ofilename, std::ios::out); } else if (oformat == DIPHA_PD) { // The in/out opening mode is so that we can seek back to the third third entry in the file and write the pair count at the very end. output_file_stream.open(ofilename, std::ios::out | std::ios::in | std::ios::trunc | std::ios::binary); } if (ofilename && output_file_stream.fail()) { std::cerr << "couldn't open file " << ofilename << std::endl; exit(-1); } std::ostream & output_stream = ofilename ? output_file_stream : std::cout; if (oformat == DIPHA_PD) { write(output_stream, DIPHA_MAGIC); write(output_stream, DIPHA_TYPE_PERSISTENCE_DIAGRAM); write(output_stream, DIPHA_MAGIC); // This is to later be replaced by the number of pairs in the persistence diagram. This is the reason we don't allow binary stdout output, since we'd have to buffer up the entire PD to know the count beforehand. In a file we can seek back and overwrite. } int64_t pair_count = 0; compressed_lower_distance_matrix dist = read_file(ifilename ? input_file_stream : std::cin, iformat); index_t n = dist.size(); std::cout << "distance matrix with " << n << " points" << std::endl; auto value_range = std::minmax_element(dist.distances.begin(), dist.distances.end()); std::cout << "value range: [" << *value_range.first << "," << *value_range.second << "]" << std::endl; dim_max = std::min(dim_max, n - 2); binomial_coeff_table binomial_coeff(n, dim_max + 2); std::vector multiplicative_inverse(multiplicative_inverse_vector(modulus)); std::vector columns_to_reduce; { union_find dset(n); std::vector edges; rips_filtration_comparator comp(dist, 1, binomial_coeff); for (index_t index = binomial_coeff(n, 2); index-- > 0;) { value_t diameter = comp.diameter(index); if (diameter <= threshold) edges.push_back(diameter_index_t(diameter, index)); } std::sort(edges.rbegin(), edges.rend(), greater_diameter_or_smaller_index()); if (oformat == TEXT_PD) { output_stream << "persistence intervals in dim 0:" << std::endl; } std::vector vertices_of_edge(2); for (auto e : edges) { vertices_of_edge.clear(); get_simplex_vertices(get_index(e), 1, n, binomial_coeff, std::back_inserter(vertices_of_edge)); index_t u = dset.find(vertices_of_edge[0]), v = dset.find(vertices_of_edge[1]); if (u != v) { if (oformat == TEXT_PD) { output_stream << " [0," << get_diameter(e) << ")" << std::endl; } else if (oformat == DIPHA_PD) { write(output_stream, 0); write(output_stream, 0); write(output_stream, get_diameter(e)); pair_count++; } dset.link(u, v); } else columns_to_reduce.push_back(e); } std::reverse(columns_to_reduce.begin(), columns_to_reduce.end()); for (index_t i = 0; i < n; ++i) { if (dset.find(i) == i) { if (oformat == TEXT_PD) { output_stream << " [0, )" << std::endl << std::flush; } else if (oformat == DIPHA_PD) { write(output_stream, -0 - 1); // Indicate essential class. write(output_stream, 0); write(output_stream, NAN); // Undefined/unused for essential classes. pair_count++; } } } } for (index_t dim = 1; dim <= dim_max; ++dim) { rips_filtration_comparator comp(dist, dim + 1, binomial_coeff); rips_filtration_comparator comp_prev(dist, dim, binomial_coeff); hash_map pivot_column_index; pivot_column_index.reserve(columns_to_reduce.size()); compute_pairs(columns_to_reduce, pivot_column_index, dist, comp, comp_prev, dim, n, threshold, modulus, multiplicative_inverse, binomial_coeff, output_stream, oformat, pair_count); if (dim < dim_max) { assemble_columns_to_reduce(columns_to_reduce, pivot_column_index, comp, dim, n, threshold, binomial_coeff); } } if (oformat == DIPHA_PD) { output_file_stream.seekp(2*sizeof(int64_t), std::ios_base::beg); // Rewind to the spot in the DIPHA file where we write the number of intervals. write(output_file_stream, pair_count); } input_file_stream.close(); output_file_stream.close(); }