From 4b0c30171a6fddc8a6dbee5290d7f536cdf978a8 Mon Sep 17 00:00:00 2001 From: Gard Spreemann Date: Mon, 28 Jan 2019 17:06:07 +0100 Subject: Basic output done, some basic sanity checks done. --- ripser.cpp | 162 ++++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 113 insertions(+), 49 deletions(-) diff --git a/ripser.cpp b/ripser.cpp index dc3ae94..cff6e79 100644 --- a/ripser.cpp +++ b/ripser.cpp @@ -48,10 +48,35 @@ public: template class hash_map : public std::unordered_map {}; #endif +static const int64_t DIPHA_MAGIC = 8067171840; +static const int64_t DIPHA_TYPE_DISTANCE_MATRIX = 7; +static const int64_t DIPHA_TYPE_PERSISTENCE_DIAGRAM = 2; + typedef float value_t; typedef int64_t index_t; typedef int16_t coefficient_t; +template inline void reverse_endianness(T & x) { + uint8_t * p = reinterpret_cast(&x); + std::reverse(p, p + sizeof(T)); +} + +template T read(std::istream& s) { + T result; + s.read(reinterpret_cast(&result), sizeof(T)); + #ifdef BIGENDIAN + reverse_endianness(result); + #endif + return result; +} + +template void write(std::ostream& s, T value) { + #ifdef BIGENDIAN + reverse_endianness(value); + #endif + s.write(reinterpret_cast(&value), sizeof(T)); +} + class binomial_coeff_table { std::vector> B; @@ -458,7 +483,7 @@ public: hash_map& pivot_column_index, index_t dim); void compute_dim_0_pairs(std::vector& edges, - std::vector& columns_to_reduce) { + std::vector& columns_to_reduce, std::ofstream & outstream_pds, std::ofstream & outstream_reps, int64_t & pair_count) { union_find dset(n); edges = get_edges(); @@ -478,20 +503,23 @@ public: if (u != v) { #ifdef PRINT_PERSISTENCE_PAIRS - if (get_diameter(e) != 0) - std::cout << " [0," << get_diameter(e) << ")" << std::endl; + if (get_diameter(e) != 0) { + write(outstream_pds, 0); + write(outstream_pds, 0); + write(outstream_pds, get_diameter(e)); + ++pair_count; + } #endif - std::cout << "{"; bool nonempty = false; for (index_t w = 0; w < n; ++w) if (dset.find(w) == u) { - if (nonempty) std::cout << ", "; nonempty = true; - std::cout << "[" << w << "]"; + if (nonempty) outstream_reps << " "; nonempty = true; + outstream_reps << "[" << w << "]"; #ifdef USE_COEFFICIENTS - std::cout << ":" << 1; + outstream_reps << ":" << 1; #endif } - std::cout << "}" << std::endl; + outstream_reps << std::endl; dset.link(u, v); @@ -503,16 +531,18 @@ public: #ifdef PRINT_PERSISTENCE_PAIRS for (index_t i = 0; i < n; ++i) if (dset.find(i) == i) { - std::cout << " [0, )" << std::endl << std::flush; - std::cout << "{"; + write(outstream_pds, -0-1); // Indicate essential class. + write(outstream_pds, 0); + write(outstream_pds, NAN); // Undefined/unused for essential classes. + ++pair_count; for (index_t w = 0; w < n; ++w) { - if (w > 0) std::cout << ", "; - std::cout << "[" << w << "]"; + if (w > 0) outstream_reps << " "; + outstream_reps << "[" << w << "]"; #ifdef USE_COEFFICIENTS - std::cout << ":" << 1; + outstream_reps << ":" << 1; #endif } - std::cout << "}" << std::endl; + outstream_reps << std::endl; #endif } } @@ -529,35 +559,34 @@ public: bool& might_be_apparent_pair); template - void print_chain(Chain& cycle, index_t dim) { + void write_chain(std::ofstream & stream, Chain& cycle, index_t dim) { diameter_entry_t e; - std::cout << "{"; while (get_index(e = get_pivot(cycle, modulus)) != -1) { vertices.resize(dim + 1); get_simplex_vertices(get_index(e), dim, n, vertices.rbegin()); - std::cout << "["; + stream << "["; auto it = vertices.begin(); if (it != vertices.end()) { - std::cout << *it++; - while (it != vertices.end()) std::cout << "," << *it++; + stream << *it++; + while (it != vertices.end()) stream << "," << *it++; } - std::cout << "]"; + stream << "]"; #ifdef USE_COEFFICIENTS - std::cout << ":" << normalize(get_coefficient(e), modulus); + stream << ":" << normalize(get_coefficient(e), modulus); #endif cycle.pop(); if (get_index(e = get_pivot(cycle, modulus)) != -1) - std::cout << ", "; + stream << " "; } - std::cout << "}" << std::endl; + stream << std::endl; } void compute_pairs(std::vector& columns_to_reduce, - hash_map& pivot_column_index, index_t dim) { + hash_map& pivot_column_index, index_t dim, std::ofstream & outstream_pds, std::ofstream & outstream_reps, int64_t & pair_count) { #ifdef PRINT_PERSISTENCE_PAIRS std::cout << "persistence intervals in dim " << dim << ":" << std::endl; @@ -658,9 +687,11 @@ public: #ifdef INDICATE_PROGRESS std::cout << "\033[K"; #endif -// std::cout << " [" << diameter << "," << death << ")" << std::endl << std::flush; - std::cout << " [" << diameter << "," << death << "): "; - print_chain(working_reduction_column, dim); + write(outstream_pds, dim); + write(outstream_pds, diameter); + write(outstream_pds, death); + ++pair_count; + write_chain(outstream_reps, working_reduction_column, dim); } #endif pivot_column_index.insert( @@ -703,9 +734,11 @@ public: #ifdef INDICATE_PROGRESS std::cout << "\033[K"; #endif -// std::cout << " [" << diameter << ", )" << std::endl << std::flush; - std::cout << " [" << diameter << ", ): "; - print_chain(working_reduction_column, dim); + write(outstream_pds, -dim - 1); // Essential class in dimension dim + write(outstream_pds, diameter); + write(outstream_pds, NAN); // Undefined/unused for essential classes. + ++pair_count; + write_chain(outstream_reps, working_reduction_column, dim); #endif break; @@ -720,17 +753,17 @@ public: std::vector get_edges(); - void compute_barcodes() { + void compute_barcodes(std::ofstream & outstream_pds, std::ofstream & outstream_reps, int64_t & pair_count) { std::vector simplices, columns_to_reduce; - compute_dim_0_pairs(simplices, columns_to_reduce); + compute_dim_0_pairs(simplices, columns_to_reduce, outstream_pds, outstream_reps, pair_count); for (index_t dim = 1; dim <= dim_max; ++dim) { hash_map pivot_column_index; pivot_column_index.reserve(columns_to_reduce.size()); - compute_pairs(columns_to_reduce, pivot_column_index, dim); + compute_pairs(columns_to_reduce, pivot_column_index, dim, outstream_pds, outstream_reps, pair_count); if (dim < dim_max) { assemble_columns_to_reduce(simplices, columns_to_reduce, pivot_column_index, @@ -1000,12 +1033,6 @@ enum file_format { RIPSER }; -template T read(std::istream& s) { - T result; - s.read(reinterpret_cast(&result), sizeof(T)); - return result; // on little endian: boost::endian::little_to_native(result); -} - compressed_lower_distance_matrix read_point_cloud(std::istream& input_stream) { std::vector> points; @@ -1154,7 +1181,8 @@ void print_usage_and_exit(int exit_code) { int main(int argc, char** argv) { - const char* filename = nullptr; + const char* ifilename = nullptr; + const char* oprefix = nullptr; file_format format = DISTANCE_MATRIX; @@ -1211,19 +1239,48 @@ int main(int argc, char** argv) { modulus = std::stol(parameter, &next_pos); if (next_pos != parameter.size() || !is_prime(modulus)) print_usage_and_exit(-1); #endif - } else { - if (filename) { print_usage_and_exit(-1); } - filename = argv[i]; + } else if (i == argc - 2) { + ifilename = argv[i]; + } else if (i == argc - 1) { + oprefix = argv[i]; + } else { + print_usage_and_exit(-1); } } - std::ifstream file_stream(filename); - if (filename && file_stream.fail()) { - std::cerr << "couldn't open file " << filename << std::endl; + if (ifilename == nullptr || oprefix == nullptr) + { + std::cerr << "Need input and output file names." << std::endl; + exit(-1); + } + + std::ifstream file_stream(ifilename); + if (ifilename && file_stream.fail()) { + std::cerr << "couldn't open file " << ifilename << std::endl; exit(-1); } - compressed_lower_distance_matrix dist = read_file(filename ? file_stream : std::cin, format); + std::ofstream outstream_pds(std::string(oprefix) + std::string(".pd.dipha"), std::ios::binary); + if (!outstream_pds.is_open()) + { + std::cerr << "Failed to open PD output file." << std::endl; + exit(-1); + } + + std::ofstream outstream_reps(std::string(oprefix) + std::string(".reps.txt")); + if (!outstream_reps.is_open()) + { + std::cerr << "Failed to open representative cocycles output file." << std::endl; + exit(-1); + } + + int64_t pair_count = 0; + + write(outstream_pds, DIPHA_MAGIC); + write(outstream_pds, DIPHA_TYPE_PERSISTENCE_DIAGRAM); + write(outstream_pds, DIPHA_MAGIC); // This is to later be replaced by the number of pairs in the persistence diagram. + + compressed_lower_distance_matrix dist = read_file(ifilename ? file_stream : std::cin, format); value_t min = std::numeric_limits::infinity(), max = -std::numeric_limits::infinity(), max_finite = max; @@ -1251,13 +1308,20 @@ int main(int argc, char** argv) { std::cout << "distance matrix with " << dist.size() << " points" << std::endl; ripser(std::move(dist), dim_max, threshold, ratio, modulus) - .compute_barcodes(); + .compute_barcodes(outstream_pds, outstream_reps, pair_count); } else { std::cout << "sparse distance matrix with " << dist.size() << " points and " << num_edges << " entries" << std::endl; ripser(sparse_distance_matrix(std::move(dist), threshold), dim_max, threshold, ratio, modulus) - .compute_barcodes(); + .compute_barcodes(outstream_pds, outstream_reps, pair_count); } + + + outstream_pds.seekp(2*sizeof(int64_t), std::ios_base::beg); // Rewind to the spot in the DIPHA file where we write the number of intervals. + write(outstream_pds, pair_count); + + outstream_pds.close(); + outstream_reps.close(); } -- cgit v1.2.3