diff options
-rw-r--r-- | Makefile | 32 | ||||
-rw-r--r-- | README.md | 26 | ||||
-rw-r--r-- | ripser.cpp | 267 |
3 files changed, 162 insertions, 163 deletions
@@ -1,39 +1,15 @@ build: ripser -all: ripser ripser-coeff ripser-dist ripser-dist-coeff ripser-upper-dist ripser-upper-dist-coeff ripser-point-cloud ripser-point-cloud-coeff ripser-dipha ripser-dipha-coeff +all: ripser ripser-coeff ripser: ripser.cpp - c++ -std=c++11 ripser.cpp -o ripser -Ofast -D NDEBUG -D FILE_FORMAT_LOWER_DISTANCE_MATRIX + c++ -std=c++11 ripser.cpp -o ripser -Ofast -D NDEBUG ripser-coeff: ripser.cpp - c++ -std=c++11 ripser.cpp -o ripser-coeff -Ofast -D NDEBUG -D FILE_FORMAT_LOWER_DISTANCE_MATRIX -D USE_COEFFICIENTS - -ripser-dist: ripser.cpp - c++ -std=c++11 ripser.cpp -o ripser-dist -Ofast -D NDEBUG -D FILE_FORMAT_DISTANCE_MATRIX - -ripser-dist-coeff: ripser.cpp - c++ -std=c++11 ripser.cpp -o ripser-dist-coeff -Ofast -D NDEBUG -D FILE_FORMAT_DISTANCE_MATRIX -D USE_COEFFICIENTS - -ripser-upper-dist: ripser.cpp - c++ -std=c++11 ripser.cpp -o ripser-upper-dist -Ofast -D NDEBUG -D FILE_FORMAT_UPPER_DISTANCE_MATRIX - -ripser-upper-dist-coeff: ripser.cpp - c++ -std=c++11 ripser.cpp -o ripser-upper-dist-coeff -Ofast -D NDEBUG -D FILE_FORMAT_UPPER_DISTANCE_MATRIX -D USE_COEFFICIENTS - -ripser-point-cloud: ripser.cpp - c++ -std=c++11 ripser.cpp -o ripser-point-cloud -Ofast -D NDEBUG -D FILE_FORMAT_POINT_CLOUD - -ripser-point-cloud-coeff: ripser.cpp - c++ -std=c++11 ripser.cpp -o ripser-point-cloud-coeff -Ofast -D NDEBUG -D FILE_FORMAT_POINT_CLOUD -D USE_COEFFICIENTS - -ripser-dipha: ripser.cpp - c++ -std=c++11 ripser.cpp -o ripser-dipha -Ofast -D NDEBUG -D FILE_FORMAT_DIPHA - -ripser-dipha-coeff: ripser.cpp - c++ -std=c++11 ripser.cpp -o ripser-dipha-coeff -Ofast -D NDEBUG -D FILE_FORMAT_DIPHA -D USE_COEFFICIENTS + c++ -std=c++11 ripser.cpp -o ripser-coeff -Ofast -D NDEBUG -D USE_COEFFICIENTS clean: - rm ripser ripser-coeff ripser-dist ripser-dist-coeff ripser-upper-dist ripser-upper-dist-coeff ripser-point-cloud ripser-point-cloud-coeff ripser-dipha ripser-dipha-coeff + rm ripser ripser-coeff
\ No newline at end of file @@ -21,8 +21,8 @@ Input formats currently supported by Ripser: - comma-separated values lower triangular distance matrix (preferred) - comma-separated values upper triangular distance matrix (MATLAB output from the function `pdist`) - comma-separated values full distance matrix - - point cloud data - [DIPHA] distance matrix data + - point cloud data Ripser's efficiency is based on a few important concepts and principles: @@ -59,29 +59,29 @@ Ripser supports several compile-time options. They are switched on by defining t - `PRINT_PERSISTENCE_PAIRS`: output the computed persistence pairs (enabled by default in the code; comment out to disable) - `USE_GOOGLE_HASHMAP`: enable support for Google's [sparsehash] data structure; may further reducue memory footprint -Furthermore, one of the following options needs to be chosen to specify the format for the input files: - - - `FILE_FORMAT_LOWER_DISTANCE_MATRIX`: lower triangular distance matrix; a comma (or whitespace, or other non-numerical character) separated list of the distance matrix entries below the diagonal, sorted lexicographically by row index, then column index - - `FILE_FORMAT_UPPER_DISTANCE_MATRIX`: upper triangular distance matrix; similar to the previous, but for the entries above the diagonal; suitable for output from the MATLAB function `pdist`, saved in a CSV file - - `FILE_FORMAT_DISTANCE_MATRIX`: full distance matrix; similar to the above, but for all entries of the distance matrix - - `FILE_FORMAT_DIPHA`: DIPHA distance matrix as described on the [DIPHA] website - - `FILE_FORMAT_POINT_CLOUD`: point cloud; a comma (or whitespace, or other non-numerical character) separated list of coordinates of the points in some Euclidean space, one point per line - -For example, to build with support for coefficients: +For example, to build Ripser with support for coefficients: ```sh -$ c++ -std=c++11 ripser.cpp -o ripser -Ofast -D NDEBUG -D FILE_FORMAT_LOWER_DISTANCE_MATRIX -D USE_COEFFICIENTS +$ c++ -std=c++11 ripser.cpp -o ripser -Ofast -D NDEBUG -D USE_COEFFICIENTS ``` -A Makefile is provided with some variants of the above options. Use `make all` to build them. The default `make` only builds a binary with the option `FILE_FORMAT_LOWER_DISTANCE_MATRIX`. +A Makefile is provided with some variants of the above options. Use `make all` to build them. The default `make` builds a binary without any of the above option. -The following options are supported at the command line: +The input is given either in a file whose name is passed as an argument, or through stdin. The following options are supported at the command line: + - `--format`: use the specified file format for the input. The following formats are supported: + - `lower-distance` (default if no format is specified): lower triangular distance matrix; a comma (or whitespace, or other non-numerical character) separated list of the distance matrix entries below the diagonal, sorted lexicographically by row index, then column index + - `upper-distance`: upper triangular distance matrix; similar to the previous, but for the entries above the diagonal; suitable for output from the MATLAB functions `pdist` or `seqpdist`, exported to a CSV file + - `distances`: full distance matrix; similar to the above, but for all entries of the distance matrix + - `dipha`: DIPHA distance matrix as described on the [DIPHA] website + - `point-cloud`: point cloud; a comma (or whitespace, or other non-numerical character) separated list of coordinates of the points in some Euclidean space, one point per line - `--dim k`: compute persistent homology up to dimension *k* - `--threshold t`: compute Rips complexes up to diameter *t* - `--modulus p`: compute homology with coefficients in the prime field Z/*p*Z (only available when built with the option `USE_COEFFICIENTS`) + + ### Planned features The following features are currently planned for future versions: @@ -21,12 +21,6 @@ //#define INDICATE_PROGRESS #define PRINT_PERSISTENCE_PAIRS -//#define FILE_FORMAT_LOWER_DISTANCE_MATRIX -//#define FILE_FORMAT_UPPER_DISTANCE_MATRIX -//#define FILE_FORMAT_DISTANCE_MATRIX -//#define FILE_FORMAT_POINT_CLOUD -//#define FILE_FORMAT_DIPHA - //#define USE_GOOGLE_HASHMAP #include <algorithm> @@ -299,18 +293,16 @@ public: assert(distances.size() == size() * (size() - 1) / 2); init_rows(); } - - template <typename DistanceMatrix> + + template <typename DistanceMatrix> compressed_distance_matrix(const DistanceMatrix& mat) - : rows(mat.size()) { - distances.resize(size() * (size() - 1) / 2); + : distances(mat.size() * (mat.size() - 1) / 2), rows(mat.size()) { init_rows(); for (index_t i = 1; i < size(); ++i) for (index_t j = 0; j < i; ++j) rows[i][j] = mat(i, j); } - value_t operator()(const index_t i, const index_t j) const; size_t size() const { return rows.size(); } @@ -687,84 +679,21 @@ void compute_pairs(std::vector<diameter_index_t>& columns_to_reduce, #endif } +enum file_format { + LOWER_DISTANCE_MATRIX, + UPPER_DISTANCE_MATRIX, + DISTANCE_MATRIX, + POINT_CLOUD, + DIPHA +}; + template <typename T> T read(std::istream& s) { T result; s.read(reinterpret_cast<char*>(&result), sizeof(T)); return result; // on little endian: boost::endian::little_to_native(result); } -void print_usage_and_exit(int exit_code) { - std::cerr << "Usage: " - << "ripser " - << "[options] filename" << std::endl; - std::cerr << std::endl; - std::cerr << "Options:" << std::endl; - std::cerr << std::endl; - std::cerr << " --help print this screen" << std::endl; - std::cerr << " --dim <k> compute persistent homology up to dimension <k>" << std::endl; - std::cerr << " --threshold <t> compute Rips complexes up to diameter <t>" << std::endl; -#ifdef USE_COEFFICIENTS - std::cerr << " --modulus <p> compute homology with coefficients in the prime field Z/<p>Z" - << std::endl; -#endif - - exit(exit_code); -} - -int main(int argc, char** argv) { - - const char* filename = nullptr; - - index_t dim_max = 1; - value_t threshold = std::numeric_limits<value_t>::max(); - -#ifdef USE_COEFFICIENTS - coefficient_t modulus = 2; -#else - const coefficient_t modulus = 2; -#endif - - for (index_t i = 1; i < argc; ++i) { - const std::string arg(argv[i]); - if (arg == "--help") { - print_usage_and_exit(0); - } else if (arg == "--dim") { - std::string parameter = std::string(argv[++i]); - size_t next_pos; - dim_max = std::stol(parameter, &next_pos); - if (next_pos != parameter.size()) print_usage_and_exit(-1); - } else if (arg == "--threshold") { - std::string parameter = std::string(argv[++i]); - size_t next_pos; - threshold = std::stof(parameter, &next_pos); - if (next_pos != parameter.size()) print_usage_and_exit(-1); -#ifdef USE_COEFFICIENTS - } else if (arg == "--modulus") { - std::string parameter = std::string(argv[++i]); - size_t next_pos; - modulus = std::stol(parameter, &next_pos); - if (next_pos != parameter.size() || !is_prime(modulus)) print_usage_and_exit(-1); -#endif - } else { - if (filename) { print_usage_and_exit(-1); } - filename = argv[i]; - } - } - - std::istream* input_stream_ptr = &std::cin; - std::ifstream input_file(filename); - if (filename) { - if (input_file.fail()) { - std::cerr << "couldn't open file " << filename << std::endl; - exit(-1); - } - input_stream_ptr = &input_file; - } - std::istream& input_stream = *input_stream_ptr; - - -#ifdef FILE_FORMAT_POINT_CLOUD - +compressed_lower_distance_matrix read_point_cloud(std::istream& input_stream) { std::vector<std::vector<value_t>> points; std::string line; @@ -774,14 +703,15 @@ int main(int argc, char** argv) { std::istringstream s(line); while (s >> value) point.push_back(value); if (!point.empty()) points.push_back(point); - assert(point.size() == points.front().size()); + assert(point.size() == points.front().size()); } euclidean_distance_matrix eucl_dist(std::move(points)); index_t n = eucl_dist.size(); - std::cout << "point cloud with " << n << " points in dimension " << eucl_dist.points.front().size() << std::endl; + std::cout << "point cloud with " << n << " points in dimension " + << eucl_dist.points.front().size() << std::endl; std::vector<value_t> distances; @@ -789,29 +719,10 @@ int main(int argc, char** argv) { for (int j = 0; j < i; ++j) if (i > j) distances.push_back(eucl_dist(i, j)); - compressed_lower_distance_matrix dist(std::move(distances)); - -#endif - -#ifdef FILE_FORMAT_DISTANCE_MATRIX - - std::vector<value_t> distances; - - std::string line; - value_t value; - for (int i = 0; std::getline(input_stream, line); ++i) { - std::istringstream s(line); - for (int j = 0; j < i && s >> value; ++j) distances.push_back(value); - } - - compressed_lower_distance_matrix dist(std::move(distances)); - - index_t n = dist.size(); - -#endif - -#ifdef FILE_FORMAT_LOWER_DISTANCE_MATRIX + return compressed_lower_distance_matrix(std::move(distances)); +} +compressed_lower_distance_matrix read_lower_distance_matrix(std::istream& input_stream) { std::vector<value_t> distances; value_t value; while (input_stream >> value) { @@ -819,14 +730,10 @@ int main(int argc, char** argv) { input_stream.ignore(); } - compressed_lower_distance_matrix dist(std::move(distances)); - - index_t n = dist.size(); - -#endif - -#ifdef FILE_FORMAT_UPPER_DISTANCE_MATRIX + return compressed_lower_distance_matrix(std::move(distances)); +} +compressed_lower_distance_matrix read_upper_distance_matrix(std::istream& input_stream) { std::vector<value_t> distances; value_t value; while (input_stream >> value) { @@ -834,22 +741,30 @@ int main(int argc, char** argv) { input_stream.ignore(); } - compressed_upper_distance_matrix dist_upper(std::move(distances)); - compressed_lower_distance_matrix dist(dist_upper); + return compressed_lower_distance_matrix(compressed_upper_distance_matrix(std::move(distances))); +} - index_t n = dist.size(); +compressed_lower_distance_matrix read_distance_matrix(std::istream& input_stream) { + std::vector<value_t> distances; -#endif + std::string line; + value_t value; + for (int i = 0; std::getline(input_stream, line); ++i) { + std::istringstream s(line); + for (int j = 0; j < i && s >> value; ++j) distances.push_back(value); + } -#ifdef FILE_FORMAT_DIPHA + return compressed_lower_distance_matrix(std::move(distances)); +} +compressed_lower_distance_matrix read_dipha(std::istream& input_stream) { if (read<int64_t>(input_stream) != 8067171840) { - std::cerr << filename << " is not a Dipha file (magic number: 8067171840)" << std::endl; + std::cerr << "input is not a Dipha file (magic number: 8067171840)" << std::endl; exit(-1); } if (read<int64_t>(input_stream) != 7) { - std::cerr << filename << " is not a Dipha distance matrix (file type: 7)" << std::endl; + std::cerr << "input is not a Dipha distance matrix (file type: 7)" << std::endl; exit(-1); } @@ -864,9 +779,117 @@ int main(int argc, char** argv) { else read<double>(input_stream); - compressed_lower_distance_matrix dist(std::move(distances)); + return compressed_lower_distance_matrix(std::move(distances)); +} + +compressed_lower_distance_matrix read_file(std::istream& input_stream, file_format format) { + switch (format) { + case LOWER_DISTANCE_MATRIX: + return read_lower_distance_matrix(input_stream); + case UPPER_DISTANCE_MATRIX: + return read_upper_distance_matrix(input_stream); + case DISTANCE_MATRIX: + return read_distance_matrix(input_stream); + case POINT_CLOUD: + return read_point_cloud(input_stream); + case DIPHA: + return read_dipha(input_stream); + } +} +void print_usage_and_exit(int exit_code) { + std::cerr << "Usage: " + << "ripser " + << "[options] [filename]" << std::endl + << std::endl + << "Options:" << std::endl + << std::endl + << " --help print this screen" << std::endl + << " --format use the specified file format for the input. Options are:" + << std::endl + << " lower-distance (lower triangular distance matrix; default)" + << std::endl + << " upper-distance (upper triangular distance matrix)" + << std::endl + << " distance (full distance matrix)" << std::endl + << " point-cloud (point cloud in Euclidean space)" << std::endl + << " dipha (distance matrix in DIPHA file format)" + << std::endl + << " --dim <k> compute persistent homology up to dimension <k>" << std::endl + << " --threshold <t> compute Rips complexes up to diameter <t>" << std::endl +#ifdef USE_COEFFICIENTS + << " --modulus <p> compute homology with coefficients in the prime field Z/<p>Z" #endif + << std::endl; + + exit(exit_code); +} + +int main(int argc, char** argv) { + + const char* filename = nullptr; + + file_format format = LOWER_DISTANCE_MATRIX; + + index_t dim_max = 1; + value_t threshold = std::numeric_limits<value_t>::max(); + +#ifdef USE_COEFFICIENTS + coefficient_t modulus = 2; +#else + const coefficient_t modulus = 2; +#endif + + for (index_t i = 1; i < argc; ++i) { + const std::string arg(argv[i]); + if (arg == "--help") { + print_usage_and_exit(0); + } else if (arg == "--dim") { + std::string parameter = std::string(argv[++i]); + size_t next_pos; + dim_max = std::stol(parameter, &next_pos); + if (next_pos != parameter.size()) print_usage_and_exit(-1); + } else if (arg == "--threshold") { + std::string parameter = std::string(argv[++i]); + size_t next_pos; + threshold = std::stof(parameter, &next_pos); + if (next_pos != parameter.size()) print_usage_and_exit(-1); + } else if (arg == "--format") { + std::string parameter = std::string(argv[++i]); + if (parameter == "lower-distance") + format = LOWER_DISTANCE_MATRIX; + else if (parameter == "upper-distance") + format = UPPER_DISTANCE_MATRIX; + else if (parameter == "distance") + format = UPPER_DISTANCE_MATRIX; + else if (parameter == "point-cloud") + format = POINT_CLOUD; + else if (parameter == "dipha") + format = DIPHA; + else + print_usage_and_exit(-1); +#ifdef USE_COEFFICIENTS + } else if (arg == "--modulus") { + std::string parameter = std::string(argv[++i]); + size_t next_pos; + modulus = std::stol(parameter, &next_pos); + if (next_pos != parameter.size() || !is_prime(modulus)) print_usage_and_exit(-1); +#endif + } else { + if (filename) { print_usage_and_exit(-1); } + filename = argv[i]; + } + } + + std::ifstream file_stream(filename); + if (filename && file_stream.fail()) { + std::cerr << "couldn't open file " << filename << std::endl; + exit(-1); + } + + compressed_lower_distance_matrix dist = read_file(filename ? file_stream : std::cin, format); + + index_t n = dist.size(); std::cout << "distance matrix with " << n << " points" << std::endl; |