summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUlrich Bauer <mail@ulrich-bauer.org>2016-08-05 15:06:38 +0200
committerUlrich Bauer <ulrich.bauer@tum.de>2016-08-05 22:52:02 +0200
commita28d553456ce3b99cc1928a4c84b44c909f4f10a (patch)
treec64cdea62aeb2f3ed92f8dd8bf8226f249451a9e
parent3fa4fec040079f6489349556a97b238f2b002d6b (diff)
dynamic input format selection
-rw-r--r--Makefile32
-rw-r--r--README.md26
-rw-r--r--ripser.cpp267
3 files changed, 162 insertions, 163 deletions
diff --git a/Makefile b/Makefile
index 3cd0c79..9359d43 100644
--- a/Makefile
+++ b/Makefile
@@ -1,39 +1,15 @@
build: ripser
-all: ripser ripser-coeff ripser-dist ripser-dist-coeff ripser-upper-dist ripser-upper-dist-coeff ripser-point-cloud ripser-point-cloud-coeff ripser-dipha ripser-dipha-coeff
+all: ripser ripser-coeff
ripser: ripser.cpp
- c++ -std=c++11 ripser.cpp -o ripser -Ofast -D NDEBUG -D FILE_FORMAT_LOWER_DISTANCE_MATRIX
+ c++ -std=c++11 ripser.cpp -o ripser -Ofast -D NDEBUG
ripser-coeff: ripser.cpp
- c++ -std=c++11 ripser.cpp -o ripser-coeff -Ofast -D NDEBUG -D FILE_FORMAT_LOWER_DISTANCE_MATRIX -D USE_COEFFICIENTS
-
-ripser-dist: ripser.cpp
- c++ -std=c++11 ripser.cpp -o ripser-dist -Ofast -D NDEBUG -D FILE_FORMAT_DISTANCE_MATRIX
-
-ripser-dist-coeff: ripser.cpp
- c++ -std=c++11 ripser.cpp -o ripser-dist-coeff -Ofast -D NDEBUG -D FILE_FORMAT_DISTANCE_MATRIX -D USE_COEFFICIENTS
-
-ripser-upper-dist: ripser.cpp
- c++ -std=c++11 ripser.cpp -o ripser-upper-dist -Ofast -D NDEBUG -D FILE_FORMAT_UPPER_DISTANCE_MATRIX
-
-ripser-upper-dist-coeff: ripser.cpp
- c++ -std=c++11 ripser.cpp -o ripser-upper-dist-coeff -Ofast -D NDEBUG -D FILE_FORMAT_UPPER_DISTANCE_MATRIX -D USE_COEFFICIENTS
-
-ripser-point-cloud: ripser.cpp
- c++ -std=c++11 ripser.cpp -o ripser-point-cloud -Ofast -D NDEBUG -D FILE_FORMAT_POINT_CLOUD
-
-ripser-point-cloud-coeff: ripser.cpp
- c++ -std=c++11 ripser.cpp -o ripser-point-cloud-coeff -Ofast -D NDEBUG -D FILE_FORMAT_POINT_CLOUD -D USE_COEFFICIENTS
-
-ripser-dipha: ripser.cpp
- c++ -std=c++11 ripser.cpp -o ripser-dipha -Ofast -D NDEBUG -D FILE_FORMAT_DIPHA
-
-ripser-dipha-coeff: ripser.cpp
- c++ -std=c++11 ripser.cpp -o ripser-dipha-coeff -Ofast -D NDEBUG -D FILE_FORMAT_DIPHA -D USE_COEFFICIENTS
+ c++ -std=c++11 ripser.cpp -o ripser-coeff -Ofast -D NDEBUG -D USE_COEFFICIENTS
clean:
- rm ripser ripser-coeff ripser-dist ripser-dist-coeff ripser-upper-dist ripser-upper-dist-coeff ripser-point-cloud ripser-point-cloud-coeff ripser-dipha ripser-dipha-coeff
+ rm ripser ripser-coeff \ No newline at end of file
diff --git a/README.md b/README.md
index f57cf0e..13ae86b 100644
--- a/README.md
+++ b/README.md
@@ -21,8 +21,8 @@ Input formats currently supported by Ripser:
- comma-separated values lower triangular distance matrix (preferred)
- comma-separated values upper triangular distance matrix (MATLAB output from the function `pdist`)
- comma-separated values full distance matrix
- - point cloud data
- [DIPHA] distance matrix data
+ - point cloud data
Ripser's efficiency is based on a few important concepts and principles:
@@ -59,29 +59,29 @@ Ripser supports several compile-time options. They are switched on by defining t
- `PRINT_PERSISTENCE_PAIRS`: output the computed persistence pairs (enabled by default in the code; comment out to disable)
- `USE_GOOGLE_HASHMAP`: enable support for Google's [sparsehash] data structure; may further reducue memory footprint
-Furthermore, one of the following options needs to be chosen to specify the format for the input files:
-
- - `FILE_FORMAT_LOWER_DISTANCE_MATRIX`: lower triangular distance matrix; a comma (or whitespace, or other non-numerical character) separated list of the distance matrix entries below the diagonal, sorted lexicographically by row index, then column index
- - `FILE_FORMAT_UPPER_DISTANCE_MATRIX`: upper triangular distance matrix; similar to the previous, but for the entries above the diagonal; suitable for output from the MATLAB function `pdist`, saved in a CSV file
- - `FILE_FORMAT_DISTANCE_MATRIX`: full distance matrix; similar to the above, but for all entries of the distance matrix
- - `FILE_FORMAT_DIPHA`: DIPHA distance matrix as described on the [DIPHA] website
- - `FILE_FORMAT_POINT_CLOUD`: point cloud; a comma (or whitespace, or other non-numerical character) separated list of coordinates of the points in some Euclidean space, one point per line
-
-For example, to build with support for coefficients:
+For example, to build Ripser with support for coefficients:
```sh
-$ c++ -std=c++11 ripser.cpp -o ripser -Ofast -D NDEBUG -D FILE_FORMAT_LOWER_DISTANCE_MATRIX -D USE_COEFFICIENTS
+$ c++ -std=c++11 ripser.cpp -o ripser -Ofast -D NDEBUG -D USE_COEFFICIENTS
```
-A Makefile is provided with some variants of the above options. Use `make all` to build them. The default `make` only builds a binary with the option `FILE_FORMAT_LOWER_DISTANCE_MATRIX`.
+A Makefile is provided with some variants of the above options. Use `make all` to build them. The default `make` builds a binary without any of the above option.
-The following options are supported at the command line:
+The input is given either in a file whose name is passed as an argument, or through stdin. The following options are supported at the command line:
+ - `--format`: use the specified file format for the input. The following formats are supported:
+ - `lower-distance` (default if no format is specified): lower triangular distance matrix; a comma (or whitespace, or other non-numerical character) separated list of the distance matrix entries below the diagonal, sorted lexicographically by row index, then column index
+ - `upper-distance`: upper triangular distance matrix; similar to the previous, but for the entries above the diagonal; suitable for output from the MATLAB functions `pdist` or `seqpdist`, exported to a CSV file
+ - `distances`: full distance matrix; similar to the above, but for all entries of the distance matrix
+ - `dipha`: DIPHA distance matrix as described on the [DIPHA] website
+ - `point-cloud`: point cloud; a comma (or whitespace, or other non-numerical character) separated list of coordinates of the points in some Euclidean space, one point per line
- `--dim k`: compute persistent homology up to dimension *k*
- `--threshold t`: compute Rips complexes up to diameter *t*
- `--modulus p`: compute homology with coefficients in the prime field Z/*p*Z (only available when built with the option `USE_COEFFICIENTS`)
+
+
### Planned features
The following features are currently planned for future versions:
diff --git a/ripser.cpp b/ripser.cpp
index 53840cf..1745927 100644
--- a/ripser.cpp
+++ b/ripser.cpp
@@ -21,12 +21,6 @@
//#define INDICATE_PROGRESS
#define PRINT_PERSISTENCE_PAIRS
-//#define FILE_FORMAT_LOWER_DISTANCE_MATRIX
-//#define FILE_FORMAT_UPPER_DISTANCE_MATRIX
-//#define FILE_FORMAT_DISTANCE_MATRIX
-//#define FILE_FORMAT_POINT_CLOUD
-//#define FILE_FORMAT_DIPHA
-
//#define USE_GOOGLE_HASHMAP
#include <algorithm>
@@ -299,18 +293,16 @@ public:
assert(distances.size() == size() * (size() - 1) / 2);
init_rows();
}
-
- template <typename DistanceMatrix>
+
+ template <typename DistanceMatrix>
compressed_distance_matrix(const DistanceMatrix& mat)
- : rows(mat.size()) {
- distances.resize(size() * (size() - 1) / 2);
+ : distances(mat.size() * (mat.size() - 1) / 2), rows(mat.size()) {
init_rows();
for (index_t i = 1; i < size(); ++i)
for (index_t j = 0; j < i; ++j) rows[i][j] = mat(i, j);
}
-
value_t operator()(const index_t i, const index_t j) const;
size_t size() const { return rows.size(); }
@@ -687,84 +679,21 @@ void compute_pairs(std::vector<diameter_index_t>& columns_to_reduce,
#endif
}
+enum file_format {
+ LOWER_DISTANCE_MATRIX,
+ UPPER_DISTANCE_MATRIX,
+ DISTANCE_MATRIX,
+ POINT_CLOUD,
+ DIPHA
+};
+
template <typename T> T read(std::istream& s) {
T result;
s.read(reinterpret_cast<char*>(&result), sizeof(T));
return result; // on little endian: boost::endian::little_to_native(result);
}
-void print_usage_and_exit(int exit_code) {
- std::cerr << "Usage: "
- << "ripser "
- << "[options] filename" << std::endl;
- std::cerr << std::endl;
- std::cerr << "Options:" << std::endl;
- std::cerr << std::endl;
- std::cerr << " --help print this screen" << std::endl;
- std::cerr << " --dim <k> compute persistent homology up to dimension <k>" << std::endl;
- std::cerr << " --threshold <t> compute Rips complexes up to diameter <t>" << std::endl;
-#ifdef USE_COEFFICIENTS
- std::cerr << " --modulus <p> compute homology with coefficients in the prime field Z/<p>Z"
- << std::endl;
-#endif
-
- exit(exit_code);
-}
-
-int main(int argc, char** argv) {
-
- const char* filename = nullptr;
-
- index_t dim_max = 1;
- value_t threshold = std::numeric_limits<value_t>::max();
-
-#ifdef USE_COEFFICIENTS
- coefficient_t modulus = 2;
-#else
- const coefficient_t modulus = 2;
-#endif
-
- for (index_t i = 1; i < argc; ++i) {
- const std::string arg(argv[i]);
- if (arg == "--help") {
- print_usage_and_exit(0);
- } else if (arg == "--dim") {
- std::string parameter = std::string(argv[++i]);
- size_t next_pos;
- dim_max = std::stol(parameter, &next_pos);
- if (next_pos != parameter.size()) print_usage_and_exit(-1);
- } else if (arg == "--threshold") {
- std::string parameter = std::string(argv[++i]);
- size_t next_pos;
- threshold = std::stof(parameter, &next_pos);
- if (next_pos != parameter.size()) print_usage_and_exit(-1);
-#ifdef USE_COEFFICIENTS
- } else if (arg == "--modulus") {
- std::string parameter = std::string(argv[++i]);
- size_t next_pos;
- modulus = std::stol(parameter, &next_pos);
- if (next_pos != parameter.size() || !is_prime(modulus)) print_usage_and_exit(-1);
-#endif
- } else {
- if (filename) { print_usage_and_exit(-1); }
- filename = argv[i];
- }
- }
-
- std::istream* input_stream_ptr = &std::cin;
- std::ifstream input_file(filename);
- if (filename) {
- if (input_file.fail()) {
- std::cerr << "couldn't open file " << filename << std::endl;
- exit(-1);
- }
- input_stream_ptr = &input_file;
- }
- std::istream& input_stream = *input_stream_ptr;
-
-
-#ifdef FILE_FORMAT_POINT_CLOUD
-
+compressed_lower_distance_matrix read_point_cloud(std::istream& input_stream) {
std::vector<std::vector<value_t>> points;
std::string line;
@@ -774,14 +703,15 @@ int main(int argc, char** argv) {
std::istringstream s(line);
while (s >> value) point.push_back(value);
if (!point.empty()) points.push_back(point);
- assert(point.size() == points.front().size());
+ assert(point.size() == points.front().size());
}
euclidean_distance_matrix eucl_dist(std::move(points));
index_t n = eucl_dist.size();
- std::cout << "point cloud with " << n << " points in dimension " << eucl_dist.points.front().size() << std::endl;
+ std::cout << "point cloud with " << n << " points in dimension "
+ << eucl_dist.points.front().size() << std::endl;
std::vector<value_t> distances;
@@ -789,29 +719,10 @@ int main(int argc, char** argv) {
for (int j = 0; j < i; ++j)
if (i > j) distances.push_back(eucl_dist(i, j));
- compressed_lower_distance_matrix dist(std::move(distances));
-
-#endif
-
-#ifdef FILE_FORMAT_DISTANCE_MATRIX
-
- std::vector<value_t> distances;
-
- std::string line;
- value_t value;
- for (int i = 0; std::getline(input_stream, line); ++i) {
- std::istringstream s(line);
- for (int j = 0; j < i && s >> value; ++j) distances.push_back(value);
- }
-
- compressed_lower_distance_matrix dist(std::move(distances));
-
- index_t n = dist.size();
-
-#endif
-
-#ifdef FILE_FORMAT_LOWER_DISTANCE_MATRIX
+ return compressed_lower_distance_matrix(std::move(distances));
+}
+compressed_lower_distance_matrix read_lower_distance_matrix(std::istream& input_stream) {
std::vector<value_t> distances;
value_t value;
while (input_stream >> value) {
@@ -819,14 +730,10 @@ int main(int argc, char** argv) {
input_stream.ignore();
}
- compressed_lower_distance_matrix dist(std::move(distances));
-
- index_t n = dist.size();
-
-#endif
-
-#ifdef FILE_FORMAT_UPPER_DISTANCE_MATRIX
+ return compressed_lower_distance_matrix(std::move(distances));
+}
+compressed_lower_distance_matrix read_upper_distance_matrix(std::istream& input_stream) {
std::vector<value_t> distances;
value_t value;
while (input_stream >> value) {
@@ -834,22 +741,30 @@ int main(int argc, char** argv) {
input_stream.ignore();
}
- compressed_upper_distance_matrix dist_upper(std::move(distances));
- compressed_lower_distance_matrix dist(dist_upper);
+ return compressed_lower_distance_matrix(compressed_upper_distance_matrix(std::move(distances)));
+}
- index_t n = dist.size();
+compressed_lower_distance_matrix read_distance_matrix(std::istream& input_stream) {
+ std::vector<value_t> distances;
-#endif
+ std::string line;
+ value_t value;
+ for (int i = 0; std::getline(input_stream, line); ++i) {
+ std::istringstream s(line);
+ for (int j = 0; j < i && s >> value; ++j) distances.push_back(value);
+ }
-#ifdef FILE_FORMAT_DIPHA
+ return compressed_lower_distance_matrix(std::move(distances));
+}
+compressed_lower_distance_matrix read_dipha(std::istream& input_stream) {
if (read<int64_t>(input_stream) != 8067171840) {
- std::cerr << filename << " is not a Dipha file (magic number: 8067171840)" << std::endl;
+ std::cerr << "input is not a Dipha file (magic number: 8067171840)" << std::endl;
exit(-1);
}
if (read<int64_t>(input_stream) != 7) {
- std::cerr << filename << " is not a Dipha distance matrix (file type: 7)" << std::endl;
+ std::cerr << "input is not a Dipha distance matrix (file type: 7)" << std::endl;
exit(-1);
}
@@ -864,9 +779,117 @@ int main(int argc, char** argv) {
else
read<double>(input_stream);
- compressed_lower_distance_matrix dist(std::move(distances));
+ return compressed_lower_distance_matrix(std::move(distances));
+}
+
+compressed_lower_distance_matrix read_file(std::istream& input_stream, file_format format) {
+ switch (format) {
+ case LOWER_DISTANCE_MATRIX:
+ return read_lower_distance_matrix(input_stream);
+ case UPPER_DISTANCE_MATRIX:
+ return read_upper_distance_matrix(input_stream);
+ case DISTANCE_MATRIX:
+ return read_distance_matrix(input_stream);
+ case POINT_CLOUD:
+ return read_point_cloud(input_stream);
+ case DIPHA:
+ return read_dipha(input_stream);
+ }
+}
+void print_usage_and_exit(int exit_code) {
+ std::cerr << "Usage: "
+ << "ripser "
+ << "[options] [filename]" << std::endl
+ << std::endl
+ << "Options:" << std::endl
+ << std::endl
+ << " --help print this screen" << std::endl
+ << " --format use the specified file format for the input. Options are:"
+ << std::endl
+ << " lower-distance (lower triangular distance matrix; default)"
+ << std::endl
+ << " upper-distance (upper triangular distance matrix)"
+ << std::endl
+ << " distance (full distance matrix)" << std::endl
+ << " point-cloud (point cloud in Euclidean space)" << std::endl
+ << " dipha (distance matrix in DIPHA file format)"
+ << std::endl
+ << " --dim <k> compute persistent homology up to dimension <k>" << std::endl
+ << " --threshold <t> compute Rips complexes up to diameter <t>" << std::endl
+#ifdef USE_COEFFICIENTS
+ << " --modulus <p> compute homology with coefficients in the prime field Z/<p>Z"
#endif
+ << std::endl;
+
+ exit(exit_code);
+}
+
+int main(int argc, char** argv) {
+
+ const char* filename = nullptr;
+
+ file_format format = LOWER_DISTANCE_MATRIX;
+
+ index_t dim_max = 1;
+ value_t threshold = std::numeric_limits<value_t>::max();
+
+#ifdef USE_COEFFICIENTS
+ coefficient_t modulus = 2;
+#else
+ const coefficient_t modulus = 2;
+#endif
+
+ for (index_t i = 1; i < argc; ++i) {
+ const std::string arg(argv[i]);
+ if (arg == "--help") {
+ print_usage_and_exit(0);
+ } else if (arg == "--dim") {
+ std::string parameter = std::string(argv[++i]);
+ size_t next_pos;
+ dim_max = std::stol(parameter, &next_pos);
+ if (next_pos != parameter.size()) print_usage_and_exit(-1);
+ } else if (arg == "--threshold") {
+ std::string parameter = std::string(argv[++i]);
+ size_t next_pos;
+ threshold = std::stof(parameter, &next_pos);
+ if (next_pos != parameter.size()) print_usage_and_exit(-1);
+ } else if (arg == "--format") {
+ std::string parameter = std::string(argv[++i]);
+ if (parameter == "lower-distance")
+ format = LOWER_DISTANCE_MATRIX;
+ else if (parameter == "upper-distance")
+ format = UPPER_DISTANCE_MATRIX;
+ else if (parameter == "distance")
+ format = UPPER_DISTANCE_MATRIX;
+ else if (parameter == "point-cloud")
+ format = POINT_CLOUD;
+ else if (parameter == "dipha")
+ format = DIPHA;
+ else
+ print_usage_and_exit(-1);
+#ifdef USE_COEFFICIENTS
+ } else if (arg == "--modulus") {
+ std::string parameter = std::string(argv[++i]);
+ size_t next_pos;
+ modulus = std::stol(parameter, &next_pos);
+ if (next_pos != parameter.size() || !is_prime(modulus)) print_usage_and_exit(-1);
+#endif
+ } else {
+ if (filename) { print_usage_and_exit(-1); }
+ filename = argv[i];
+ }
+ }
+
+ std::ifstream file_stream(filename);
+ if (filename && file_stream.fail()) {
+ std::cerr << "couldn't open file " << filename << std::endl;
+ exit(-1);
+ }
+
+ compressed_lower_distance_matrix dist = read_file(filename ? file_stream : std::cin, format);
+
+ index_t n = dist.size();
std::cout << "distance matrix with " << n << " points" << std::endl;