summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUlrich Bauer <ulrich.bauer@tum.de>2016-07-25 01:06:01 +0200
committerUlrich Bauer <ulrich.bauer@tum.de>2016-07-25 01:06:01 +0200
commitf69c6af6ca6883dd518c48faf41cf8901c379598 (patch)
tree2c9f1829085d72cbfe797cb7d26d27c2e6332d88
parent6a0b1ffec58fa36b308f09ea60c910e7a05bcd55 (diff)
parentc383c3ee45b4ef1002e643da0bb29ed846367117 (diff)
Merge branch 'dev'
-rw-r--r--Makefile20
-rw-r--r--README.md186
-rw-r--r--examples/projective_plane.lower_distance_matrix24
-rw-r--r--ripser.cpp69
4 files changed, 162 insertions, 137 deletions
diff --git a/Makefile b/Makefile
index 1bb9a9e..c5cf741 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,21 @@
-build:
+build: ripser
+
+
+all: ripser ripser_coeff ripser_dipha ripser_dipha_coeff
+
+
+ripser: ripser.cpp
c++ -std=c++11 ripser.cpp -o ripser -Ofast -D NDEBUG -D FILE_FORMAT_LOWER_TRIANGULAR_CSV
-coefficients:
+ripser_coeff: ripser.cpp
c++ -std=c++11 ripser.cpp -o ripser_coeff -Ofast -D NDEBUG -D FILE_FORMAT_LOWER_TRIANGULAR_CSV -D USE_COEFFICIENTS
+
+ripser_dipha: ripser.cpp
+ c++ -std=c++11 ripser.cpp -o ripser_dipha -Ofast -D NDEBUG -D FILE_FORMAT_DIPHA
+
+ripser_dipha_coeff: ripser.cpp
+ c++ -std=c++11 ripser.cpp -o ripser_dipha_coeff -Ofast -D NDEBUG -D FILE_FORMAT_DIPHA -D USE_COEFFICIENTS
+
+
+clean:
+ rm ripser ripser_coeff ripser_dipha ripser_dipha_coeff
diff --git a/README.md b/README.md
index a3fa958..cfe2f2f 100644
--- a/README.md
+++ b/README.md
@@ -1,101 +1,101 @@
-# Ripser
-
-Copyright © 2015–2016 [Ulrich Bauer].
-
-
-### Description
-
-Ripser is a lean C++ code for the computation of Vietoris–Rips persistence barcodes. It can do just this one thing, but does it extremely well.
-
-The main features of Ripser:
-
- - time- and memory-efficient
- - support for coefficients in prime finite fields
- - less than 1000 lines of code in a single C++ file
- - no external dependencies (optional support for Google's [sparsehash])
-
-Currently, Ripser outperforms other codes ([Dionysus], [DIPHA], [GUDHI], [Perseus], [PHAT]) by a factor of at least 40 in computation time and a factor of at least 15 in memory efficiency. (Note that [PHAT] does not contain code for generating Vietoris–Rips filtrations).
-
-Input formats currently supported by Ripser:
-
- - comma-separated lower triangular distance matrix (preferred)
- - comma-separated lower triangular distance matrix (MATLAB output from the function `pdist`)
- - [DIPHA] distance matrix data
-
-Ripser's efficiency is based on a few important concepts and principles:
-
- - Compute persistent *co*homology
- - Don't compute information that is never needed
- (for the experts: employ the *clearing* optimization, aka *persistence with a twist*)
- - Don't store information that can be readily recomputed
- - Take obvious shortcuts (*apparent persistence pairs*)
-
-
-### Version
-[Latest release][latest-release]: 1.0 (July 2016)
-
-
-### Building
-
-Ripser requires a C++11 compiler. Here is how to obtain, build, and run Ripser:
-
-```sh
-git clone https://github.com/Ripser/ripser.git
-cd ripser
-make
-./ripser examples/sphere_3_192.lower_distance_matrix
-```
-
-
-### Options
-
-Ripser supports several compile-time options. They are switched on by defining the C preprocessor macros listed below, either using `#define` in the code or by passing an argument to the compiler. The following options are supported:
-
- - `ASSEMBLE_REDUCTION_MATRIX`: store the reduction matrix; may speed up computation but will also increase memory usage
- - `USE_COEFFICIENTS`: enable support for coeffitients in a prime field
- - `INDICATE_PROGRESS`: indicate the current progress in the console
- - `PRINT_PERSISTENCE_PAIRS`: output the computed persistence pairs (enabled by default)
- - `USE_GOOGLE_HASHMAP`: enable support for Google's [sparsehash] data structure; may further reducue memory footprint
-
-Furthermore, one of the following options needs to be chosen to specify the format for the input files:
-
- - `FILE_FORMAT_LOWER_TRIANGULAR_CSV`: lower triangular distance matrix; a comma separated list of the distance matrix entries below the diagonal, sorted lexicographically by row index and column index
- - `FILE_FORMAT_UPPER_TRIANGULAR_CSV`: upper triangular distance matrix; similar to the previous, but for the entries above the diagonal; suitable for output from the MATLAB function `pdist`, saved in a CSV file
- - `FILE_FORMAT_DIPHA`: DIPHA distance matrix as described on the [DIPHA] website
-
-For example, to build with support for coefficients:
-
-```sh
-$ c++ -std=c++11 ripser.cpp -o ripser -Ofast -D NDEBUG -D FILE_FORMAT_LOWER_TRIANGULAR_CSV -D USE_COEFFICIENTS
+# Ripser
+
+Copyright © 2015–2016 [Ulrich Bauer].
+
+
+### Description
+
+Ripser is a lean C++ code for the computation of Vietoris–Rips persistence barcodes. It can do just this one thing, but does it extremely well.
+
+The main features of Ripser:
+
+ - time- and memory-efficient
+ - less than 1000 lines of code in a single C++ file
+ - support for coefficients in prime finite fields
+ - no external dependencies (optional support for Google's [sparsehash])
+
+Currently, Ripser outperforms other codes ([Dionysus], [DIPHA], [GUDHI], [Perseus], [PHAT]) by a factor of more than 40 in computation time and a factor of more than 15 in memory efficiency. (Note that [PHAT] does not contain code for generating Vietoris–Rips filtrations).
+
+Input formats currently supported by Ripser:
+
+ - comma-separated values lower triangular distance matrix (preferred)
+ - comma-separated values upper triangular distance matrix (MATLAB output from the function `pdist`)
+ - [DIPHA] distance matrix data
+
+Ripser's efficiency is based on a few important concepts and principles:
+
+ - Compute persistent *co*homology
+ - Don't compute information that is never needed
+ (for the experts: employ the *clearing* optimization, aka *persistence with a twist*)
+ - Don't store information that can be readily recomputed
+ - Take obvious shortcuts (*apparent persistence pairs*)
+
+
+### Version
+[Latest release][latest-release]: 1.0 (July 2016)
+
+
+### Building
+
+Ripser requires a C++11 compiler. Here is how to obtain, build, and run Ripser:
+
+```sh
+git clone https://github.com/Ripser/ripser.git
+cd ripser
+make
+./ripser examples/sphere_3_192.lower_distance_matrix
+```
+
+
+### Options
+
+Ripser supports several compile-time options. They are switched on by defining the C preprocessor macros listed below, either using `#define` in the code or by passing an argument to the compiler. The following options are supported:
+
+ - `ASSEMBLE_REDUCTION_MATRIX`: store the reduction matrix; may speed up computation but will also increase memory usage
+ - `USE_COEFFICIENTS`: enable support for coefficients in a prime field
+ - `INDICATE_PROGRESS`: indicate the current progress in the console
+ - `PRINT_PERSISTENCE_PAIRS`: output the computed persistence pairs (enabled by default)
+ - `USE_GOOGLE_HASHMAP`: enable support for Google's [sparsehash] data structure; may further reducue memory footprint
+
+Furthermore, one of the following options needs to be chosen to specify the format for the input files:
+
+ - `FILE_FORMAT_LOWER_TRIANGULAR_CSV`: lower triangular distance matrix; a comma (or whitespace, or other non-numerical character) separated list of the distance matrix entries below the diagonal, sorted lexicographically by row index, then column index
+ - `FILE_FORMAT_UPPER_TRIANGULAR_CSV`: upper triangular distance matrix; similar to the previous, but for the entries above the diagonal; suitable for output from the MATLAB function `pdist`, saved in a CSV file
+ - `FILE_FORMAT_DIPHA`: DIPHA distance matrix as described on the [DIPHA] website
+
+For example, to build with support for coefficients:
+
+```sh
+$ c++ -std=c++11 ripser.cpp -o ripser -Ofast -D NDEBUG -D FILE_FORMAT_LOWER_TRIANGULAR_CSV -D USE_COEFFICIENTS
```
The following options are supported at the command line:
- - `--top_dim k`: compute persistent homology up to dimension *k*
+ - `--dim k`: compute persistent homology up to dimension *k*
- `--threshold t`: compute Rips complexes up to diameter *t*
- `--modulus p`: compute homology with coefficients in the prime field Z/*p*Z (only available when build with the option `USE_COEFFICIENTS`)
-
-### Planned features
-
-The following features are currently planned for future versions:
-
- - support for point clouds
- - computation of representative cycles for persistent homology (currenly only *co*cycles are computed)
- - support for sparse distance matrices
-
-
-### License
-
-[LGPL] 3.0
-
-
-[Ulrich Bauer]: <http://ulrich-bauer.org>
-[latest-release]: <https://github.com/Ripser/ripser/releases/latest>
-[Dionysus]: <http://www.mrzv.org/software/dionysus/>
-[DIPHA]: <http://git.io/dipha>
-[PHAT]: <http://git.io/dipha>
-[Perseus]: <http://www.sas.upenn.edu/~vnanda/perseus/>
-[GUDHI]: <http://gudhi.gforge.inria.fr>
-[sparsehash]: <https://github.com/sparsehash/sparsehash>
+
+### Planned features
+
+The following features are currently planned for future versions:
+
+ - support for point clouds
+ - computation of representative cycles for persistent homology (currenly only *co*cycles are computed)
+ - support for sparse distance matrices
+
+
+### License
+
+[LGPL] 3.0
+
+
+[Ulrich Bauer]: <http://ulrich-bauer.org>
+[latest-release]: <https://github.com/Ripser/ripser/releases/latest>
+[Dionysus]: <http://www.mrzv.org/software/dionysus/>
+[DIPHA]: <http://git.io/dipha>
+[PHAT]: <http://git.io/dipha>
+[Perseus]: <http://www.sas.upenn.edu/~vnanda/perseus/>
+[GUDHI]: <http://gudhi.gforge.inria.fr>
+[sparsehash]: <https://github.com/sparsehash/sparsehash>
[LGPL]: <https://www.gnu.org/licenses/lgpl> \ No newline at end of file
diff --git a/examples/projective_plane.lower_distance_matrix b/examples/projective_plane.lower_distance_matrix
index 29134ec..a7ad7e6 100644
--- a/examples/projective_plane.lower_distance_matrix
+++ b/examples/projective_plane.lower_distance_matrix
@@ -1,13 +1,13 @@
-1,
-1,2,
-1,2,2,
-1,2,2,2,
-1,1,2,1,2,
-1,2,1,2,1,2,
-1,1,2,2,1,2,2,
-1,2,1,1,2,2,2,2,
-2,1,1,2,2,1,1,1,1,
-2,2,2,2,2,1,1,2,2,1,
-2,2,2,2,2,2,2,1,1,1,2,
-2,2,2,1,1,1,1,1,1,2,1,1, \ No newline at end of file
+1
+1,2
+1,2,2
+1,2,2,2
+1,1,2,1,2
+1,2,1,2,1,2
+1,1,2,2,1,2,2
+1,2,1,1,2,2,2,2
+2,1,1,2,2,1,1,1,1
+2,2,2,2,2,1,1,2,2,1
+2,2,2,2,2,2,2,1,1,1,2
+2,2,2,1,1,1,1,1,1,2,1,1 \ No newline at end of file
diff --git a/ripser.cpp b/ripser.cpp
index 3661b9c..68d0811 100644
--- a/ripser.cpp
+++ b/ripser.cpp
@@ -10,6 +10,7 @@
//#define USE_GOOGLE_HASHMAP
+#include <algorithm>
#include <iostream>
#include <fstream>
#include <cassert>
@@ -362,11 +363,10 @@ template <typename Heap> diameter_entry_t pop_pivot(Heap& column, coefficient_t
column.pop();
if (coefficient == 0) {
- if (column.empty()) {
+ if (column.empty())
return diameter_entry_t(-1);
- } else {
+ else
pivot = column.top();
- }
}
} while (!column.empty() && get_index(column.top()) == get_index(pivot));
if (get_index(pivot) != -1) { set_coefficient(pivot, coefficient); }
@@ -401,6 +401,11 @@ void assemble_columns_to_reduce(std::vector<diameter_index_t>& columns_to_reduce
columns_to_reduce.clear();
+#ifdef INDICATE_PROGRESS
+ std::cout << "\033[K"
+ << "assembling " << num_simplices << " columns" << std::flush << "\r";
+#endif
+
for (index_t index = 0; index < num_simplices; ++index) {
if (pivot_column_index.find(index) == pivot_column_index.end()) {
value_t diameter = comp.diameter(index);
@@ -408,8 +413,16 @@ void assemble_columns_to_reduce(std::vector<diameter_index_t>& columns_to_reduce
}
}
+#ifdef INDICATE_PROGRESS
+ std::cout << "\033[K"
+ << "sorting " << num_simplices << " columns" << std::flush << "\r";
+#endif
+
std::sort(columns_to_reduce.begin(), columns_to_reduce.end(),
greater_diameter_or_smaller_index<diameter_index_t>());
+#ifdef INDICATE_PROGRESS
+ std::cout << "\033[K";
+#endif
}
template <typename ValueType> class compressed_sparse_matrix {
@@ -496,9 +509,10 @@ void compute_pairs(std::vector<diameter_index_t>& columns_to_reduce,
value_t diameter = get_diameter(column_to_reduce);
#ifdef INDICATE_PROGRESS
- std::cout << "\033[K"
- << "reducing column " << i + 1 << "/" << columns_to_reduce.size() << " (diameter "
- << diameter << ")" << std::flush << "\r";
+ if ((i + 1) % 1000 == 0)
+ std::cout << "\033[K"
+ << "reducing column " << i + 1 << "/" << columns_to_reduce.size()
+ << " (diameter " << diameter << ")" << std::flush << "\r";
#endif
index_t j = i;
@@ -666,6 +680,12 @@ bool is_prime(const coefficient_t n) {
return true;
}
+template <typename T> T read(std::ifstream& s) {
+ T result;
+ s.read(reinterpret_cast<char*>(&result), sizeof(T));
+ return result; // on little endian: boost::endian::little_to_native(result);
+}
+
void print_usage_and_exit(int exit_code) {
std::cerr << "Usage: "
<< "ripser "
@@ -674,7 +694,7 @@ void print_usage_and_exit(int exit_code) {
std::cerr << "Options:" << std::endl;
std::cerr << std::endl;
std::cerr << " --help print this screen" << std::endl;
- std::cerr << " --top_dim <k> compute persistent homology up to dimension <k>" << std::endl;
+ std::cerr << " --dim <k> compute persistent homology up to dimension <k>" << std::endl;
std::cerr << " --threshold <t> compute Rips complexes up to diameter <t>" << std::endl;
#ifdef USE_COEFFICIENTS
std::cerr << " --modulus <p> compute homology with coefficients in the prime field Z/<p>Z"
@@ -703,7 +723,7 @@ int main(int argc, char** argv) {
const std::string arg(argv[i]);
if (arg == "--help") {
print_usage_and_exit(0);
- } else if (arg == "--top_dim") {
+ } else if (arg == "--dim") {
std::string parameter = std::string(argv[++i]);
size_t next_pos;
dim_max = std::stol(parameter, &next_pos);
@@ -735,33 +755,24 @@ int main(int argc, char** argv) {
std::vector<value_t> diameters;
#ifdef FILE_FORMAT_DIPHA
- int64_t magic_number;
- input_stream.read(reinterpret_cast<char*>(&magic_number), sizeof(int64_t));
- if (magic_number != 8067171840) {
+
+ if (read<int64_t>(input_stream) != 8067171840) {
std::cerr << filename << " is not a Dipha file (magic number: 8067171840)" << std::endl;
exit(-1);
}
- int64_t file_type;
- input_stream.read(reinterpret_cast<char*>(&file_type), sizeof(int64_t));
- if (file_type != 7) {
+ if (read<int64_t>(input_stream) != 7) {
std::cerr << filename << " is not a Dipha distance matrix (file type: 7)" << std::endl;
exit(-1);
}
- int64_t n;
- input_stream.read(reinterpret_cast<char*>(&n), sizeof(int64_t));
+ index_t n = read<int64_t>(input_stream);
distance_matrix dist_full;
dist_full.distances = std::vector<std::vector<value_t>>(n, std::vector<value_t>(n));
- for (int i = 0; i < n; ++i) {
- for (int j = 0; j < n; ++j) {
- double val;
- input_stream.read(reinterpret_cast<char*>(&val), sizeof(double));
- dist_full.distances[i][j] = val;
- }
- }
+ for (int i = 0; i < n; ++i)
+ for (int j = 0; j < n; ++j) dist_full.distances[i][j] = read<double>(input_stream);
std::cout << "distance matrix with " << n << " points" << std::endl;
compressed_lower_distance_matrix_adapter dist(diameters, dist_full);
@@ -770,9 +781,8 @@ int main(int argc, char** argv) {
#ifdef FILE_FORMAT_UPPER_TRIANGULAR_CSV
std::vector<value_t> distances;
- std::string value_string;
- while (std::getline(input_stream, value_string, ','))
- distances.push_back(std::stod(value_string));
+ value_t value;
+ while ((input_stream >> value).ignore()) distances.push_back(value);
compressed_upper_distance_matrix_adapter dist_upper(distances);
@@ -785,9 +795,8 @@ int main(int argc, char** argv) {
#ifdef FILE_FORMAT_LOWER_TRIANGULAR_CSV
std::vector<value_t>& distances = diameters;
- std::string value_string;
- while (std::getline(input_stream, value_string, ','))
- distances.push_back(std::stod(value_string));
+ value_t value;
+ while ((input_stream >> value).ignore()) distances.push_back(value);
compressed_lower_distance_matrix_adapter dist(distances);
@@ -799,7 +808,7 @@ int main(int argc, char** argv) {
auto result = std::minmax_element(dist.distances.begin(), dist.distances.end());
std::cout << "value range: [" << *result.first << "," << *result.second << "]" << std::endl;
- assert(dim_max <= n - 2);
+ dim_max = std::min(dim_max, n - 2);
binomial_coeff_table binomial_coeff(n, dim_max + 2);
std::vector<coefficient_t> multiplicative_inverse(multiplicative_inverse_vector(modulus));