summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGard Spreemann <gard.spreemann@epfl.ch>2016-09-22 11:01:59 +0200
committerGard Spreemann <gard.spreemann@epfl.ch>2016-09-22 11:01:59 +0200
commite91f1b9d6987dda58498b8386eba191436b6f6d2 (patch)
tree1843949d72793a7536ff81b765f0c148203366b0
parentfcd90f79d25eba3a511aba72e3981aa9c435e690 (diff)
* Flexible output, allow writing DIPHA persistence diagrams (when not
writing to stdout). * Endian-safe reading and writing (for moving towards a Debian package). For convenience and portability, endian-conversions use Boost. If you are building on a little-endian machine without Boost, you can set the FORCE_NATIVE_ENDIANNESS preprocessor flag to avoid the Boost dependency.
-rw-r--r--ripser.cpp215
1 files changed, 162 insertions, 53 deletions
diff --git a/ripser.cpp b/ripser.cpp
index 3d4eb29..b0257c8 100644
--- a/ripser.cpp
+++ b/ripser.cpp
@@ -37,6 +37,10 @@ with this program. If not, see <http://www.gnu.org/licenses/>.
#include <sstream>
#include <unordered_map>
+#ifndef FORCE_NATIVE_ENDIANNESS
+#include <boost/endian/conversion.hpp>
+#endif
+
#ifdef USE_GOOGLE_HASHMAP
#include <sparsehash/sparse_hash_map>
template <class Key, class T> class hash_map : public google::sparse_hash_map<Key, T> {
@@ -47,12 +51,19 @@ public:
template <class Key, class T> class hash_map : public std::unordered_map<Key, T> {};
#endif
+static const int64_t DIPHA_MAGIC = 8067171840;
+static const int64_t DIPHA_TYPE_DISTANCE_MATRIX = 7;
+static const int64_t DIPHA_TYPE_PERSISTENCE_DIAGRAM = 2;
+
typedef float value_t;
// typedef uint16_t value_t;
typedef int64_t index_t;
typedef int16_t coefficient_t;
+enum input_file_format { LOWER_DISTANCE_MATRIX, UPPER_DISTANCE_MATRIX, DISTANCE_MATRIX, POINT_CLOUD, DIPHA_DISTANCE_MATRIX };
+enum output_file_format { TEXT_PD, DIPHA_PD };
+
class binomial_coeff_table {
std::vector<std::vector<index_t>> B;
index_t n_max, k_max;
@@ -98,6 +109,22 @@ std::vector<coefficient_t> multiplicative_inverse_vector(const coefficient_t m)
return inverse;
}
+template <typename T> T read(std::istream& s) {
+ T result;
+ s.read(reinterpret_cast<char*>(&result), sizeof(T));
+ #ifndef FORCE_NATIVE_ENDIANNESS
+ boost::endian::little_to_native_inplace(result);
+ #endif
+ return result;
+}
+
+template <typename T> void write(std::ostream& s, T value) {
+ #ifndef FORCE_NATIVE_ENDIANNESS
+ boost::endian::native_to_little_inplace(value);
+ #endif
+ s.write(reinterpret_cast<char*>(&value), sizeof(T));
+}
+
template <typename OutputIterator>
OutputIterator get_simplex_vertices(index_t idx, const index_t dim, index_t n,
const binomial_coeff_table& binomial_coeff, OutputIterator out) {
@@ -504,11 +531,12 @@ void compute_pairs(std::vector<diameter_index_t>& columns_to_reduce, hash_map<in
const DistanceMatrix& dist, const ComparatorCofaces& comp, const Comparator& comp_prev, index_t dim,
index_t n, value_t threshold, coefficient_t modulus,
const std::vector<coefficient_t>& multiplicative_inverse,
- const binomial_coeff_table& binomial_coeff) {
+ const binomial_coeff_table& binomial_coeff,
+ std::ostream& output_stream, output_file_format oformat, int64_t & pair_count) {
-#ifdef PRINT_PERSISTENCE_PAIRS
- std::cout << "persistence intervals in dim " << dim << ":" << std::endl;
-#endif
+ if (oformat == TEXT_PD) {
+ output_stream << "persistence intervals in dim " << dim << ":" << std::endl;
+ }
#ifdef ASSEMBLE_REDUCTION_MATRIX
compressed_sparse_matrix<diameter_entry_t> reduction_matrix;
@@ -629,25 +657,35 @@ void compute_pairs(std::vector<diameter_index_t>& columns_to_reduce, hash_map<in
continue;
}
} else {
-#ifdef PRINT_PERSISTENCE_PAIRS
#ifdef INDICATE_PROGRESS
std::cout << "\033[K";
#endif
- std::cout << " [" << diameter << ", )" << std::endl << std::flush;
-#endif
+ if (oformat == TEXT_PD) {
+ output_stream << " [" << diameter << ", )" << std::endl << std::flush;
+ } else if (oformat == DIPHA_PD) {
+ write<int64_t>(output_stream, -dim - 1); // Essential class in dimension dim
+ write<double>(output_stream, diameter);
+ write<double>(output_stream, NAN); // Undefined/unused for essential classes.
+ pair_count++;
+ }
break;
}
found_persistence_pair:
-#ifdef PRINT_PERSISTENCE_PAIRS
value_t death = get_diameter(pivot);
if (diameter != death) {
#ifdef INDICATE_PROGRESS
std::cout << "\033[K";
#endif
- std::cout << " [" << diameter << "," << death << ")" << std::endl << std::flush;
+ if (oformat == TEXT_PD) {
+ output_stream << " [" << diameter << "," << death << ")" << std::endl << std::flush;
+ } else if (oformat == DIPHA_PD) {
+ write<int64_t>(output_stream, dim);
+ write<double>(output_stream, diameter);
+ write<double>(output_stream, death);
+ pair_count++;
+ }
}
-#endif
pivot_column_index.insert(std::make_pair(get_index(pivot), i));
@@ -686,14 +724,6 @@ void compute_pairs(std::vector<diameter_index_t>& columns_to_reduce, hash_map<in
#endif
}
-enum file_format { LOWER_DISTANCE_MATRIX, UPPER_DISTANCE_MATRIX, DISTANCE_MATRIX, POINT_CLOUD, DIPHA };
-
-template <typename T> T read(std::istream& s) {
- T result;
- s.read(reinterpret_cast<char*>(&result), sizeof(T));
- return result; // on little endian: boost::endian::little_to_native(result);
-}
-
compressed_lower_distance_matrix read_point_cloud(std::istream& input_stream) {
std::vector<std::vector<value_t>> points;
@@ -763,13 +793,13 @@ compressed_lower_distance_matrix read_distance_matrix(std::istream& input_stream
}
compressed_lower_distance_matrix read_dipha(std::istream& input_stream) {
- if (read<int64_t>(input_stream) != 8067171840) {
- std::cerr << "input is not a Dipha file (magic number: 8067171840)" << std::endl;
+ if (read<int64_t>(input_stream) != DIPHA_MAGIC) {
+ std::cerr << "input is not a Dipha file (magic number: " << DIPHA_MAGIC << ")" << std::endl;
exit(-1);
}
- if (read<int64_t>(input_stream) != 7) {
- std::cerr << "input is not a Dipha distance matrix (file type: 7)" << std::endl;
+ if (read<int64_t>(input_stream) != DIPHA_TYPE_DISTANCE_MATRIX) {
+ std::cerr << "input is not a Dipha distance matrix (file type: " << DIPHA_TYPE_DISTANCE_MATRIX << ")" << std::endl;
exit(-1);
}
@@ -787,7 +817,7 @@ compressed_lower_distance_matrix read_dipha(std::istream& input_stream) {
return compressed_lower_distance_matrix(std::move(distances));
}
-compressed_lower_distance_matrix read_file(std::istream& input_stream, file_format format) {
+compressed_lower_distance_matrix read_file(std::istream& input_stream, input_file_format format) {
switch (format) {
case LOWER_DISTANCE_MATRIX:
return read_lower_distance_matrix(input_stream);
@@ -797,7 +827,7 @@ compressed_lower_distance_matrix read_file(std::istream& input_stream, file_form
return read_distance_matrix(input_stream);
case POINT_CLOUD:
return read_point_cloud(input_stream);
- case DIPHA:
+ case DIPHA_DISTANCE_MATRIX:
return read_dipha(input_stream);
}
}
@@ -805,17 +835,20 @@ compressed_lower_distance_matrix read_file(std::istream& input_stream, file_form
void print_usage_and_exit(int exit_code) {
std::cerr << "Usage: "
<< "ripser "
- << "[options] [filename]" << std::endl
+ << "[options] [input filename] [output filename]" << std::endl
<< std::endl
<< "Options:" << std::endl
<< std::endl
<< " --help print this screen" << std::endl
- << " --format use the specified file format for the input. Options are:" << std::endl
+ << " --iformat use the specified file format for the input. Options are:" << std::endl
<< " lower-distance (lower triangular distance matrix; default)" << std::endl
<< " upper-distance (upper triangular distance matrix)" << std::endl
<< " distance (full distance matrix)" << std::endl
<< " point-cloud (point cloud in Euclidean space)" << std::endl
<< " dipha (distance matrix in DIPHA file format)" << std::endl
+ << " --oformat use the specified file format for the output. Options are:" << std::endl
+ << " text (plain text output; default)" << std::endl
+ << " dipha (barcode in DIPHA file format)" << std::endl
<< " --dim <k> compute persistent homology up to dimension <k>" << std::endl
<< " --threshold <t> compute Rips complexes up to diameter <t>" << std::endl
#ifdef USE_COEFFICIENTS
@@ -828,9 +861,11 @@ void print_usage_and_exit(int exit_code) {
int main(int argc, char** argv) {
- const char* filename = nullptr;
+ const char* ifilename = nullptr;
+ const char* ofilename = nullptr;
- file_format format = DISTANCE_MATRIX;
+ input_file_format iformat = DISTANCE_MATRIX;
+ output_file_format oformat = TEXT_PD;
index_t dim_max = 1;
value_t threshold = std::numeric_limits<value_t>::max();
@@ -855,18 +890,26 @@ int main(int argc, char** argv) {
size_t next_pos;
threshold = std::stof(parameter, &next_pos);
if (next_pos != parameter.size()) print_usage_and_exit(-1);
- } else if (arg == "--format") {
+ } else if (arg == "--iformat") {
std::string parameter = std::string(argv[++i]);
if (parameter == "lower-distance")
- format = LOWER_DISTANCE_MATRIX;
+ iformat = LOWER_DISTANCE_MATRIX;
else if (parameter == "upper-distance")
- format = UPPER_DISTANCE_MATRIX;
+ iformat = UPPER_DISTANCE_MATRIX;
else if (parameter == "distance")
- format = DISTANCE_MATRIX;
+ iformat = DISTANCE_MATRIX;
else if (parameter == "point-cloud")
- format = POINT_CLOUD;
+ iformat = POINT_CLOUD;
else if (parameter == "dipha")
- format = DIPHA;
+ iformat = DIPHA_DISTANCE_MATRIX;
+ else
+ print_usage_and_exit(-1);
+ } else if (arg == "--oformat") {
+ std::string parameter = std::string(argv[++i]);
+ if (parameter == "text")
+ oformat = TEXT_PD;
+ else if (parameter == "dipha")
+ oformat = DIPHA_PD;
else
print_usage_and_exit(-1);
#ifdef USE_COEFFICIENTS
@@ -876,19 +919,62 @@ int main(int argc, char** argv) {
modulus = std::stol(parameter, &next_pos);
if (next_pos != parameter.size() || !is_prime(modulus)) print_usage_and_exit(-1);
#endif
- } else {
- if (filename) { print_usage_and_exit(-1); }
- filename = argv[i];
+ } else if (i == argc - 2) {
+ ifilename = argv[i];
+ }
+ else if (i == argc - 1) {
+ ofilename = argv[i];
}
+ else {
+ print_usage_and_exit(-1);
+ }
+ }
+
+ if (!ifilename || !ofilename) {
+ std::cerr << "You need to specify input and output files." << std::endl;
+ print_usage_and_exit(-1);
+ }
+
+ if (std::string(ifilename) == std::string("-"))
+ ifilename = nullptr;
+ if (std::string(ofilename) == std::string("-"))
+ ofilename = nullptr;
+
+ std::ifstream input_file_stream(ifilename);
+ if (ifilename && input_file_stream.fail()) {
+ std::cerr << "couldn't open file " << ifilename << std::endl;
+ exit(-1);
}
- std::ifstream file_stream(filename);
- if (filename && file_stream.fail()) {
- std::cerr << "couldn't open file " << filename << std::endl;
+ if (!ofilename && oformat == DIPHA_PD) {
+ std::cerr << "Warning: Will not write a DIPHA persistence diagram to stdout. Changing output format to text." << std::endl;
+ oformat = TEXT_PD;
+ }
+
+ std::ofstream output_file_stream;
+ if (oformat == TEXT_PD) {
+ output_file_stream.open(ofilename, std::ios::out);
+ }
+ else if (oformat == DIPHA_PD) {
+ // The in/out opening mode is so that we can seek back to the third third entry in the file and write the pair count at the very end.
+ output_file_stream.open(ofilename, std::ios::out | std::ios::in | std::ios::trunc | std::ios::binary);
+ }
+
+ if (ofilename && output_file_stream.fail()) {
+ std::cerr << "couldn't open file " << ofilename << std::endl;
exit(-1);
}
- compressed_lower_distance_matrix dist = read_file(filename ? file_stream : std::cin, format);
+ std::ostream & output_stream = ofilename ? output_file_stream : std::cout;
+
+ if (oformat == DIPHA_PD) {
+ write<int64_t>(output_stream, DIPHA_MAGIC);
+ write<int64_t>(output_stream, DIPHA_TYPE_PERSISTENCE_DIAGRAM);
+ write<int64_t>(output_stream, DIPHA_MAGIC); // This is to later be replaced by the number of pairs in the persistence diagram. This is the reason we don't allow binary stdout output, since we'd have to buffer up the entire PD to know the count beforehand. In a file we can seek back and overwrite.
+ }
+ int64_t pair_count = 0;
+
+ compressed_lower_distance_matrix dist = read_file(ifilename ? input_file_stream : std::cin, iformat);
index_t n = dist.size();
@@ -914,9 +1000,9 @@ int main(int argc, char** argv) {
}
std::sort(edges.rbegin(), edges.rend(), greater_diameter_or_smaller_index<diameter_index_t>());
-#ifdef PRINT_PERSISTENCE_PAIRS
- std::cout << "persistence intervals in dim 0:" << std::endl;
-#endif
+ if (oformat == TEXT_PD) {
+ output_stream << "persistence intervals in dim 0:" << std::endl;
+ }
std::vector<index_t> vertices_of_edge(2);
for (auto e : edges) {
@@ -925,19 +1011,33 @@ int main(int argc, char** argv) {
index_t u = dset.find(vertices_of_edge[0]), v = dset.find(vertices_of_edge[1]);
if (u != v) {
-#ifdef PRINT_PERSISTENCE_PAIRS
- std::cout << " [0," << get_diameter(e) << ")" << std::endl;
-#endif
+ if (oformat == TEXT_PD) {
+ output_stream << " [0," << get_diameter(e) << ")" << std::endl;
+ } else if (oformat == DIPHA_PD) {
+ write<int64_t>(output_stream, 0);
+ write<double>(output_stream, 0);
+ write<double>(output_stream, get_diameter(e));
+ pair_count++;
+ }
dset.link(u, v);
} else
columns_to_reduce.push_back(e);
}
std::reverse(columns_to_reduce.begin(), columns_to_reduce.end());
-#ifdef PRINT_PERSISTENCE_PAIRS
- for (index_t i = 0; i < n; ++i)
- if (dset.find(i) == i) std::cout << " [0, )" << std::endl << std::flush;
-#endif
+ for (index_t i = 0; i < n; ++i)
+ {
+ if (dset.find(i) == i) {
+ if (oformat == TEXT_PD) {
+ output_stream << " [0, )" << std::endl << std::flush;
+ } else if (oformat == DIPHA_PD) {
+ write<int64_t>(output_stream, -0 - 1); // Indicate essential class.
+ write<double>(output_stream, 0);
+ write<double>(output_stream, NAN); // Undefined/unused for essential classes.
+ pair_count++;
+ }
+ }
+ }
}
for (index_t dim = 1; dim <= dim_max; ++dim) {
@@ -948,10 +1048,19 @@ int main(int argc, char** argv) {
pivot_column_index.reserve(columns_to_reduce.size());
compute_pairs(columns_to_reduce, pivot_column_index, dist, comp, comp_prev, dim, n, threshold, modulus,
- multiplicative_inverse, binomial_coeff);
+ multiplicative_inverse, binomial_coeff, output_stream, oformat, pair_count);
if (dim < dim_max) {
assemble_columns_to_reduce(columns_to_reduce, pivot_column_index, comp, dim, n, threshold, binomial_coeff);
}
}
-} \ No newline at end of file
+
+
+ if (oformat == DIPHA_PD) {
+ output_file_stream.seekp(2*sizeof(int64_t), std::ios_base::beg); // Rewind to the spot in the DIPHA file where we write the number of intervals.
+ write<int64_t>(output_file_stream, pair_count);
+ }
+
+ input_file_stream.close();
+ output_file_stream.close();
+}