1 files changed, 357 insertions, 0 deletions
diff --git a/src/common/include/gudhi/reader_utils.h b/src/common/include/gudhi/reader_utils.h
new file mode 100644
index 00000000..98335552
--- /dev/null
+++ b/src/common/include/gudhi/reader_utils.h
@@ -0,0 +1,357 @@
+/*    This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ *    See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ *    Author(s):       Clement Maria, Pawel Dlotko, Clement Jamin
+ *
+ *    Copyright (C) 2014 Inria
+ *
+ *    Modification(s):
+ *      - YYYY/MM Author: Description of the modification
+ */
+
+#ifndef READER_UTILS_H_
+#define READER_UTILS_H_
+
+#include <gudhi/graph_simplicial_complex.h>
+#include <gudhi/Debug_utils.h>
+
+#include <boost/function_output_iterator.hpp>
+#include <boost/graph/adjacency_list.hpp>
+
+#include <iostream>
+#include <fstream>
+#include <map>
+#include <limits>  // for numeric_limits
+#include <string>
+#include <vector>
+#include <utility>  // for pair
+#include <tuple>  // for std::make_tuple
+
+namespace Gudhi {
+
+// Keep this file tag for Doxygen to parse the code, otherwise, functions are not documented.
+// It is required for global functions and variables.
+
+/** @file
+ * @brief This file includes common file reader for GUDHI
+ */
+
+/**
+ * @brief Read a set of points to turn it into a vector< vector<double> > by filling points.
+ *
+ * File format: 1 point per line<br>
+ * X11 X12 ... X1d<br>
+ * X21 X22 ... X2d<br>
+ * etc<br>
+ */
+inline void read_points(std::string file_name, std::vector<std::vector<double>>& points) {
+  std::ifstream in_file(file_name.c_str(), std::ios::in);
+  if (!in_file.is_open()) {
+    std::cerr << "Unable to open file " << file_name << std::endl;
+    return;
+  }
+
+  std::string line;
+  double x;
+  while (getline(in_file, line)) {
+    std::vector<double> point;
+    std::istringstream iss(line);
+    while (iss >> x) {
+      point.push_back(x);
+    }
+    // Check for empty lines
+    if (!point.empty()) points.push_back(point);
+  }
+  in_file.close();
+}
+
+/**
+ * @brief Read a graph from a file.
+ *
+ * \tparam Graph_t Type for the return graph. Must be constructible from iterators on pairs of Vertex_handle
+ * \tparam Filtration_value Type for the value of the read filtration
+ * \tparam Vertex_handle Type for the value of the read vertices
+ *
+ * File format: 1 simplex per line<br>
+ * Dim1 X11 X12 ... X1d Fil1<br>
+ * Dim2 X21 X22 ... X2d Fil2<br>
+ * etc<br>
+ *
+ * The vertices must be labeled from 0 to n-1.
+ * Every simplex must appear exactly once.
+ * Simplices of dimension more than 1 are ignored.
+ */
+template <typename Graph_t, typename Filtration_value, typename Vertex_handle>
+Graph_t read_graph(std::string file_name) {
+  std::ifstream in_(file_name.c_str(), std::ios::in);
+  if (!in_.is_open()) {
+    std::string error_str("read_graph - Unable to open file ");
+    error_str.append(file_name);
+    std::cerr << error_str << std::endl;
+    throw std::invalid_argument(error_str);
+  }
+
+  typedef std::pair<Vertex_handle, Vertex_handle> Edge_t;
+  std::vector<Edge_t> edges;
+  std::vector<Filtration_value> edges_fil;
+  std::map<Vertex_handle, Filtration_value> vertices;
+
+  std::string line;
+  int dim;
+  Vertex_handle u, v, max_h = -1;
+  Filtration_value fil;
+  while (getline(in_, line)) {
+    std::istringstream iss(line);
+    while (iss >> dim) {
+      switch (dim) {
+        case 0: {
+          iss >> u;
+          iss >> fil;
+          vertices[u] = fil;
+          if (max_h < u) {
+            max_h = u;
+          }
+          break;
+        }
+        case 1: {
+          iss >> u;
+          iss >> v;
+          iss >> fil;
+          edges.push_back(Edge_t(u, v));
+          edges_fil.push_back(fil);
+          break;
+        }
+        default: { break; }
+      }
+    }
+  }
+  in_.close();
+
+  if ((size_t)(max_h + 1) != vertices.size()) {
+    std::cerr << "Error: vertices must be labeled from 0 to n-1 \n";
+  }
+
+  Graph_t skel_graph(edges.begin(), edges.end(), edges_fil.begin(), vertices.size());
+  auto vertex_prop = boost::get(vertex_filtration_t(), skel_graph);
+
+  typename boost::graph_traits<Graph_t>::vertex_iterator vi, vi_end;
+  auto v_it = vertices.begin();
+  for (std::tie(vi, vi_end) = boost::vertices(skel_graph); vi != vi_end; ++vi, ++v_it) {
+    boost::put(vertex_prop, *vi, v_it->second);
+  }
+
+  return skel_graph;
+}
+
+/**
+ * @brief Read a face from a file.
+ *
+ * File format: 1 simplex per line<br>
+ * Dim1 X11 X12 ... X1d Fil1<br>
+ * Dim2 X21 X22 ... X2d Fil2<br>
+ * etc<br>
+ *
+ * The vertices must be labeled from 0 to n-1.
+ * Every simplex must appear exactly once.
+ * Simplices of dimension more than 1 are ignored.
+ */
+template <typename Vertex_handle, typename Filtration_value>
+bool read_simplex(std::istream& in_, std::vector<Vertex_handle>& simplex, Filtration_value& fil) {
+  int dim = 0;
+  if (!(in_ >> dim)) return false;
+  Vertex_handle v;
+  for (int i = 0; i < dim + 1; ++i) {
+    if (!(in_ >> v)) return false;
+    simplex.push_back(v);
+  }
+  if (!(in_ >> fil)) return false;
+  in_.ignore((std::numeric_limits<std::streamsize>::max)(), '\n');  // ignore until the carriage return
+  return true;
+}
+
+/**
+ * @brief Read a hasse simplex from a file.
+ *
+ * File format: 1 simplex per line<br>
+ * Dim1 k11 k12 ... k1Dim1 Fil1<br>
+ * Dim2 k21 k22 ... k2Dim2 Fil2<br>
+ * etc<br>
+ *
+ * The key of a simplex is its position in the filtration order and also the number of its row in the file.
+ * Dimi ki1 ki2 ... kiDimi Fili means that the ith simplex in the filtration has dimension Dimi, filtration value
+ * fil1 and simplices with key ki1 ... kiDimi in its boundary.*/
+template <typename Simplex_key, typename Filtration_value>
+bool read_hasse_simplex(std::istream& in_, std::vector<Simplex_key>& boundary, Filtration_value& fil) {
+  int dim;
+  if (!(in_ >> dim)) return false;
+  if (dim == 0) {
+    in_ >> fil;
+    return true;
+  }
+  Simplex_key key;
+  for (int i = 0; i < dim + 1; ++i) {
+    in_ >> key;
+    boundary.push_back(key);
+  }
+  in_ >> fil;
+  return true;
+}
+
+/**
+ * @brief Read a lower triangular distance matrix from a csv file. We assume that the .csv store the whole
+ * (square) matrix.
+ *
+ * @author Pawel Dlotko
+ *
+ * Square matrix file format:<br>
+ * 0;D12;...;D1j<br>
+ * D21;0;...;D2j<br>
+ * ...<br>
+ * Dj1;Dj2;...;0<br>
+ *
+ * lower matrix file format:<br>
+ * 0<br>
+ * D21;<br>
+ * D31;D32;<br>
+ * ...<br>
+ * Dj1;Dj2;...;Dj(j-1);<br>
+ *
+ **/
+template <typename Filtration_value>
+std::vector<std::vector<Filtration_value>> read_lower_triangular_matrix_from_csv_file(const std::string& filename,
+                                                                                      const char separator = ';') {
+#ifdef DEBUG_TRACES
+  std::cout << "Using procedure read_lower_triangular_matrix_from_csv_file \n";
+#endif  // DEBUG_TRACES
+  std::vector<std::vector<Filtration_value>> result;
+  std::ifstream in;
+  in.open(filename.c_str());
+  if (!in.is_open()) {
+    return result;
+  }
+
+  std::string line;
+
+  // the first line is emtpy, so we ignore it:
+  std::getline(in, line);
+  std::vector<Filtration_value> values_in_this_line;
+  result.push_back(values_in_this_line);
+
+  int number_of_line = 0;
+
+  // first, read the file line by line to a string:
+  while (std::getline(in, line)) {
+    // if line is empty, break
+    if (line.size() == 0) break;
+
+    // if the last element of a string is comma:
+    if (line[line.size() - 1] == separator) {
+      // then shrink the string by one
+      line.pop_back();
+    }
+
+    // replace all commas with spaces
+    std::replace(line.begin(), line.end(), separator, ' ');
+
+    // put the new line to a stream
+    std::istringstream iss(line);
+    // and now read the doubles.
+
+    int number_of_entry = 0;
+    std::vector<Filtration_value> values_in_this_line;
+    while (iss.good()) {
+      double entry;
+      iss >> entry;
+      if (number_of_entry <= number_of_line) {
+        values_in_this_line.push_back(entry);
+      }
+      ++number_of_entry;
+    }
+    if (!values_in_this_line.empty()) result.push_back(values_in_this_line);
+    ++number_of_line;
+  }
+  in.close();
+
+#ifdef DEBUG_TRACES
+  std::cerr << "Here is the matrix we read : \n";
+  for (size_t i = 0; i != result.size(); ++i) {
+    for (size_t j = 0; j != result[i].size(); ++j) {
+      std::cerr << result[i][j] << " ";
+    }
+    std::cerr << std::endl;
+  }
+#endif  // DEBUG_TRACES
+
+  return result;
+}  // read_lower_triangular_matrix_from_csv_file
+
+/**
+Reads a file containing persistence intervals.
+Each line might contain 2, 3 or 4 values: [[field] dimension] birth death
+The output iterator `out` is used this way: `*out++ = std::make_tuple(dim, birth, death);`
+where `dim` is an `int`, `birth` a `double`, and `death` a `double`.
+Note: the function does not check that birth <= death.
+**/
+template <typename OutputIterator>
+void read_persistence_intervals_and_dimension(std::string const& filename, OutputIterator out) {
+  std::ifstream in(filename);
+  if (!in.is_open()) {
+    std::string error_str("read_persistence_intervals_and_dimension - Unable to open file ");
+    error_str.append(filename);
+    std::cerr << error_str << std::endl;
+    throw std::invalid_argument(error_str);
+  }
+
+  while (!in.eof()) {
+    std::string line;
+    getline(in, line);
+    if (line.length() != 0 && line[0] != '#') {
+      double numbers[4];
+      int n = sscanf(line.c_str(), "%lf %lf %lf %lf", &numbers[0], &numbers[1], &numbers[2], &numbers[3]);
+      if (n >= 2) {
+        int dim = (n >= 3 ? static_cast<int>(numbers[n - 3]) : -1);
+        *out++ = std::make_tuple(dim, numbers[n - 2], numbers[n - 1]);
+      }
+    }
+  }
+}
+
+/**
+Reads a file containing persistence intervals.
+Each line might contain 2, 3 or 4 values: [[field] dimension] birth death
+The return value is an `std::map<dim, std::vector<std::pair<birth, death>>>`
+where `dim` is an `int`, `birth` a `double`, and `death` a `double`.
+Note: the function does not check that birth <= death.
+**/
+inline std::map<int, std::vector<std::pair<double, double>>> read_persistence_intervals_grouped_by_dimension(
+    std::string const& filename) {
+  std::map<int, std::vector<std::pair<double, double>>> ret;
+  read_persistence_intervals_and_dimension(
+      filename, boost::make_function_output_iterator([&ret](std::tuple<int, double, double> t) {
+        ret[get<0>(t)].push_back(std::make_pair(get<1>(t), get<2>(t)));
+      }));
+  return ret;
+}
+
+/**
+Reads a file containing persistence intervals.
+Each line might contain 2, 3 or 4 values: [[field] dimension] birth death
+If `only_this_dim` = -1, dimension is ignored and all lines are returned.
+If `only_this_dim` is >= 0, only the lines where dimension = `only_this_dim`
+(or where dimension is not specified) are returned.
+The return value is an `std::vector<std::pair<birth, death>>`
+where `dim` is an `int`, `birth` a `double`, and `death` a `double`.
+Note: the function does not check that birth <= death.
+**/
+inline std::vector<std::pair<double, double>> read_persistence_intervals_in_dimension(std::string const& filename,
+                                                                                      int only_this_dim = -1) {
+  std::vector<std::pair<double, double>> ret;
+  read_persistence_intervals_and_dimension(
+      filename, boost::make_function_output_iterator([only_this_dim, &ret](std::tuple<int, double, double> t) {
+        if (only_this_dim == get<0>(t) || only_this_dim == -1) ret.emplace_back(get<1>(t), get<2>(t));
+      }));
+  return ret;
+}
+
+}  // namespace Gudhi
+
+#endif  // READER_UTILS_H_