summaryrefslogtreecommitdiff
path: root/include/gudhi/reader_utils.h
blob: 90be4fc7d9e2d2659684558c6c6d7f8f73410a75 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
/*    This file is part of the Gudhi Library. The Gudhi library
 *    (Geometric Understanding in Higher Dimensions) is a generic C++
 *    library for computational topology.
 *
 *    Author(s):       Clement Maria, Pawel Dlotko, Clement Jamin
 *
 *    Copyright (C) 2014  INRIA
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef READER_UTILS_H_
#define READER_UTILS_H_

#include <gudhi/graph_simplicial_complex.h>
#include <gudhi/Debug_utils.h>

#include <boost/function_output_iterator.hpp>
#include <boost/graph/adjacency_list.hpp>

#include <algorithm>  // for std::replace
#include <cstdio>  // for std::sscanf
#include <iostream>
#include <fstream>
#include <map>
#include <limits>  // for numeric_limits
#include <sstream>  // for std::istringstream
#include <stdexcept>  // for std::invalid_argument
#include <string>
#include <vector>
#include <utility>  // for pair
#include <tuple>  // for std::make_tuple

namespace Gudhi {

// Keep this file tag for Doxygen to parse the code, otherwise, functions are not documented.
// It is required for global functions and variables.

/** @file
 * @brief This file provides common file readers for GUDHI.
 */

/**
 * @brief Loads a point cloud from a text file and appends every point to `points`.
 *
 * File format: 1 point per line, coordinates separated by white space<br>
 * X11 X12 ... X1d<br>
 * X21 X22 ... X2d<br>
 * etc<br>
 *
 * Each line is parsed up to the first token that cannot be read as a `double`. Lines that yield no coordinate
 * at all (for instance blank lines) are skipped. Points already stored in `points` are kept.
 *
 * If the file cannot be opened, a message is written on `std::cerr` and `points` is left untouched.
 *
 * @param[in] file_name Path of the file to read.
 * @param[out] points Container the points read from the file are appended to.
 */
inline void read_points(std::string file_name, std::vector<std::vector<double>>& points) {
  std::ifstream input(file_name.c_str(), std::ios::in);
  if (!input.is_open()) {
    std::cerr << "Unable to open file " << file_name << std::endl;
    return;
  }

  for (std::string row; getline(input, row);) {
    std::istringstream tokens(row);
    std::vector<double> coordinates;
    for (double value; tokens >> value;) {
      coordinates.push_back(value);
    }
    // A row without any coordinate does not describe a point.
    if (coordinates.empty()) continue;
    points.push_back(std::move(coordinates));
  }
  input.close();
}

/**
 * @brief Read a graph from a file.
 *
 * \tparam Graph_t Type for the return graph. It is constructed from a range of edges (pairs of Vertex_handle),
 * an iterator on the edge filtration values and a number of vertices.
 * \tparam Filtration_value Type for the value of the read filtration
 * \tparam Vertex_handle Type for the value of the read vertices
 *
 * File format: 1 simplex per line<br>
 * Dim1 X11 X12 ... X1d Fil1<br>
 * Dim2 X21 X22 ... X2d Fil2<br>
 * etc<br>
 *
 * The vertices must be labeled from 0 to n-1 (a violation is only reported on `std::cerr`).
 * Every simplex must appear exactly once.
 * Simplices of dimension more than 1 are ignored: their vertices and filtration value are read and discarded.
 * A record that is truncated, malformed or has a negative dimension ends the parsing of its line.
 *
 * @param[in] file_name Path of the file to read.
 * @return The graph built from the vertices and edges found in the file.
 * @exception std::invalid_argument If the file cannot be opened.
 */
template <typename Graph_t, typename Filtration_value, typename Vertex_handle>
Graph_t read_graph(std::string file_name) {
  std::ifstream in_(file_name.c_str(), std::ios::in);
  if (!in_.is_open()) {
    std::string error_str("read_graph - Unable to open file ");
    error_str.append(file_name);
    std::cerr << error_str << std::endl;
    throw std::invalid_argument(error_str);
  }

  typedef std::pair<Vertex_handle, Vertex_handle> Edge_t;
  std::vector<Edge_t> edges;
  std::vector<Filtration_value> edges_fil;
  std::map<Vertex_handle, Filtration_value> vertices;

  std::string line;
  int dim;
  Vertex_handle u, v, max_h = -1;
  Filtration_value fil;
  while (getline(in_, line)) {
    std::istringstream iss(line);
    while (iss >> dim) {
      if (dim == 0) {
        // Only store the vertex when both its label and its filtration value were actually read.
        if (!(iss >> u >> fil)) break;
        vertices[u] = fil;
        if (max_h < u) {
          max_h = u;
        }
      } else if (dim == 1) {
        if (!(iss >> u >> v >> fil)) break;
        edges.push_back(Edge_t(u, v));
        edges_fil.push_back(fil);
      } else if (dim > 1) {
        // Consume the dim + 1 vertices and the filtration value of the ignored simplex, so that its tokens
        // are not mistaken for the dimension of a following record.
        for (int remaining = dim; remaining >= 0 && (iss >> u); --remaining) {
        }
        if (!(iss >> fil)) break;
      } else {
        // A negative dimension is meaningless: give up on this line.
        break;
      }
    }
  }
  in_.close();

  if (static_cast<std::size_t>(max_h + 1) != vertices.size()) {
    std::cerr << "Error: vertices must be labeled from 0 to n-1 \n";
  }

  Graph_t skel_graph(edges.begin(), edges.end(), edges_fil.begin(), vertices.size());
  auto vertex_prop = boost::get(vertex_filtration_t(), skel_graph);

  // Graph vertices and the (key ordered) vertex map are walked in lockstep. The walk stops as soon as either
  // range is exhausted, so an edge naming an undeclared vertex cannot push the map iterator past its end.
  typename boost::graph_traits<Graph_t>::vertex_iterator vi, vi_end;
  auto v_it = vertices.begin();
  for (std::tie(vi, vi_end) = boost::vertices(skel_graph); vi != vi_end && v_it != vertices.end(); ++vi, ++v_it) {
    boost::put(vertex_prop, *vi, v_it->second);
  }

  return skel_graph;
}

/**
 * @brief Read one simplex (its vertices and its filtration value) from a stream.
 *
 * Expected format: 1 simplex per line<br>
 * Dim1 X11 X12 ... X1d Fil1<br>
 * Dim2 X21 X22 ... X2d Fil2<br>
 * etc<br>
 *
 * The dimension `Dim` is read first, then `Dim + 1` vertices, then the filtration value. Whatever remains on
 * the line is discarded.
 *
 * @param[in,out] in_ Stream to read from.
 * @param[out] simplex Container the vertices are appended to. It is not cleared beforehand.
 * @param[out] fil Receives the filtration value of the simplex.
 * @return `false` if no dimension could be read from the stream, `true` otherwise. Failures occurring while
 * reading the vertices or the filtration value are not reported.
 */
template <typename Vertex_handle, typename Filtration_value>
bool read_simplex(std::istream& in_, std::vector<Vertex_handle>& simplex, Filtration_value& fil) {
  int dim = 0;
  in_ >> dim;
  if (in_.fail()) return false;

  Vertex_handle vertex;
  for (int remaining = dim + 1; remaining > 0; --remaining) {
    in_ >> vertex;
    simplex.push_back(vertex);
  }
  in_ >> fil;
  // Drop the rest of the line, up to and including the newline character.
  in_.ignore((std::numeric_limits<std::streamsize>::max)(), '\n');
  return true;
}

/**
 * @brief Read one simplex of a Hasse diagram (its boundary and its filtration value) from a stream.
 *
 * Expected format: 1 simplex per line<br>
 * Dim1 k11 k12 ... Fil1<br>
 * Dim2 k21 k22 ... Fil2<br>
 * etc<br>
 *
 * The key of a simplex is its position in the filtration order and also the number of its row in the file.
 * A row gives the dimension of the ith simplex of the filtration, the keys of the simplices in its boundary
 * and its filtration value. For a dimension of 0 no key is read; for any other dimension `Dim`, `Dim + 1`
 * keys are read.
 *
 * @param[in,out] in_ Stream to read from.
 * @param[out] boundary Container the boundary keys are appended to. It is not cleared beforehand.
 * @param[out] fil Receives the filtration value of the simplex.
 * @return `false` if no dimension could be read from the stream, `true` otherwise. Failures occurring while
 * reading the keys or the filtration value are not reported.
 */
template <typename Simplex_key, typename Filtration_value>
bool read_hasse_simplex(std::istream& in_, std::vector<Simplex_key>& boundary, Filtration_value& fil) {
  int dim = 0;
  in_ >> dim;
  if (in_.fail()) return false;

  // A vertex has no boundary: only its filtration value follows the dimension.
  if (dim != 0) {
    Simplex_key face_key;
    for (int remaining = dim + 1; remaining > 0; --remaining) {
      in_ >> face_key;
      boundary.push_back(face_key);
    }
  }
  in_ >> fil;
  return true;
}

/**
 * @brief Read a lower triangular distance matrix from a csv file. The file may store either the whole
 * (square) matrix or only its lower triangular part.
 *
 * @author Pawel Dlotko
 *
 * Square matrix file format:<br>
 * 0;D12;...;D1j<br>
 * D21;0;...;D2j<br>
 * ...<br>
 * Dj1;Dj2;...;0<br>
 *
 * lower matrix file format:<br>
 * 0<br>
 * D21;<br>
 * D31;D32;<br>
 * ...<br>
 * Dj1;Dj2;...;Dj(j-1);<br>
 *
 * The first line of the file is skipped and an empty row is stored in its place. For every following line,
 * only the values strictly below the diagonal are kept (at most `i` values for the `i`th line after the
 * first one). Reading stops at the first empty line. A line from which no value can be read adds no row to
 * the result, but is still counted when computing how many values the next lines may hold.
 *
 * @param[in] filename Path of the csv file to read.
 * @param[in] separator Character separating the values on a line.
 * @return The rows of the lower triangular matrix. The result is empty if the file cannot be opened.
 **/
template <typename Filtration_value>
std::vector<std::vector<Filtration_value>> read_lower_triangular_matrix_from_csv_file(const std::string& filename,
                                                                                      const char separator = ';') {
#ifdef DEBUG_TRACES
  std::cout << "Using procedure read_lower_triangular_matrix_from_csv_file \n";
#endif  // DEBUG_TRACES
  std::vector<std::vector<Filtration_value>> result;
  std::ifstream in;
  in.open(filename.c_str());
  if (!in.is_open()) {
    return result;
  }

  std::string line;

  // The first line holds no value below the diagonal: skip it and store an empty row instead.
  std::getline(in, line);
  result.push_back(std::vector<Filtration_value>());

  int number_of_line = 0;

  // Read the rest of the file line by line.
  while (std::getline(in, line)) {
    // An empty line marks the end of the matrix.
    if (line.empty()) break;

    // Drop a trailing separator, if any.
    if (line.back() == separator) {
      line.pop_back();
    }

    // Turn the separators into spaces so that the values can be extracted from a stream.
    std::replace(line.begin(), line.end(), separator, ' ');
    std::istringstream iss(line);

    int number_of_entry = 0;
    std::vector<Filtration_value> values_in_this_line;
    double entry;
    // Testing the extraction itself (rather than the stream state before it) guarantees that a failed read,
    // e.g. on trailing white space, never stores a bogus value. Entries on or above the diagonal are not needed.
    while (number_of_entry <= number_of_line && (iss >> entry)) {
      values_in_this_line.push_back(static_cast<Filtration_value>(entry));
      ++number_of_entry;
    }
    if (!values_in_this_line.empty()) result.push_back(values_in_this_line);
    ++number_of_line;
  }
  in.close();

#ifdef DEBUG_TRACES
  std::cerr << "Here is the matrix we read : \n";
  for (size_t i = 0; i != result.size(); ++i) {
    for (size_t j = 0; j != result[i].size(); ++j) {
      std::cerr << result[i][j] << " ";
    }
    std::cerr << std::endl;
  }
#endif  // DEBUG_TRACES

  return result;
}  // read_lower_triangular_matrix_from_csv_file

/**
Reads a file containing persistence intervals.
Each line might contain 2, 3 or 4 values: [[field] dimension] birth death
Empty lines, lines starting with `#` and lines from which fewer than 2 values can be read are skipped.
The output iterator `out` is used this way: `*out++ = std::make_tuple(dim, birth, death);`
where `dim` is an `int`, `birth` a `double`, and `death` a `double`.
When a line does not specify the dimension, `dim` is set to -1.
Note: the function does not check that birth <= death.
@param[in] filename Path of the file to read.
@param[out] out Output iterator receiving one tuple per interval read.
@exception std::invalid_argument If the file cannot be opened.
**/
template <typename OutputIterator>
void read_persistence_intervals_and_dimension(std::string const& filename, OutputIterator out) {
  std::ifstream in(filename);
  if (!in.is_open()) {
    std::string error_str("read_persistence_intervals_and_dimension - Unable to open file ");
    error_str.append(filename);
    std::cerr << error_str << std::endl;
    throw std::invalid_argument(error_str);
  }

  // The loop is driven by the result of getline itself: it stops on any read failure, not only on end of
  // file, so a stream that fails without ever reaching EOF cannot make it spin forever.
  std::string line;
  while (std::getline(in, line)) {
    if (line.empty() || line[0] == '#') continue;

    double numbers[4];
    const int n = std::sscanf(line.c_str(), "%lf %lf %lf %lf", &numbers[0], &numbers[1], &numbers[2], &numbers[3]);
    if (n >= 2) {
      // The last two values are always birth and death; the one before them, if any, is the dimension.
      const int dim = (n >= 3 ? static_cast<int>(numbers[n - 3]) : -1);
      *out++ = std::make_tuple(dim, numbers[n - 2], numbers[n - 1]);
    }
  }
}

/**
Reads a file containing persistence intervals.
Each line might contain 2, 3 or 4 values: [[field] dimension] birth death
The return value is an `std::map<dim, std::vector<std::pair<birth, death>>>`
where `dim` is an `int`, `birth` a `double`, and `death` a `double`.
The file is read with `read_persistence_intervals_and_dimension`: intervals whose line does not specify a
dimension are stored under the key -1, and an `std::invalid_argument` exception is thrown if the file cannot
be opened.
Note: the function does not check that birth <= death.
**/
inline std::map<int, std::vector<std::pair<double, double>>> read_persistence_intervals_grouped_by_dimension(
    std::string const& filename) {
  std::map<int, std::vector<std::pair<double, double>>> ret;
  // std::get is qualified so that the calls do not depend on some other `get` template being visible here.
  read_persistence_intervals_and_dimension(
      filename, boost::make_function_output_iterator([&ret](std::tuple<int, double, double> const& t) {
        ret[std::get<0>(t)].emplace_back(std::get<1>(t), std::get<2>(t));
      }));
  return ret;
}

/**
Reads a file containing persistence intervals.
Each line might contain 2, 3 or 4 values: [[field] dimension] birth death
If `only_this_dim` = -1, dimension is ignored and all lines are returned.
Otherwise, only the lines whose dimension equals `only_this_dim` are returned. The file is read with
`read_persistence_intervals_and_dimension`, which reports lines that do not specify a dimension with the
dimension -1: such lines are therefore returned only when `only_this_dim` = -1.
The return value is an `std::vector<std::pair<birth, death>>`
where `birth` is a `double`, and `death` a `double`.
An `std::invalid_argument` exception is thrown by the reader if the file cannot be opened.
Note: the function does not check that birth <= death.
**/
inline std::vector<std::pair<double, double>> read_persistence_intervals_in_dimension(std::string const& filename,
                                                                                      int only_this_dim = -1) {
  std::vector<std::pair<double, double>> ret;
  // std::get is qualified so that the calls do not depend on some other `get` template being visible here.
  read_persistence_intervals_and_dimension(
      filename, boost::make_function_output_iterator([only_this_dim, &ret](std::tuple<int, double, double> const& t) {
        if (only_this_dim == -1 || only_this_dim == std::get<0>(t)) {
          ret.emplace_back(std::get<1>(t), std::get<2>(t));
        }
      }));
  return ret;
}

}  // namespace Gudhi

#endif  // READER_UTILS_H_