/*    This file is part of the Gudhi Library. The Gudhi library
 *    (Geometric Understanding in Higher Dimensions) is a generic C++
 *    library for computational topology.
 *
 *    Author(s):       Clement Maria, Pawel Dlotko, Clement Jamin
 *
 *    Copyright (C) 2014 Inria
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef READER_UTILS_H_
#define READER_UTILS_H_

#include <gudhi/graph_simplicial_complex.h>
#include <gudhi/Debug_utils.h>

#include <boost/function_output_iterator.hpp>
#include <boost/graph/adjacency_list.hpp>

#include <algorithm>  // for std::replace
#include <cstddef>  // for std::size_t
#include <cstdio>  // for std::sscanf
#include <fstream>
#include <iostream>
#include <limits>  // for numeric_limits
#include <map>
#include <sstream>  // for std::istringstream
#include <stdexcept>  // for std::invalid_argument
#include <string>
#include <tuple>  // for std::make_tuple
#include <utility>  // for pair
#include <vector>

namespace Gudhi {

// Keep this file tag for Doxygen to parse the code, otherwise, functions are not documented.
// It is required for global functions and variables.

/** @file
 * @brief This file includes common file reader for GUDHI
 */

/**
* @brief Read a set of points to turn it into a vector< vector<double> > by filling points.
*
* File format: 1 point per line
* X11 X12 ... X1d
* X21 X22 ... X2d
* etc
*/ inline void read_points(std::string file_name, std::vector>& points) { std::ifstream in_file(file_name.c_str(), std::ios::in); if (!in_file.is_open()) { std::cerr << "Unable to open file " << file_name << std::endl; return; } std::string line; double x; while (getline(in_file, line)) { std::vector point; std::istringstream iss(line); while (iss >> x) { point.push_back(x); } // Check for empty lines if (!point.empty()) points.push_back(point); } in_file.close(); } /** * @brief Read a graph from a file. * * \tparam Graph_t Type for the return graph. Must be constructible from iterators on pairs of Vertex_handle * \tparam Filtration_value Type for the value of the read filtration * \tparam Vertex_handle Type for the value of the read vertices * * File format: 1 simplex per line
* Dim1 X11 X12 ... X1d Fil1
* Dim2 X21 X22 ... X2d Fil2
* etc
* * The vertices must be labeled from 0 to n-1. * Every simplex must appear exactly once. * Simplices of dimension more than 1 are ignored. */ template Graph_t read_graph(std::string file_name) { std::ifstream in_(file_name.c_str(), std::ios::in); if (!in_.is_open()) { std::string error_str("read_graph - Unable to open file "); error_str.append(file_name); std::cerr << error_str << std::endl; throw std::invalid_argument(error_str); } typedef std::pair Edge_t; std::vector edges; std::vector edges_fil; std::map vertices; std::string line; int dim; Vertex_handle u, v, max_h = -1; Filtration_value fil; while (getline(in_, line)) { std::istringstream iss(line); while (iss >> dim) { switch (dim) { case 0: { iss >> u; iss >> fil; vertices[u] = fil; if (max_h < u) { max_h = u; } break; } case 1: { iss >> u; iss >> v; iss >> fil; edges.push_back(Edge_t(u, v)); edges_fil.push_back(fil); break; } default: { break; } } } } in_.close(); if ((size_t)(max_h + 1) != vertices.size()) { std::cerr << "Error: vertices must be labeled from 0 to n-1 \n"; } Graph_t skel_graph(edges.begin(), edges.end(), edges_fil.begin(), vertices.size()); auto vertex_prop = boost::get(vertex_filtration_t(), skel_graph); typename boost::graph_traits::vertex_iterator vi, vi_end; auto v_it = vertices.begin(); for (std::tie(vi, vi_end) = boost::vertices(skel_graph); vi != vi_end; ++vi, ++v_it) { boost::put(vertex_prop, *vi, v_it->second); } return skel_graph; } /** * @brief Read a face from a file. * * File format: 1 simplex per line
* Dim1 X11 X12 ... X1d Fil1
* Dim2 X21 X22 ... X2d Fil2
* etc
* * The vertices must be labeled from 0 to n-1. * Every simplex must appear exactly once. * Simplices of dimension more than 1 are ignored. */ template bool read_simplex(std::istream& in_, std::vector& simplex, Filtration_value& fil) { int dim = 0; if (!(in_ >> dim)) return false; Vertex_handle v; for (int i = 0; i < dim + 1; ++i) { in_ >> v; simplex.push_back(v); } in_ >> fil; in_.ignore((std::numeric_limits::max)(), '\n'); // ignore until the carriage return return true; } /** * @brief Read a hasse simplex from a file. * * File format: 1 simplex per line
* Dim1 k11 k12 ... k1Dim1 Fil1
* Dim2 k21 k22 ... k2Dim2 Fil2
* etc
* * The key of a simplex is its position in the filtration order and also the number of its row in the file. * Dimi ki1 ki2 ... kiDimi Fili means that the ith simplex in the filtration has dimension Dimi, filtration value * fil1 and simplices with key ki1 ... kiDimi in its boundary.*/ template bool read_hasse_simplex(std::istream& in_, std::vector& boundary, Filtration_value& fil) { int dim; if (!(in_ >> dim)) return false; if (dim == 0) { in_ >> fil; return true; } Simplex_key key; for (int i = 0; i < dim + 1; ++i) { in_ >> key; boundary.push_back(key); } in_ >> fil; return true; } /** * @brief Read a lower triangular distance matrix from a csv file. We assume that the .csv store the whole * (square) matrix. * * @author Pawel Dlotko * * Square matrix file format:
* 0;D12;...;D1j
* D21;0;...;D2j
* ...
* Dj1;Dj2;...;0
* * lower matrix file format:
* 0
* D21;
* D31;D32;
* ...
* Dj1;Dj2;...;Dj(j-1);
* **/ template std::vector> read_lower_triangular_matrix_from_csv_file(const std::string& filename, const char separator = ';') { #ifdef DEBUG_TRACES std::cout << "Using procedure read_lower_triangular_matrix_from_csv_file \n"; #endif // DEBUG_TRACES std::vector> result; std::ifstream in; in.open(filename.c_str()); if (!in.is_open()) { return result; } std::string line; // the first line is emtpy, so we ignore it: std::getline(in, line); std::vector values_in_this_line; result.push_back(values_in_this_line); int number_of_line = 0; // first, read the file line by line to a string: while (std::getline(in, line)) { // if line is empty, break if (line.size() == 0) break; // if the last element of a string is comma: if (line[line.size() - 1] == separator) { // then shrink the string by one line.pop_back(); } // replace all commas with spaces std::replace(line.begin(), line.end(), separator, ' '); // put the new line to a stream std::istringstream iss(line); // and now read the doubles. int number_of_entry = 0; std::vector values_in_this_line; while (iss.good()) { double entry; iss >> entry; if (number_of_entry <= number_of_line) { values_in_this_line.push_back(entry); } ++number_of_entry; } if (!values_in_this_line.empty()) result.push_back(values_in_this_line); ++number_of_line; } in.close(); #ifdef DEBUG_TRACES std::cerr << "Here is the matrix we read : \n"; for (size_t i = 0; i != result.size(); ++i) { for (size_t j = 0; j != result[i].size(); ++j) { std::cerr << result[i][j] << " "; } std::cerr << std::endl; } #endif // DEBUG_TRACES return result; } // read_lower_triangular_matrix_from_csv_file /** Reads a file containing persistence intervals. Each line might contain 2, 3 or 4 values: [[field] dimension] birth death The output iterator `out` is used this way: `*out++ = std::make_tuple(dim, birth, death);` where `dim` is an `int`, `birth` a `double`, and `death` a `double`. Note: the function does not check that birth <= death. 
**/ template void read_persistence_intervals_and_dimension(std::string const& filename, OutputIterator out) { std::ifstream in(filename); if (!in.is_open()) { std::string error_str("read_persistence_intervals_and_dimension - Unable to open file "); error_str.append(filename); std::cerr << error_str << std::endl; throw std::invalid_argument(error_str); } while (!in.eof()) { std::string line; getline(in, line); if (line.length() != 0 && line[0] != '#') { double numbers[4]; int n = sscanf(line.c_str(), "%lf %lf %lf %lf", &numbers[0], &numbers[1], &numbers[2], &numbers[3]); if (n >= 2) { int dim = (n >= 3 ? static_cast(numbers[n - 3]) : -1); *out++ = std::make_tuple(dim, numbers[n - 2], numbers[n - 1]); } } } } /** Reads a file containing persistence intervals. Each line might contain 2, 3 or 4 values: [[field] dimension] birth death The return value is an `std::map>>` where `dim` is an `int`, `birth` a `double`, and `death` a `double`. Note: the function does not check that birth <= death. **/ inline std::map>> read_persistence_intervals_grouped_by_dimension( std::string const& filename) { std::map>> ret; read_persistence_intervals_and_dimension( filename, boost::make_function_output_iterator([&ret](std::tuple t) { ret[get<0>(t)].push_back(std::make_pair(get<1>(t), get<2>(t))); })); return ret; } /** Reads a file containing persistence intervals. Each line might contain 2, 3 or 4 values: [[field] dimension] birth death If `only_this_dim` = -1, dimension is ignored and all lines are returned. If `only_this_dim` is >= 0, only the lines where dimension = `only_this_dim` (or where dimension is not specified) are returned. The return value is an `std::vector>` where `dim` is an `int`, `birth` a `double`, and `death` a `double`. Note: the function does not check that birth <= death. 
**/ inline std::vector> read_persistence_intervals_in_dimension(std::string const& filename, int only_this_dim = -1) { std::vector> ret; read_persistence_intervals_and_dimension( filename, boost::make_function_output_iterator([only_this_dim, &ret](std::tuple t) { if (only_this_dim == get<0>(t) || only_this_dim == -1) ret.emplace_back(get<1>(t), get<2>(t)); })); return ret; } } // namespace Gudhi #endif // READER_UTILS_H_