diff options
Diffstat (limited to 'include/gudhi/Persistence_intervals.h')
-rw-r--r-- | include/gudhi/Persistence_intervals.h | 570 |
1 files changed, 0 insertions, 570 deletions
diff --git a/include/gudhi/Persistence_intervals.h b/include/gudhi/Persistence_intervals.h deleted file mode 100644 index 76eac7d7..00000000 --- a/include/gudhi/Persistence_intervals.h +++ /dev/null @@ -1,570 +0,0 @@ -/* This file is part of the Gudhi hiLibrary. The Gudhi library - * (Geometric Understanding in Higher Dimensions) is a generic C++ - * library for computational topology. - * - * Author(s): Pawel Dlotko - * - * Copyright (C) 2016 Inria - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef PERSISTENCE_INTERVALS_H_ -#define PERSISTENCE_INTERVALS_H_ - -// gudhi include -#include <gudhi/read_persistence_from_file.h> - -// standard include -#include <limits> -#include <iostream> -#include <fstream> -#include <vector> -#include <algorithm> -#include <cmath> -#include <functional> -#include <utility> -#include <string> - -namespace Gudhi { -namespace Persistence_representations { - -/** - * This class implements the following concepts: Vectorized_topological_data, Topological_data_with_distances, - *Real_valued_topological_data -**/ -class Persistence_intervals { - public: - /** - * This is a constructor of a class Persistence_intervals from a text file. Each line of the input file is supposed to - *contain two numbers of a type double (or convertible to double) - * representing the birth and the death of the persistence interval. If the pairs are not sorted so that birth <= - *death, then the constructor will sort then that way. - * * The second parameter of a constructor is a dimension of intervals to be read from a file. If your file contains - *only birth-death pairs, use the default value. - **/ - Persistence_intervals(const char* filename, unsigned dimension = std::numeric_limits<unsigned>::max()); - - /** - * This is a constructor of a class Persistence_intervals from a vector of pairs. Each pair is assumed to represent a - *persistence interval. We assume that the first elements of pairs - * are smaller or equal the second elements of pairs. - **/ - Persistence_intervals(const std::vector<std::pair<double, double> >& intervals); - - /** - * This procedure returns x-range of a given persistence diagram. - **/ - std::pair<double, double> get_x_range() const { - double min_ = std::numeric_limits<int>::max(); - double max_ = -std::numeric_limits<int>::max(); - for (size_t i = 0; i != this->intervals.size(); ++i) { - if (this->intervals[i].first < min_) min_ = this->intervals[i].first; - if (this->intervals[i].second > max_) max_ = this->intervals[i].second; - } - return std::make_pair(min_, max_); - } - - /** - * This procedure returns y-range of a given persistence diagram. - **/ - std::pair<double, double> get_y_range() const { - double min_ = std::numeric_limits<int>::max(); - double max_ = -std::numeric_limits<int>::max(); - for (size_t i = 0; i != this->intervals.size(); ++i) { - if (this->intervals[i].second < min_) min_ = this->intervals[i].second; - if (this->intervals[i].second > max_) max_ = this->intervals[i].second; - } - return std::make_pair(min_, max_); - } - - /** - * Procedure that compute the vector of lengths of the dominant (i.e. the longest) persistence intervals. The list is - *truncated at the parameter of the call where_to_cut (set by default to 100). - **/ - std::vector<double> length_of_dominant_intervals(size_t where_to_cut = 100) const; - - /** - * Procedure that compute the vector of the dominant (i.e. the longest) persistence intervals. The parameter of - *the procedure (set by default to 100) is the number of dominant intervals returned by the procedure. - **/ - std::vector<std::pair<double, double> > dominant_intervals(size_t where_to_cut = 100) const; - - /** - * Procedure to compute a histogram of interval's length. A histogram is a block plot. The number of blocks is - *determined by the first parameter of the function (set by default to 10). - * For the sake of argument let us assume that the length of the longest interval is 1 and the number of bins is - *10. In this case the i-th block correspond to a range between i-1/10 and i10. - * The vale of a block supported at the interval is the number of persistence intervals of a length between x_0 - *and x_1. - **/ - std::vector<size_t> histogram_of_lengths(size_t number_of_bins = 10) const; - - /** - * Based on a histogram of intervals lengths computed by the function histogram_of_lengths H the procedure below - *computes the cumulative histogram. The i-th position of the resulting histogram - * is the sum of values of H for the positions from 0 to i. - **/ - std::vector<size_t> cumulative_histogram_of_lengths(size_t number_of_bins = 10) const; - - /** - * In this procedure we assume that each barcode is a characteristic function of a hight equal to its length. The - *persistence diagram is a sum of such a functions. The procedure below construct a function being a - * sum of the characteristic functions of persistence intervals. The first two parameters are the range in which the - *function is to be computed and the last parameter is the number of bins in - * the discretization of the interval [_min,_max]. - **/ - std::vector<double> characteristic_function_of_diagram(double x_min, double x_max, size_t number_of_bins = 10) const; - - /** - * Cumulative version of the function characteristic_function_of_diagram - **/ - std::vector<double> cumulative_characteristic_function_of_diagram(double x_min, double x_max, - size_t number_of_bins = 10) const; - - /** - * Compute the function of persistence Betti numbers. The returned value is a vector of pair. First element of each - *pair is a place where persistence Betti numbers change. - * Second element of each pair is the value of Persistence Betti numbers at that point. - **/ - std::vector<std::pair<double, size_t> > compute_persistent_betti_numbers() const; - - /** - *This is a non optimal procedure that compute vector of distances from each point of diagram to its k-th nearest - *neighbor (k is a parameter of the program). The resulting vector is by default truncated to 10 - *elements (this value can be changed by using the second parameter of the program). The points are returned in order - *from the ones which are farthest away from their k-th nearest neighbors. - **/ - std::vector<double> k_n_n(size_t k, size_t where_to_cut = 10) const; - - /** -* Operator that send the diagram to a stream. -**/ - friend std::ostream& operator<<(std::ostream& out, const Persistence_intervals& intervals) { - for (size_t i = 0; i != intervals.intervals.size(); ++i) { - out << intervals.intervals[i].first << " " << intervals.intervals[i].second << std::endl; - } - return out; - } - - /** - * Generating gnuplot script to plot the interval. - **/ - void plot(const char* filename, double min_x = std::numeric_limits<double>::max(), - double max_x = std::numeric_limits<double>::max(), double min_y = std::numeric_limits<double>::max(), - double max_y = std::numeric_limits<double>::max()) const { - // this program create a gnuplot script file that allows to plot persistence diagram. - std::ofstream out; - - std::stringstream gnuplot_script; - gnuplot_script << filename << "_GnuplotScript"; - - out.open(gnuplot_script.str().c_str()); - - std::pair<double, double> min_max_values = this->get_x_range(); - if (min_x == max_x) { - out << "set xrange [" << min_max_values.first - 0.1 * (min_max_values.second - min_max_values.first) << " : " - << min_max_values.second + 0.1 * (min_max_values.second - min_max_values.first) << " ]" << std::endl; - out << "set yrange [" << min_max_values.first - 0.1 * (min_max_values.second - min_max_values.first) << " : " - << min_max_values.second + 0.1 * (min_max_values.second - min_max_values.first) << " ]" << std::endl; - } else { - out << "set xrange [" << min_x << " : " << max_x << " ]" << std::endl; - out << "set yrange [" << min_y << " : " << max_y << " ]" << std::endl; - } - out << "plot '-' using 1:2 notitle \"" << filename << "\", \\" << std::endl; - out << " '-' using 1:2 notitle with lp" << std::endl; - for (size_t i = 0; i != this->intervals.size(); ++i) { - out << this->intervals[i].first << " " << this->intervals[i].second << std::endl; - } - out << "EOF" << std::endl; - out << min_max_values.first - 0.1 * (min_max_values.second - min_max_values.first) << " " - << min_max_values.first - 0.1 * (min_max_values.second - min_max_values.first) << std::endl; - out << min_max_values.second + 0.1 * (min_max_values.second - min_max_values.first) << " " - << min_max_values.second + 0.1 * (min_max_values.second - min_max_values.first) << std::endl; - - out.close(); - - std::cout << "To visualize, install gnuplot and type the command: gnuplot -persist -e \"load \'" - << gnuplot_script.str().c_str() << "\'\"" << std::endl; - } - - /** -* Return number of points in the diagram. -**/ - size_t size() const { return this->intervals.size(); } - - /** - * Return the persistence interval at the given position. Note that intervals are not sorted with respect to their - *lengths. - **/ - inline std::pair<double, double> operator[](size_t i) const { - if (i >= this->intervals.size()) throw("Index out of range! Operator [], one_d_gaussians class\n"); - return this->intervals[i]; - } - - // Implementations of functions for various concepts. - /** - * This is a simple function projecting the persistence intervals to a real number. The function we use here is a sum - *of squared lengths of intervals. It can be naturally interpreted as - * sum of step function, where the step hight it equal to the length of the interval. - * At the moment this function is not tested, since it is quite likely to be changed in the future. Given this, when - *using it, keep in mind that it - * will be most likely changed in the next versions. - **/ - double project_to_R(int number_of_function) const; - /** - * The function gives the number of possible projections to R. This function is required by the - *Real_valued_topological_data concept. - **/ - size_t number_of_projections_to_R() const { return this->number_of_functions_for_projections_to_reals; } - - /** - * Return a family of vectors obtained from the persistence diagram. The i-th vector consist of the length of i - *dominant persistence intervals. - **/ - std::vector<double> vectorize(int number_of_function) const { - return this->length_of_dominant_intervals(number_of_function); - } - /** - * This function return the number of functions that allows vectorization of a persistence diagram. It is required - *in a concept Vectorized_topological_data. - **/ - size_t number_of_vectorize_functions() const { return this->number_of_functions_for_vectorization; } - - // end of implementation of functions needed for concepts. - - // For visualization use output from vectorize and build histograms. - std::vector<std::pair<double, double> > output_for_visualization() { return this->intervals; } - - protected: - void set_up_numbers_of_functions_for_vectorization_and_projections_to_reals() { - // warning, this function can be only called after filling in the intervals vector. - this->number_of_functions_for_vectorization = this->intervals.size(); - this->number_of_functions_for_projections_to_reals = 1; - } - - std::vector<std::pair<double, double> > intervals; - size_t number_of_functions_for_vectorization; - size_t number_of_functions_for_projections_to_reals; -}; - -Persistence_intervals::Persistence_intervals(const char* filename, unsigned dimension) { - if (dimension == std::numeric_limits<unsigned>::max()) { - this->intervals = read_persistence_intervals_in_one_dimension_from_file(filename); - } else { - this->intervals = read_persistence_intervals_in_one_dimension_from_file(filename, dimension); - } - this->set_up_numbers_of_functions_for_vectorization_and_projections_to_reals(); -} // Persistence_intervals - -Persistence_intervals::Persistence_intervals(const std::vector<std::pair<double, double> >& intervals_) - : intervals(intervals_) { - this->set_up_numbers_of_functions_for_vectorization_and_projections_to_reals(); -} - -std::vector<double> Persistence_intervals::length_of_dominant_intervals(size_t where_to_cut) const { - std::vector<double> result(this->intervals.size()); - for (size_t i = 0; i != this->intervals.size(); ++i) { - result[i] = this->intervals[i].second - this->intervals[i].first; - } - std::sort(result.begin(), result.end(), std::greater<double>()); - - result.resize(std::min(where_to_cut, result.size())); - return result; -} // length_of_dominant_intervals - -bool compare(const std::pair<size_t, double>& first, const std::pair<size_t, double>& second) { - return first.second > second.second; -} - -std::vector<std::pair<double, double> > Persistence_intervals::dominant_intervals(size_t where_to_cut) const { - bool dbg = false; - std::vector<std::pair<size_t, double> > position_length_vector(this->intervals.size()); - for (size_t i = 0; i != this->intervals.size(); ++i) { - position_length_vector[i] = std::make_pair(i, this->intervals[i].second - this->intervals[i].first); - } - - std::sort(position_length_vector.begin(), position_length_vector.end(), compare); - - std::vector<std::pair<double, double> > result; - result.reserve(std::min(where_to_cut, position_length_vector.size())); - - for (size_t i = 0; i != std::min(where_to_cut, position_length_vector.size()); ++i) { - result.push_back(this->intervals[position_length_vector[i].first]); - if (dbg) - std::cerr << "Position : " << position_length_vector[i].first << " length : " << position_length_vector[i].second - << std::endl; - } - - return result; -} // dominant_intervals - -std::vector<size_t> Persistence_intervals::histogram_of_lengths(size_t number_of_bins) const { - bool dbg = false; - - if (dbg) std::cerr << "this->intervals.size() : " << this->intervals.size() << std::endl; - // first find the length of the longest interval: - double lengthOfLongest = 0; - for (size_t i = 0; i != this->intervals.size(); ++i) { - if ((this->intervals[i].second - this->intervals[i].first) > lengthOfLongest) { - lengthOfLongest = this->intervals[i].second - this->intervals[i].first; - } - } - - if (dbg) { - std::cerr << "lengthOfLongest : " << lengthOfLongest << std::endl; - } - - // this is a container we will use to store the resulting histogram - std::vector<size_t> result(number_of_bins + 1, 0); - - // for every persistence interval in our collection. - for (size_t i = 0; i != this->intervals.size(); ++i) { - // compute its length relative to the length of the dominant interval: - double relative_length_of_this_interval = (this->intervals[i].second - this->intervals[i].first) / lengthOfLongest; - - // given the relative length (between 0 and 1) compute to which bin should it contribute. - size_t position = (size_t)(relative_length_of_this_interval * number_of_bins); - - ++result[position]; - - if (dbg) { - std::cerr << "i : " << i << std::endl; - std::cerr << "Interval : [" << this->intervals[i].first << " , " << this->intervals[i].second << " ] \n"; - std::cerr << "relative_length_of_this_interval : " << relative_length_of_this_interval << std::endl; - std::cerr << "position : " << position << std::endl; - getchar(); - } - } - - if (dbg) { - for (size_t i = 0; i != result.size(); ++i) std::cerr << result[i] << std::endl; - } - return result; -} - -std::vector<size_t> Persistence_intervals::cumulative_histogram_of_lengths(size_t number_of_bins) const { - std::vector<size_t> histogram = this->histogram_of_lengths(number_of_bins); - std::vector<size_t> result(histogram.size()); - - size_t sum = 0; - for (size_t i = 0; i != histogram.size(); ++i) { - sum += histogram[i]; - result[i] = sum; - } - return result; -} - -std::vector<double> Persistence_intervals::characteristic_function_of_diagram(double x_min, double x_max, - size_t number_of_bins) const { - bool dbg = false; - - std::vector<double> result(number_of_bins); - std::fill(result.begin(), result.end(), 0); - - for (size_t i = 0; i != this->intervals.size(); ++i) { - if (dbg) { - std::cerr << "Interval : " << this->intervals[i].first << " , " << this->intervals[i].second << std::endl; - } - - size_t beginIt = 0; - if (this->intervals[i].first < x_min) beginIt = 0; - if (this->intervals[i].first >= x_max) beginIt = result.size(); - if ((this->intervals[i].first > x_min) && (this->intervals[i].first < x_max)) { - beginIt = number_of_bins * (this->intervals[i].first - x_min) / (x_max - x_min); - } - - size_t endIt = 0; - if (this->intervals[i].second < x_min) endIt = 0; - if (this->intervals[i].second >= x_max) endIt = result.size(); - if ((this->intervals[i].second > x_min) && (this->intervals[i].second < x_max)) { - endIt = number_of_bins * (this->intervals[i].second - x_min) / (x_max - x_min); - } - - if (beginIt > endIt) { - beginIt = endIt; - } - - if (dbg) { - std::cerr << "beginIt : " << beginIt << std::endl; - std::cerr << "endIt : " << endIt << std::endl; - } - - for (size_t pos = beginIt; pos != endIt; ++pos) { - result[pos] += ((x_max - x_min) / static_cast<double>(number_of_bins)) * - (this->intervals[i].second - this->intervals[i].first); - } - if (dbg) { - std::cerr << "Result at this stage \n"; - for (size_t aa = 0; aa != result.size(); ++aa) { - std::cerr << result[aa] << " "; - } - std::cerr << std::endl; - } - } - return result; -} // characteristic_function_of_diagram - -std::vector<double> Persistence_intervals::cumulative_characteristic_function_of_diagram(double x_min, double x_max, - size_t number_of_bins) const { - std::vector<double> intsOfBars = this->characteristic_function_of_diagram(x_min, x_max, number_of_bins); - std::vector<double> result(intsOfBars.size()); - double sum = 0; - for (size_t i = 0; i != intsOfBars.size(); ++i) { - sum += intsOfBars[i]; - result[i] = sum; - } - return result; -} // cumulative_characteristic_function_of_diagram - -template <typename T> -bool compare_first_element_of_pair(const std::pair<T, bool>& f, const std::pair<T, bool>& s) { - return (f.first < s.first); -} - -std::vector<std::pair<double, size_t> > Persistence_intervals::compute_persistent_betti_numbers() const { - std::vector<std::pair<double, bool> > places_where_pbs_change(2 * this->intervals.size()); - - for (size_t i = 0; i != this->intervals.size(); ++i) { - places_where_pbs_change[2 * i] = std::make_pair(this->intervals[i].first, true); - places_where_pbs_change[2 * i + 1] = std::make_pair(this->intervals[i].second, false); - } - - std::sort(places_where_pbs_change.begin(), places_where_pbs_change.end(), compare_first_element_of_pair<double>); - size_t pbn = 0; - std::vector<std::pair<double, size_t> > pbns(places_where_pbs_change.size()); - for (size_t i = 0; i != places_where_pbs_change.size(); ++i) { - if (places_where_pbs_change[i].second == true) { - ++pbn; - } else { - --pbn; - } - pbns[i] = std::make_pair(places_where_pbs_change[i].first, pbn); - } - return pbns; -} - -inline double compute_euclidean_distance(const std::pair<double, double>& f, const std::pair<double, double>& s) { - return sqrt((f.first - s.first) * (f.first - s.first) + (f.second - s.second) * (f.second - s.second)); -} - -std::vector<double> Persistence_intervals::k_n_n(size_t k, size_t where_to_cut) const { - bool dbg = false; - if (dbg) { - std::cerr << "Here are the intervals : \n"; - for (size_t i = 0; i != this->intervals.size(); ++i) { - std::cerr << "[ " << this->intervals[i].first << " , " << this->intervals[i].second << "] \n"; - } - getchar(); - } - - std::vector<double> result; - // compute all to all distance between point in the diagram. Also, consider points in the diagonal with the infinite - // multiplicity. - std::vector<std::vector<double> > distances(this->intervals.size()); - for (size_t i = 0; i != this->intervals.size(); ++i) { - std::vector<double> aa(this->intervals.size()); - std::fill(aa.begin(), aa.end(), 0); - distances[i] = aa; - } - std::vector<double> distances_from_diagonal(this->intervals.size()); - std::fill(distances_from_diagonal.begin(), distances_from_diagonal.end(), 0); - - for (size_t i = 0; i != this->intervals.size(); ++i) { - std::vector<double> distancesFromI; - for (size_t j = i + 1; j != this->intervals.size(); ++j) { - distancesFromI.push_back(compute_euclidean_distance(this->intervals[i], this->intervals[j])); - } - // also add a distance from this guy to diagonal: - double distanceToDiagonal = compute_euclidean_distance( - this->intervals[i], std::make_pair(0.5 * (this->intervals[i].first + this->intervals[i].second), - 0.5 * (this->intervals[i].first + this->intervals[i].second))); - distances_from_diagonal[i] = distanceToDiagonal; - - if (dbg) { - std::cerr << "Here are the distances form the point : [" << this->intervals[i].first << " , " - << this->intervals[i].second << "] in the diagram \n"; - for (size_t aa = 0; aa != distancesFromI.size(); ++aa) { - std::cerr << "To : " << i + aa << " : " << distancesFromI[aa] << " "; - } - std::cerr << std::endl; - getchar(); - } - - // filling in the distances matrix: - for (size_t j = i + 1; j != this->intervals.size(); ++j) { - distances[i][j] = distancesFromI[j - i - 1]; - distances[j][i] = distancesFromI[j - i - 1]; - } - } - if (dbg) { - std::cerr << "Here is the distance matrix : \n"; - for (size_t i = 0; i != distances.size(); ++i) { - for (size_t j = 0; j != distances.size(); ++j) { - std::cerr << distances[i][j] << " "; - } - std::cerr << std::endl; - } - std::cerr << std::endl << std::endl << "And here are the distances to the diagonal : " << std::endl; - for (size_t i = 0; i != distances_from_diagonal.size(); ++i) { - std::cerr << distances_from_diagonal[i] << " "; - } - std::cerr << std::endl << std::endl; - getchar(); - } - - for (size_t i = 0; i != this->intervals.size(); ++i) { - std::vector<double> distancesFromI = distances[i]; - distancesFromI.push_back(distances_from_diagonal[i]); - - // sort it: - std::sort(distancesFromI.begin(), distancesFromI.end(), std::greater<double>()); - - if (k > distancesFromI.size()) { - if (dbg) { - std::cerr << "There are not enough neighbors in your set. We set the result to plus infty \n"; - } - result.push_back(std::numeric_limits<double>::max()); - } else { - if (distances_from_diagonal[i] > distancesFromI[k]) { - if (dbg) { - std::cerr << "The k-th n.n. is on a diagonal. Therefore we set up a distance to diagonal \n"; - } - result.push_back(distances_from_diagonal[i]); - } else { - result.push_back(distancesFromI[k]); - } - } - } - std::sort(result.begin(), result.end(), std::greater<double>()); - result.resize(std::min(result.size(), where_to_cut)); - - return result; -} - -double Persistence_intervals::project_to_R(int number_of_function) const { - double result = 0; - - for (size_t i = 0; i != this->intervals.size(); ++i) { - result += - (this->intervals[i].second - this->intervals[i].first) * (this->intervals[i].second - this->intervals[i].first); - } - - return result; -} - -} // namespace Persistence_representations -} // namespace Gudhi - -#endif // PERSISTENCE_INTERVALS_H_ |