diff options
Diffstat (limited to 'src/Gudhi_stat/include/gudhi/topological_process.h')
-rw-r--r-- | src/Gudhi_stat/include/gudhi/topological_process.h | 417 |
1 files changed, 417 insertions, 0 deletions
diff --git a/src/Gudhi_stat/include/gudhi/topological_process.h b/src/Gudhi_stat/include/gudhi/topological_process.h new file mode 100644 index 00000000..4a41c0a7 --- /dev/null +++ b/src/Gudhi_stat/include/gudhi/topological_process.h @@ -0,0 +1,417 @@ +/* This file is part of the Gudhi Library. The Gudhi library + * (Geometric Understanding in Higher Dimensions) is a generic C++ + * library for computational topology. + * + * Author(s): Pawel Dlotko + * + * Copyright (C) 2015 INRIA (France) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef TOPOLOGICAL_PROCESS_H +#define TOPOLOGICAL_PROCESS_H + + +//concretizations +#include <gudhi/persistence_representations/Vector_distances_in_diagram.h> +#include <gudhi/persistence_representations/Persistence_landscape.h> +#include <gudhi/persistence_representations/Persistence_landscape_on_grid.h> +#include <gudhi/persistence_representations/Persistence_heat_maps.h> +#include <vector> +#include <limits> + +//extras +#include <gudhi/common_gudhi_stat.h> + +namespace Gudhi +{ +namespace Gudhi_stat +{ + +//over here we will need a few version of construct_representation_from_file procedure, since different representations may require different parameters. This is a procedure that in my +//oppinion cannot be standarize, since construction of representation cannot. But, the remaining part of the code in my opinion is free from any details of representation. + + +//the reason I am separating the process of getting the intervals from the process of consstructing the represnetation is the following: for soem representations, we may need to ahve the same +//scale. To determine the scale, we need to know all the intervals before. So, we read the intervals first, then if needed, process them, to get the parametersof the representation, +//and only then construct the representations. +std::vector< std::vector< std::pair< double , double > > > read_persistence_pairs_from_file_with_names_of_files( const char* filename ) +{ + bool dbg = false; + std::vector< std::vector< std::pair< double , double > > > result; + + std::vector< std::string > files = readFileNames( filename ); + + std::cout << "Here are the filenames in the file : " << filename << std::endl; + for ( size_t i = 0 ; i != files.size() ; ++i ) + { + std::cout << files[i] << std::endl; + } + + for ( size_t i = 0 ; i != files.size() ; ++i ) + { + std::vector< std::pair< double , double > > diag = read_standard_file( files[i].c_str() ); + result.push_back( diag ); + if ( dbg ) + { + std::cerr << "Here is a diagram from a file : " << files[i].c_str() << std::endl; + for ( size_t aa = 0 ; aa != diag.size() ; ++aa ) + { + std::cout << diag[aa].first << " " << diag[aa].second << std::endl; + } + getchar(); + } + } + return result; +} + +//When workign with time varying data, we tpically have a collection of files with intervals to produce one process. The intervals from all the files that constitute a single process can be read with +//the read_persistence_pairs_from_file_with_names_of_files procedure. +//But then, we also may need to read the persistence intervals of collection of proesses we may want to read the intervals first. This is what the procedre +std::vector< std::vector< std::vector< std::pair< double , double > > > > read_persistence_pairs_from_files_with_names_of_files( const std::vector<const char*>& filenames ) +{ + std::vector< std::vector< std::vector< std::pair< double , double > > > > result; + result.reserve( filenames.size() ); + for ( size_t file_no = 0 ; file_no != filenames.size() ; ++file_no ) + { + result.push_back( read_persistence_pairs_from_file_with_names_of_files( filenames[file_no] ) ); + } + return result; +}//read_persistence_pairs_from_files_with_names_of_files + + +std::pair< std::pair< double,double > , std::pair< double,double > > find_x_and_y_ranges_of_intervals( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file ) +{ + double min_x = std::numeric_limits< double >::max(); + double max_x = -std::numeric_limits< double >::max(); + double min_y = std::numeric_limits< double >::max(); + double max_y = -std::numeric_limits< double >::max(); + for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i ) + { + for ( size_t j = 0 ; j != intervals_from_file[i].size() ; ++j ) + { + if ( min_x > intervals_from_file[i][j].first )min_x = intervals_from_file[i][j].first; + if ( max_x < intervals_from_file[i][j].first )max_x = intervals_from_file[i][j].first; + + if ( min_y > intervals_from_file[i][j].second )min_y = intervals_from_file[i][j].second; + if ( max_y < intervals_from_file[i][j].second )max_y = intervals_from_file[i][j].second; + } + } + return std::make_pair( std::make_pair( min_x,max_x ) , std::make_pair( min_y,max_y ) ); +}//find_x_and_y_ranges_of_intervals + + +std::pair< std::pair< double,double > , std::pair< double,double > > find_x_and_y_ranges_of_intervals( const std::vector< std::vector< std::vector< std::pair< double , double > > > >& intervals_from_files ) +{ + double min_x = std::numeric_limits< double >::max(); + double max_x = -std::numeric_limits< double >::max(); + double min_y = std::numeric_limits< double >::max(); + double max_y = -std::numeric_limits< double >::max(); + for ( size_t i = 0 ; i != intervals_from_files.size() ; ++i ) + { + std::pair< std::pair< double,double > , std::pair< double,double > > ranges = find_x_and_y_ranges_of_intervals( intervals_from_files[i] ); + if ( min_x > ranges.first.first ) min_x = ranges.first.first; + if ( max_x < ranges.first.second ) max_x = ranges.first.second; + if ( min_y > ranges.second.first ) min_y = ranges.second.first; + if ( max_y < ranges.second.second ) max_y = ranges.second.second; + } + return std::make_pair( std::make_pair( min_x,max_x ) , std::make_pair( min_y,max_y ) ); +} + + +template <typename Representation> +std::vector< Representation* > construct_representation_from_file( const char* filename ) +{ + std::vector< std::vector< std::pair< double , double > > > intervals_from_file = read_persistence_pairs_from_file_with_names_of_files( filename ); + std::vector< Representation* > result( intervals_from_file.size() ); + for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i ) + { + Representation* l = new Representation( intervals_from_file[i] ); + result[i] = l; + } + return result; +} + +//this one can be use for Persistence_intervals.h and Persistence_landscape.h +template <typename Representation> +std::vector< Representation* > construct_representation_from_file( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file) +{ + + std::vector< Representation* > result( intervals_from_file.size() ); + for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i ) + { + Representation* l = new Representation( intervals_from_file[i] ); + result[i] = l; + } + return result; +} + +//this one can be use for Persistence_heat_maps.h +template <typename Representation> +std::vector< Representation* > construct_representation_from_file( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file , + std::vector< std::vector<double> > filter = create_Gaussian_filter(5,1), + bool erase_below_diagonal = false , + size_t number_of_pixels = 1000 , + double min_ = std::numeric_limits<double>::max() , double max_ = std::numeric_limits<double>::max() ) +{ + std::vector< Representation* > result( intervals_from_file.size() ); + for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i ) + { + Representation* l = new Representation( intervals_from_file[i] , filter , erase_below_diagonal , number_of_pixels , min_ , max_ ); + result[i] = l; + } + return result; +} + +//this one can be use for Persistence_landscape_on_grid.h +template <typename Representation> +std::vector< Representation* > construct_representation_from_file( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file, + double grid_min_ , double grid_max_ , size_t number_of_points_ +) +{ + std::vector< Representation* > result( intervals_from_file.size() ); + for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i ) + { + Representation* l = new Representation( intervals_from_file[i] , grid_min_ , grid_max_ , number_of_points_ ); + result[i] = l; + } + return result; +} + + +//this one can be use for Vector_distances_in_diagram.h +template <typename Representation> +std::vector< Representation* > construct_representation_from_file( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file, + size_t where_to_cut +) +{ + std::vector< Representation* > result( intervals_from_file.size() ); + for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i ) + { + Representation* l = new Representation( intervals_from_file[i] , where_to_cut ); + result[i] = l; + } + return result; +} + + +template <typename Representation> +class Topological_process +{ +public: + Topological_process(){}; + ~Topological_process() + { + for ( size_t i = 0 ; i != this->data.size() ; ++i ) + { + delete this->data[i]; + } + } + + Topological_process( const std::vector< Representation* >& data_ ):data(data_){} + double distance( const Topological_process& second , double exponent = 1 ) + { + if ( this->data.size() != second.data.size() ) + { + throw "Incompatible lengths of topological processes, we cannot compute the distance in this case \n"; + } + double result = 0; + for ( size_t i = 0 ; i != this->data.size() ; ++i ) + { + result += this->data[i]->distance( *second.data[i] , exponent ); + } + return result; + } + + void compute_average( const std::vector< Topological_process* >& to_average ) + { + //since we will substitute whatever data we have in this object with an average, we clear the data in this object first: + this->data.clear(); + //then we need to check if all the topological processes in the vector to_average have the same length. + if ( to_average.size() == 0 )return; + for ( size_t i = 1 ; i != to_average.size() ; ++i ) + { + if ( to_average[0]->data.size() != to_average[i]->data.size() ) + { + throw "Incompatible lengths of topological processes, the averages cannot be computed \n"; + } + } + + this->data.reserve( to_average[0]->data.size() ); + + for ( size_t level = 0 ; level != to_average[0]->data.size() ; ++level ) + { + //now we will be averaging the level level: + std::vector< Representation* > to_average_at_this_level; + to_average_at_this_level.reserve( to_average.size() ); + for ( size_t i = 0 ; i != to_average.size() ; ++i ) + { + to_average_at_this_level.push_back( to_average[i]->data[level] ); + } + Representation* average_representation_on_this_level = new Representation; + average_representation_on_this_level->compute_average( to_average_at_this_level ); + this->data.push_back( average_representation_on_this_level ); + } + } + + std::pair< double , double > get_x_range()const + { + double min_x = std::numeric_limits< double >::max(); + double max_x = -std::numeric_limits< double >::max(); + for ( size_t i = 0 ; i != this->data.size() ; ++i ) + { + std::pair< double , double > xrange = this->data[i]->get_x_range(); + if ( min_x > xrange.first )min_x = xrange.first; + if ( max_x < xrange.second )max_x = xrange.second; + } + return std::make_pair( min_x , max_x ); + } + + std::pair< double , double > get_y_range()const + { + double min_y = std::numeric_limits< double >::max(); + double max_y = -std::numeric_limits< double >::max(); + for ( size_t i = 0 ; i != this->data.size() ; ++i ) + { + std::pair< double , double > yrange = this->data[i]->get_y_range(); + if ( min_y > yrange.first )min_y = yrange.first; + if ( max_y < yrange.second )max_y = yrange.second; + } + return std::make_pair( min_y , max_y ); + } + + /** + * The procedure checks if the ranges of data are the same for all of them. + **/ + bool are_the_data_aligned()const + { + if ( this->data.size() == 0 )return true;//empty collection is aligned + std::pair< double , double > x_range = this->data[0]->get_x_range(); + std::pair< double , double > y_range = this->data[0]->get_y_range(); + for ( size_t i = 1 ; i != this->data.size() ; ++i ) + { + if ( (x_range != this->data[i]->get_x_range()) || (y_range != this->data[i]->get_y_range()) ) + { + return false; + } + } + return true; + } + + + //scalar products? + //confidence bounds? + + void plot( const char* filename , size_t delay = 30 , double min_x = std::numeric_limits<double>::max() , double max_x = std::numeric_limits<double>::max() , double min_y = std::numeric_limits<double>::max() , double max_y = std::numeric_limits<double>::max() ) + { + std::vector< std::string > filenames; + //over here we need to + for ( size_t i = 0 ; i != this->data.size() ; ++i ) + { + std::stringstream ss; + ss << filename << "_" << i; + if ( ( min_x != max_x ) && ( min_y != max_y ) ) + { + //in this case, we set up the uniform min and max values for pciture + //this->data[i]->plot( ss.str().c_str() , min_x , max_x , min_y , max_y ); + } + else + { + //in this case, we DO NOT set up the uniform min and max values for pciture + this->data[i]->plot( ss.str().c_str() ); + } + ss << "_GnuplotScript"; + filenames.push_back( ss.str() ); + } + //and now we have to call it somehow for all the files. We will create a script that will call all the other scripts and ceate a sequence of jpg/png files. + + + std::stringstream gif_file_name; + gif_file_name << filename << ".gif"; + std::stringstream gif_gnuplot_script_file_name; + gif_gnuplot_script_file_name << filename << "_gif_gnuplot_script"; + + std::ofstream out; + out.open( gif_gnuplot_script_file_name.str().c_str() ); + out << "set terminal gif animate delay " << delay << std::endl; + out << "set output '" << gif_file_name.str() << "'" << std::endl; + if ( min_x != max_x ) + { + out << "set xrange [" << min_x << ":" << max_x << "]" << std::endl; + out << "set yrange [" << min_y << ":" << max_y << "]" << std::endl; + } + + for ( size_t i = 0 ; i != filenames.size() ; ++i ) + { + out << " load '" << filenames[i] << "'" << std::endl; + } + out.close(); + + std::cout << std::endl << std::endl << std::endl << "Open gnuplot terminal and type load '" << gif_gnuplot_script_file_name.str() << "' to create an animated gif \n"; + }//plot + + /** + * This procedure compute veliocity of the data in the topological process. The veliocity is devine as distance (of persistence information) over time. In this implementation we assume that + * the time points when the persistence information is sampled is equally distributed in time. Therefore that we may assume that time between two constitive persistence information is 1, in which + * case veliocity is just distance between two constitutitve diagrams. + * There is an obvious intuition behinf the veliocity: large veliocity means large changes in the topoogy. + **/ + std::vector< double > compute_veliocity( double exponent = 1 ) + { + std::vector< double > result; + result.reserve( this->data.size()-1 ); + for ( size_t i = 0 ; i != this-data.size() - 1 ; ++i ) + { + result.push_back( this->data[i]->distance( *this->data[i+1] , exponent ) ); + } + return result; + } + + /** + * This procedure compute veliocity of the data in the topological process. Accleration is defined as an increase of veliocity over time. In this implementation we assume that + * the time points when the persistence information is sampled is equally distributed in time. Therefore that we may assume that time between two constitive persistence information is 1, in which + * case acceleration is simply an increase of veliocity (the number may be negative). + * We have two versions of this procedure: + * The first one compute the acceleration based on the raw data (computations of veliocity are need to be done first, and later the informationa about veliocity is lost). + * The second one takes as an input the vector of veliocities, and return the vector of acceleration. + * There is an obvious intuition behid the acceleration: large (up to absolute value) value of acceleration implies large changes in the topology. + **/ + std::vector< double > compute_acceleration( const std::vector< double >& veliocity ) + { + std::vector< double > acceleration; + acceleration.reserve( veliocity.size() - 1 ); + for ( size_t i = 0 ; i != veliocity.size()-1 ; ++i ) + { + acceleration.push_back( veliocity[i+1]-veliocity[i] ); + } + return acceleration; + } + std::vector< double > compute_acceleration( double exponent = 1 ) + { + return this->compute_acceleration( this->compute_veliocity( exponent ) ); + } + + + std::vector< Representation* > get_data(){return this->data;} +private: + std::vector< Representation* > data; +}; + + + +}//Gudhi_stat +}//Gudhi + +#endif |