summaryrefslogtreecommitdiff
path: root/src/Gudhi_stat/include/gudhi/topological_process.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/Gudhi_stat/include/gudhi/topological_process.h')
-rw-r--r--src/Gudhi_stat/include/gudhi/topological_process.h417
1 files changed, 417 insertions, 0 deletions
diff --git a/src/Gudhi_stat/include/gudhi/topological_process.h b/src/Gudhi_stat/include/gudhi/topological_process.h
new file mode 100644
index 00000000..4a41c0a7
--- /dev/null
+++ b/src/Gudhi_stat/include/gudhi/topological_process.h
@@ -0,0 +1,417 @@
+/* This file is part of the Gudhi Library. The Gudhi library
+ * (Geometric Understanding in Higher Dimensions) is a generic C++
+ * library for computational topology.
+ *
+ * Author(s): Pawel Dlotko
+ *
+ * Copyright (C) 2015 INRIA (France)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef TOPOLOGICAL_PROCESS_H
+#define TOPOLOGICAL_PROCESS_H
+
+
+//concretizations
+#include <gudhi/persistence_representations/Vector_distances_in_diagram.h>
+#include <gudhi/persistence_representations/Persistence_landscape.h>
+#include <gudhi/persistence_representations/Persistence_landscape_on_grid.h>
+#include <gudhi/persistence_representations/Persistence_heat_maps.h>
+#include <vector>
+#include <limits>
+
+//extras
+#include <gudhi/common_gudhi_stat.h>
+
+namespace Gudhi
+{
+namespace Gudhi_stat
+{
+
+//over here we will need a few version of construct_representation_from_file procedure, since different representations may require different parameters. This is a procedure that in my
+//oppinion cannot be standarize, since construction of representation cannot. But, the remaining part of the code in my opinion is free from any details of representation.
+
+
+//the reason I am separating the process of getting the intervals from the process of consstructing the represnetation is the following: for soem representations, we may need to ahve the same
+//scale. To determine the scale, we need to know all the intervals before. So, we read the intervals first, then if needed, process them, to get the parametersof the representation,
+//and only then construct the representations.
+std::vector< std::vector< std::pair< double , double > > > read_persistence_pairs_from_file_with_names_of_files( const char* filename )
+{
+ bool dbg = false;
+ std::vector< std::vector< std::pair< double , double > > > result;
+
+ std::vector< std::string > files = readFileNames( filename );
+
+ std::cout << "Here are the filenames in the file : " << filename << std::endl;
+ for ( size_t i = 0 ; i != files.size() ; ++i )
+ {
+ std::cout << files[i] << std::endl;
+ }
+
+ for ( size_t i = 0 ; i != files.size() ; ++i )
+ {
+ std::vector< std::pair< double , double > > diag = read_standard_file( files[i].c_str() );
+ result.push_back( diag );
+ if ( dbg )
+ {
+ std::cerr << "Here is a diagram from a file : " << files[i].c_str() << std::endl;
+ for ( size_t aa = 0 ; aa != diag.size() ; ++aa )
+ {
+ std::cout << diag[aa].first << " " << diag[aa].second << std::endl;
+ }
+ getchar();
+ }
+ }
+ return result;
+}
+
+//When workign with time varying data, we tpically have a collection of files with intervals to produce one process. The intervals from all the files that constitute a single process can be read with
+//the read_persistence_pairs_from_file_with_names_of_files procedure.
+//But then, we also may need to read the persistence intervals of collection of proesses we may want to read the intervals first. This is what the procedre
+std::vector< std::vector< std::vector< std::pair< double , double > > > > read_persistence_pairs_from_files_with_names_of_files( const std::vector<const char*>& filenames )
+{
+ std::vector< std::vector< std::vector< std::pair< double , double > > > > result;
+ result.reserve( filenames.size() );
+ for ( size_t file_no = 0 ; file_no != filenames.size() ; ++file_no )
+ {
+ result.push_back( read_persistence_pairs_from_file_with_names_of_files( filenames[file_no] ) );
+ }
+ return result;
+}//read_persistence_pairs_from_files_with_names_of_files
+
+
+std::pair< std::pair< double,double > , std::pair< double,double > > find_x_and_y_ranges_of_intervals( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file )
+{
+ double min_x = std::numeric_limits< double >::max();
+ double max_x = -std::numeric_limits< double >::max();
+ double min_y = std::numeric_limits< double >::max();
+ double max_y = -std::numeric_limits< double >::max();
+ for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
+ {
+ for ( size_t j = 0 ; j != intervals_from_file[i].size() ; ++j )
+ {
+ if ( min_x > intervals_from_file[i][j].first )min_x = intervals_from_file[i][j].first;
+ if ( max_x < intervals_from_file[i][j].first )max_x = intervals_from_file[i][j].first;
+
+ if ( min_y > intervals_from_file[i][j].second )min_y = intervals_from_file[i][j].second;
+ if ( max_y < intervals_from_file[i][j].second )max_y = intervals_from_file[i][j].second;
+ }
+ }
+ return std::make_pair( std::make_pair( min_x,max_x ) , std::make_pair( min_y,max_y ) );
+}//find_x_and_y_ranges_of_intervals
+
+
+std::pair< std::pair< double,double > , std::pair< double,double > > find_x_and_y_ranges_of_intervals( const std::vector< std::vector< std::vector< std::pair< double , double > > > >& intervals_from_files )
+{
+ double min_x = std::numeric_limits< double >::max();
+ double max_x = -std::numeric_limits< double >::max();
+ double min_y = std::numeric_limits< double >::max();
+ double max_y = -std::numeric_limits< double >::max();
+ for ( size_t i = 0 ; i != intervals_from_files.size() ; ++i )
+ {
+ std::pair< std::pair< double,double > , std::pair< double,double > > ranges = find_x_and_y_ranges_of_intervals( intervals_from_files[i] );
+ if ( min_x > ranges.first.first ) min_x = ranges.first.first;
+ if ( max_x < ranges.first.second ) max_x = ranges.first.second;
+ if ( min_y > ranges.second.first ) min_y = ranges.second.first;
+ if ( max_y < ranges.second.second ) max_y = ranges.second.second;
+ }
+ return std::make_pair( std::make_pair( min_x,max_x ) , std::make_pair( min_y,max_y ) );
+}
+
+
+template <typename Representation>
+std::vector< Representation* > construct_representation_from_file( const char* filename )
+{
+ std::vector< std::vector< std::pair< double , double > > > intervals_from_file = read_persistence_pairs_from_file_with_names_of_files( filename );
+ std::vector< Representation* > result( intervals_from_file.size() );
+ for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
+ {
+ Representation* l = new Representation( intervals_from_file[i] );
+ result[i] = l;
+ }
+ return result;
+}
+
+//this one can be use for Persistence_intervals.h and Persistence_landscape.h
+template <typename Representation>
+std::vector< Representation* > construct_representation_from_file( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file)
+{
+
+ std::vector< Representation* > result( intervals_from_file.size() );
+ for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
+ {
+ Representation* l = new Representation( intervals_from_file[i] );
+ result[i] = l;
+ }
+ return result;
+}
+
+//this one can be use for Persistence_heat_maps.h
+template <typename Representation>
+std::vector< Representation* > construct_representation_from_file( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file ,
+ std::vector< std::vector<double> > filter = create_Gaussian_filter(5,1),
+ bool erase_below_diagonal = false ,
+ size_t number_of_pixels = 1000 ,
+ double min_ = std::numeric_limits<double>::max() , double max_ = std::numeric_limits<double>::max() )
+{
+ std::vector< Representation* > result( intervals_from_file.size() );
+ for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
+ {
+ Representation* l = new Representation( intervals_from_file[i] , filter , erase_below_diagonal , number_of_pixels , min_ , max_ );
+ result[i] = l;
+ }
+ return result;
+}
+
+//this one can be use for Persistence_landscape_on_grid.h
+template <typename Representation>
+std::vector< Representation* > construct_representation_from_file( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file,
+ double grid_min_ , double grid_max_ , size_t number_of_points_
+)
+{
+ std::vector< Representation* > result( intervals_from_file.size() );
+ for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
+ {
+ Representation* l = new Representation( intervals_from_file[i] , grid_min_ , grid_max_ , number_of_points_ );
+ result[i] = l;
+ }
+ return result;
+}
+
+
+//this one can be use for Vector_distances_in_diagram.h
+template <typename Representation>
+std::vector< Representation* > construct_representation_from_file( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file,
+ size_t where_to_cut
+)
+{
+ std::vector< Representation* > result( intervals_from_file.size() );
+ for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
+ {
+ Representation* l = new Representation( intervals_from_file[i] , where_to_cut );
+ result[i] = l;
+ }
+ return result;
+}
+
+
+template <typename Representation>
+class Topological_process
+{
+public:
+ Topological_process(){};
+ ~Topological_process()
+ {
+ for ( size_t i = 0 ; i != this->data.size() ; ++i )
+ {
+ delete this->data[i];
+ }
+ }
+
+ Topological_process( const std::vector< Representation* >& data_ ):data(data_){}
+ double distance( const Topological_process& second , double exponent = 1 )
+ {
+ if ( this->data.size() != second.data.size() )
+ {
+ throw "Incompatible lengths of topological processes, we cannot compute the distance in this case \n";
+ }
+ double result = 0;
+ for ( size_t i = 0 ; i != this->data.size() ; ++i )
+ {
+ result += this->data[i]->distance( *second.data[i] , exponent );
+ }
+ return result;
+ }
+
+ void compute_average( const std::vector< Topological_process* >& to_average )
+ {
+ //since we will substitute whatever data we have in this object with an average, we clear the data in this object first:
+ this->data.clear();
+ //then we need to check if all the topological processes in the vector to_average have the same length.
+ if ( to_average.size() == 0 )return;
+ for ( size_t i = 1 ; i != to_average.size() ; ++i )
+ {
+ if ( to_average[0]->data.size() != to_average[i]->data.size() )
+ {
+ throw "Incompatible lengths of topological processes, the averages cannot be computed \n";
+ }
+ }
+
+ this->data.reserve( to_average[0]->data.size() );
+
+ for ( size_t level = 0 ; level != to_average[0]->data.size() ; ++level )
+ {
+ //now we will be averaging the level level:
+ std::vector< Representation* > to_average_at_this_level;
+ to_average_at_this_level.reserve( to_average.size() );
+ for ( size_t i = 0 ; i != to_average.size() ; ++i )
+ {
+ to_average_at_this_level.push_back( to_average[i]->data[level] );
+ }
+ Representation* average_representation_on_this_level = new Representation;
+ average_representation_on_this_level->compute_average( to_average_at_this_level );
+ this->data.push_back( average_representation_on_this_level );
+ }
+ }
+
+ std::pair< double , double > get_x_range()const
+ {
+ double min_x = std::numeric_limits< double >::max();
+ double max_x = -std::numeric_limits< double >::max();
+ for ( size_t i = 0 ; i != this->data.size() ; ++i )
+ {
+ std::pair< double , double > xrange = this->data[i]->get_x_range();
+ if ( min_x > xrange.first )min_x = xrange.first;
+ if ( max_x < xrange.second )max_x = xrange.second;
+ }
+ return std::make_pair( min_x , max_x );
+ }
+
+ std::pair< double , double > get_y_range()const
+ {
+ double min_y = std::numeric_limits< double >::max();
+ double max_y = -std::numeric_limits< double >::max();
+ for ( size_t i = 0 ; i != this->data.size() ; ++i )
+ {
+ std::pair< double , double > yrange = this->data[i]->get_y_range();
+ if ( min_y > yrange.first )min_y = yrange.first;
+ if ( max_y < yrange.second )max_y = yrange.second;
+ }
+ return std::make_pair( min_y , max_y );
+ }
+
+ /**
+ * The procedure checks if the ranges of data are the same for all of them.
+ **/
+ bool are_the_data_aligned()const
+ {
+ if ( this->data.size() == 0 )return true;//empty collection is aligned
+ std::pair< double , double > x_range = this->data[0]->get_x_range();
+ std::pair< double , double > y_range = this->data[0]->get_y_range();
+ for ( size_t i = 1 ; i != this->data.size() ; ++i )
+ {
+ if ( (x_range != this->data[i]->get_x_range()) || (y_range != this->data[i]->get_y_range()) )
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+
+
+ //scalar products?
+ //confidence bounds?
+
+ void plot( const char* filename , size_t delay = 30 , double min_x = std::numeric_limits<double>::max() , double max_x = std::numeric_limits<double>::max() , double min_y = std::numeric_limits<double>::max() , double max_y = std::numeric_limits<double>::max() )
+ {
+ std::vector< std::string > filenames;
+ //over here we need to
+ for ( size_t i = 0 ; i != this->data.size() ; ++i )
+ {
+ std::stringstream ss;
+ ss << filename << "_" << i;
+ if ( ( min_x != max_x ) && ( min_y != max_y ) )
+ {
+ //in this case, we set up the uniform min and max values for pciture
+ //this->data[i]->plot( ss.str().c_str() , min_x , max_x , min_y , max_y );
+ }
+ else
+ {
+ //in this case, we DO NOT set up the uniform min and max values for pciture
+ this->data[i]->plot( ss.str().c_str() );
+ }
+ ss << "_GnuplotScript";
+ filenames.push_back( ss.str() );
+ }
+ //and now we have to call it somehow for all the files. We will create a script that will call all the other scripts and ceate a sequence of jpg/png files.
+
+
+ std::stringstream gif_file_name;
+ gif_file_name << filename << ".gif";
+ std::stringstream gif_gnuplot_script_file_name;
+ gif_gnuplot_script_file_name << filename << "_gif_gnuplot_script";
+
+ std::ofstream out;
+ out.open( gif_gnuplot_script_file_name.str().c_str() );
+ out << "set terminal gif animate delay " << delay << std::endl;
+ out << "set output '" << gif_file_name.str() << "'" << std::endl;
+ if ( min_x != max_x )
+ {
+ out << "set xrange [" << min_x << ":" << max_x << "]" << std::endl;
+ out << "set yrange [" << min_y << ":" << max_y << "]" << std::endl;
+ }
+
+ for ( size_t i = 0 ; i != filenames.size() ; ++i )
+ {
+ out << " load '" << filenames[i] << "'" << std::endl;
+ }
+ out.close();
+
+ std::cout << std::endl << std::endl << std::endl << "Open gnuplot terminal and type load '" << gif_gnuplot_script_file_name.str() << "' to create an animated gif \n";
+ }//plot
+
+ /**
+ * This procedure compute veliocity of the data in the topological process. The veliocity is devine as distance (of persistence information) over time. In this implementation we assume that
+ * the time points when the persistence information is sampled is equally distributed in time. Therefore that we may assume that time between two constitive persistence information is 1, in which
+ * case veliocity is just distance between two constitutitve diagrams.
+ * There is an obvious intuition behinf the veliocity: large veliocity means large changes in the topoogy.
+ **/
+ std::vector< double > compute_veliocity( double exponent = 1 )
+ {
+ std::vector< double > result;
+ result.reserve( this->data.size()-1 );
+ for ( size_t i = 0 ; i != this-data.size() - 1 ; ++i )
+ {
+ result.push_back( this->data[i]->distance( *this->data[i+1] , exponent ) );
+ }
+ return result;
+ }
+
+ /**
+ * This procedure compute veliocity of the data in the topological process. Accleration is defined as an increase of veliocity over time. In this implementation we assume that
+ * the time points when the persistence information is sampled is equally distributed in time. Therefore that we may assume that time between two constitive persistence information is 1, in which
+ * case acceleration is simply an increase of veliocity (the number may be negative).
+ * We have two versions of this procedure:
+ * The first one compute the acceleration based on the raw data (computations of veliocity are need to be done first, and later the informationa about veliocity is lost).
+ * The second one takes as an input the vector of veliocities, and return the vector of acceleration.
+ * There is an obvious intuition behid the acceleration: large (up to absolute value) value of acceleration implies large changes in the topology.
+ **/
+ std::vector< double > compute_acceleration( const std::vector< double >& veliocity )
+ {
+ std::vector< double > acceleration;
+ acceleration.reserve( veliocity.size() - 1 );
+ for ( size_t i = 0 ; i != veliocity.size()-1 ; ++i )
+ {
+ acceleration.push_back( veliocity[i+1]-veliocity[i] );
+ }
+ return acceleration;
+ }
+ std::vector< double > compute_acceleration( double exponent = 1 )
+ {
+ return this->compute_acceleration( this->compute_veliocity( exponent ) );
+ }
+
+
+ std::vector< Representation* > get_data(){return this->data;}
+private:
+ std::vector< Representation* > data;
+};
+
+
+
+}//Gudhi_stat
+}//Gudhi
+
+#endif