summaryrefslogtreecommitdiff
path: root/src/Gudhi_stat/include/gudhi/topological_process.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/Gudhi_stat/include/gudhi/topological_process.h')
-rw-r--r--src/Gudhi_stat/include/gudhi/topological_process.h420
1 files changed, 0 insertions, 420 deletions
diff --git a/src/Gudhi_stat/include/gudhi/topological_process.h b/src/Gudhi_stat/include/gudhi/topological_process.h
deleted file mode 100644
index ee8478ed..00000000
--- a/src/Gudhi_stat/include/gudhi/topological_process.h
+++ /dev/null
@@ -1,420 +0,0 @@
-/* This file is part of the Gudhi Library. The Gudhi library
- * (Geometric Understanding in Higher Dimensions) is a generic C++
- * library for computational topology.
- *
- * Author(s): Pawel Dlotko
- *
- * Copyright (C) 2015 INRIA (France)
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef TOPOLOGICAL_PROCESS_H
-#define TOPOLOGICAL_PROCESS_H
-
-
-//concretizations
-#include <gudhi/Vector_distances_in_diagram.h>
-#include <gudhi/Persistence_landscape.h>
-#include <gudhi/Persistence_landscape_on_grid.h>
-#include <gudhi/Persistence_heat_maps.h>
-#include <vector>
-#include <limits>
-
-//extras
-#include <gudhi/common_persistence_representations.h>
-
-namespace Gudhi
-{
-namespace Persistence_representations
-{
-
-
-//TODO, change reading procedures so that they also accept the value of dimension.
-
-//over here we will need a few version of construct_representation_from_file procedure, since different representations may require different parameters. This is a procedure that in my
-//oppinion cannot be standarize, since construction of representation cannot. But, the remaining part of the code in my opinion is free from any details of representation.
-
-
-//the reason I am separating the process of getting the intervals from the process of consstructing the represnetation is the following: for soem representations, we may need to ahve the same
-//scale. To determine the scale, we need to know all the intervals before. So, we read the intervals first, then if needed, process them, to get the parametersof the representation,
-//and only then construct the representations.
-std::vector< std::vector< std::pair< double , double > > > read_persistence_pairs_from_file_with_names_of_files( const char* filename )
-{
- bool dbg = false;
- std::vector< std::vector< std::pair< double , double > > > result;
-
- std::vector< std::string > files = readFileNames( filename );
-
- std::cout << "Here are the filenames in the file : " << filename << std::endl;
- for ( size_t i = 0 ; i != files.size() ; ++i )
- {
- std::cout << files[i] << std::endl;
- }
-
- for ( size_t i = 0 ; i != files.size() ; ++i )
- {
- std::vector< std::pair< double , double > > diag = read_persistence_intervals_in_one_dimension_from_file( files[i].c_str() );
- result.push_back( diag );
- if ( dbg )
- {
- std::cerr << "Here is a diagram from a file : " << files[i].c_str() << std::endl;
- for ( size_t aa = 0 ; aa != diag.size() ; ++aa )
- {
- std::cout << diag[aa].first << " " << diag[aa].second << std::endl;
- }
- getchar();
- }
- }
- return result;
-}
-
-//When workign with time varying data, we tpically have a collection of files with intervals to produce one process. The intervals from all the files that constitute a single process can be read with
-//the read_persistence_pairs_from_file_with_names_of_files procedure.
-//But then, we also may need to read the persistence intervals of collection of proesses we may want to read the intervals first. This is what the procedre
-std::vector< std::vector< std::vector< std::pair< double , double > > > > read_persistence_pairs_from_files_with_names_of_files( const std::vector<const char*>& filenames )
-{
- std::vector< std::vector< std::vector< std::pair< double , double > > > > result;
- result.reserve( filenames.size() );
- for ( size_t file_no = 0 ; file_no != filenames.size() ; ++file_no )
- {
- result.push_back( read_persistence_pairs_from_file_with_names_of_files( filenames[file_no] ) );
- }
- return result;
-}//read_persistence_pairs_from_files_with_names_of_files
-
-
-std::pair< std::pair< double,double > , std::pair< double,double > > find_x_and_y_ranges_of_intervals( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file )
-{
- double min_x = std::numeric_limits< double >::max();
- double max_x = -std::numeric_limits< double >::max();
- double min_y = std::numeric_limits< double >::max();
- double max_y = -std::numeric_limits< double >::max();
- for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
- {
- for ( size_t j = 0 ; j != intervals_from_file[i].size() ; ++j )
- {
- if ( min_x > intervals_from_file[i][j].first )min_x = intervals_from_file[i][j].first;
- if ( max_x < intervals_from_file[i][j].first )max_x = intervals_from_file[i][j].first;
-
- if ( min_y > intervals_from_file[i][j].second )min_y = intervals_from_file[i][j].second;
- if ( max_y < intervals_from_file[i][j].second )max_y = intervals_from_file[i][j].second;
- }
- }
- return std::make_pair( std::make_pair( min_x,max_x ) , std::make_pair( min_y,max_y ) );
-}//find_x_and_y_ranges_of_intervals
-
-
-std::pair< std::pair< double,double > , std::pair< double,double > > find_x_and_y_ranges_of_intervals( const std::vector< std::vector< std::vector< std::pair< double , double > > > >& intervals_from_files )
-{
- double min_x = std::numeric_limits< double >::max();
- double max_x = -std::numeric_limits< double >::max();
- double min_y = std::numeric_limits< double >::max();
- double max_y = -std::numeric_limits< double >::max();
- for ( size_t i = 0 ; i != intervals_from_files.size() ; ++i )
- {
- std::pair< std::pair< double,double > , std::pair< double,double > > ranges = find_x_and_y_ranges_of_intervals( intervals_from_files[i] );
- if ( min_x > ranges.first.first ) min_x = ranges.first.first;
- if ( max_x < ranges.first.second ) max_x = ranges.first.second;
- if ( min_y > ranges.second.first ) min_y = ranges.second.first;
- if ( max_y < ranges.second.second ) max_y = ranges.second.second;
- }
- return std::make_pair( std::make_pair( min_x,max_x ) , std::make_pair( min_y,max_y ) );
-}
-
-
-template <typename Representation>
-std::vector< Representation* > construct_representation_from_file( const char* filename )
-{
- std::vector< std::vector< std::pair< double , double > > > intervals_from_file = read_persistence_pairs_from_file_with_names_of_files( filename );
- std::vector< Representation* > result( intervals_from_file.size() );
- for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
- {
- Representation* l = new Representation( intervals_from_file[i] );
- result[i] = l;
- }
- return result;
-}
-
-//this one can be use for Persistence_intervals.h and Persistence_landscape.h
-template <typename Representation>
-std::vector< Representation* > construct_representation_from_file( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file)
-{
-
- std::vector< Representation* > result( intervals_from_file.size() );
- for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
- {
- Representation* l = new Representation( intervals_from_file[i] );
- result[i] = l;
- }
- return result;
-}
-
-//this one can be use for Persistence_heat_maps.h
-template <typename Representation>
-std::vector< Representation* > construct_representation_from_file( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file ,
- std::vector< std::vector<double> > filter = create_Gaussian_filter(5,1),
- bool erase_below_diagonal = false ,
- size_t number_of_pixels = 1000 ,
- double min_ = std::numeric_limits<double>::max() , double max_ = std::numeric_limits<double>::max() )
-{
- std::vector< Representation* > result( intervals_from_file.size() );
- for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
- {
- Representation* l = new Representation( intervals_from_file[i] , filter , erase_below_diagonal , number_of_pixels , min_ , max_ );
- result[i] = l;
- }
- return result;
-}
-
-//this one can be use for Persistence_landscape_on_grid.h
-template <typename Representation>
-std::vector< Representation* > construct_representation_from_file( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file,
- double grid_min_ , double grid_max_ , size_t number_of_points_
-)
-{
- std::vector< Representation* > result( intervals_from_file.size() );
- for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
- {
- Representation* l = new Representation( intervals_from_file[i] , grid_min_ , grid_max_ , number_of_points_ );
- result[i] = l;
- }
- return result;
-}
-
-
-//this one can be use for Vector_distances_in_diagram.h
-template <typename Representation>
-std::vector< Representation* > construct_representation_from_file( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file,
- size_t where_to_cut
-)
-{
- std::vector< Representation* > result( intervals_from_file.size() );
- for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
- {
- Representation* l = new Representation( intervals_from_file[i] , where_to_cut );
- result[i] = l;
- }
- return result;
-}
-
-
-template <typename Representation>
-class Topological_process
-{
-public:
- Topological_process(){};
- ~Topological_process()
- {
- for ( size_t i = 0 ; i != this->data.size() ; ++i )
- {
- delete this->data[i];
- }
- }
-
- Topological_process( const std::vector< Representation* >& data_ ):data(data_){}
- double distance( const Topological_process& second , double exponent = 1 )
- {
- if ( this->data.size() != second.data.size() )
- {
- throw "Incompatible lengths of topological processes, we cannot compute the distance in this case \n";
- }
- double result = 0;
- for ( size_t i = 0 ; i != this->data.size() ; ++i )
- {
- result += this->data[i]->distance( *second.data[i] , exponent );
- }
- return result;
- }
-
- void compute_average( const std::vector< Topological_process* >& to_average )
- {
- //since we will substitute whatever data we have in this object with an average, we clear the data in this object first:
- this->data.clear();
- //then we need to check if all the topological processes in the vector to_average have the same length.
- if ( to_average.size() == 0 )return;
- for ( size_t i = 1 ; i != to_average.size() ; ++i )
- {
- if ( to_average[0]->data.size() != to_average[i]->data.size() )
- {
- throw "Incompatible lengths of topological processes, the averages cannot be computed \n";
- }
- }
-
- this->data.reserve( to_average[0]->data.size() );
-
- for ( size_t level = 0 ; level != to_average[0]->data.size() ; ++level )
- {
- //now we will be averaging the level level:
- std::vector< Representation* > to_average_at_this_level;
- to_average_at_this_level.reserve( to_average.size() );
- for ( size_t i = 0 ; i != to_average.size() ; ++i )
- {
- to_average_at_this_level.push_back( to_average[i]->data[level] );
- }
- Representation* average_representation_on_this_level = new Representation;
- average_representation_on_this_level->compute_average( to_average_at_this_level );
- this->data.push_back( average_representation_on_this_level );
- }
- }
-
- std::pair< double , double > get_x_range()const
- {
- double min_x = std::numeric_limits< double >::max();
- double max_x = -std::numeric_limits< double >::max();
- for ( size_t i = 0 ; i != this->data.size() ; ++i )
- {
- std::pair< double , double > xrange = this->data[i]->get_x_range();
- if ( min_x > xrange.first )min_x = xrange.first;
- if ( max_x < xrange.second )max_x = xrange.second;
- }
- return std::make_pair( min_x , max_x );
- }
-
- std::pair< double , double > get_y_range()const
- {
- double min_y = std::numeric_limits< double >::max();
- double max_y = -std::numeric_limits< double >::max();
- for ( size_t i = 0 ; i != this->data.size() ; ++i )
- {
- std::pair< double , double > yrange = this->data[i]->get_y_range();
- if ( min_y > yrange.first )min_y = yrange.first;
- if ( max_y < yrange.second )max_y = yrange.second;
- }
- return std::make_pair( min_y , max_y );
- }
-
- /**
- * The procedure checks if the ranges of data are the same for all of them.
- **/
- bool are_the_data_aligned()const
- {
- if ( this->data.size() == 0 )return true;//empty collection is aligned
- std::pair< double , double > x_range = this->data[0]->get_x_range();
- std::pair< double , double > y_range = this->data[0]->get_y_range();
- for ( size_t i = 1 ; i != this->data.size() ; ++i )
- {
- if ( (x_range != this->data[i]->get_x_range()) || (y_range != this->data[i]->get_y_range()) )
- {
- return false;
- }
- }
- return true;
- }
-
-
- //scalar products?
- //confidence bounds?
-
- void plot( const char* filename , size_t delay = 30 , double min_x = std::numeric_limits<double>::max() , double max_x = std::numeric_limits<double>::max() , double min_y = std::numeric_limits<double>::max() , double max_y = std::numeric_limits<double>::max() )
- {
- std::vector< std::string > filenames;
- //over here we need to
- for ( size_t i = 0 ; i != this->data.size() ; ++i )
- {
- std::stringstream ss;
- ss << filename << "_" << i;
- if ( ( min_x != max_x ) && ( min_y != max_y ) )
- {
- //in this case, we set up the uniform min and max values for pciture
- //this->data[i]->plot( ss.str().c_str() , min_x , max_x , min_y , max_y );
- }
- else
- {
- //in this case, we DO NOT set up the uniform min and max values for pciture
- this->data[i]->plot( ss.str().c_str() );
- }
- ss << "_GnuplotScript";
- filenames.push_back( ss.str() );
- }
- //and now we have to call it somehow for all the files. We will create a script that will call all the other scripts and ceate a sequence of jpg/png files.
-
-
- std::stringstream gif_file_name;
- gif_file_name << filename << ".gif";
- std::stringstream gif_gnuplot_script_file_name;
- gif_gnuplot_script_file_name << filename << "_gif_gnuplot_script";
-
- std::ofstream out;
- out.open( gif_gnuplot_script_file_name.str().c_str() );
- out << "set terminal gif animate delay " << delay << std::endl;
- out << "set output '" << gif_file_name.str() << "'" << std::endl;
- if ( min_x != max_x )
- {
- out << "set xrange [" << min_x << ":" << max_x << "]" << std::endl;
- out << "set yrange [" << min_y << ":" << max_y << "]" << std::endl;
- }
-
- for ( size_t i = 0 ; i != filenames.size() ; ++i )
- {
- out << " load '" << filenames[i] << "'" << std::endl;
- }
- out.close();
-
- std::cout << std::endl << std::endl << std::endl << "Open gnuplot terminal and type load '" << gif_gnuplot_script_file_name.str() << "' to create an animated gif \n";
- }//plot
-
- /**
- * This procedure compute veliocity of the data in the topological process. The veliocity is devine as distance (of persistence information) over time. In this implementation we assume that
- * the time points when the persistence information is sampled is equally distributed in time. Therefore that we may assume that time between two constitive persistence information is 1, in which
- * case veliocity is just distance between two constitutitve diagrams.
- * There is an obvious intuition behinf the veliocity: large veliocity means large changes in the topoogy.
- **/
- std::vector< double > compute_veliocity( double exponent = 1 )
- {
- std::vector< double > result;
- result.reserve( this->data.size()-1 );
- for ( size_t i = 0 ; i != this-data.size() - 1 ; ++i )
- {
- result.push_back( this->data[i]->distance( *this->data[i+1] , exponent ) );
- }
- return result;
- }
-
- /**
- * This procedure compute veliocity of the data in the topological process. Accleration is defined as an increase of veliocity over time. In this implementation we assume that
- * the time points when the persistence information is sampled is equally distributed in time. Therefore that we may assume that time between two constitive persistence information is 1, in which
- * case acceleration is simply an increase of veliocity (the number may be negative).
- * We have two versions of this procedure:
- * The first one compute the acceleration based on the raw data (computations of veliocity are need to be done first, and later the informationa about veliocity is lost).
- * The second one takes as an input the vector of veliocities, and return the vector of acceleration.
- * There is an obvious intuition behid the acceleration: large (up to absolute value) value of acceleration implies large changes in the topology.
- **/
- std::vector< double > compute_acceleration( const std::vector< double >& veliocity )
- {
- std::vector< double > acceleration;
- acceleration.reserve( veliocity.size() - 1 );
- for ( size_t i = 0 ; i != veliocity.size()-1 ; ++i )
- {
- acceleration.push_back( veliocity[i+1]-veliocity[i] );
- }
- return acceleration;
- }
- std::vector< double > compute_acceleration( double exponent = 1 )
- {
- return this->compute_acceleration( this->compute_veliocity( exponent ) );
- }
-
-
- std::vector< Representation* > get_data(){return this->data;}
-private:
- std::vector< Representation* > data;
-};
-
-
-
-}//Gudhi_stat
-}//Gudhi
-
-#endif