/* This file is part of the Gudhi Library. The Gudhi library
* (Geometric Understanding in Higher Dimensions) is a generic C++
* library for computational topology.
*
* Author(s): Pawel Dlotko
*
* Copyright (C) 2015 INRIA (France)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
#ifndef TOPOLOGICAL_PROCESS_H
#define TOPOLOGICAL_PROCESS_H
//concretizations
#include
#include
#include
#include
#include
#include
//extras
#include
namespace Gudhi
{
namespace Gudhi_stat
{
//over here we will need a few version of construct_representation_from_file procedure, since different representations may require different parameters. This is a procedure that in my
//oppinion cannot be standarize, since construction of representation cannot. But, the remaining part of the code in my opinion is free from any details of representation.
//the reason I am separating the process of getting the intervals from the process of consstructing the represnetation is the following: for soem representations, we may need to ahve the same
//scale. To determine the scale, we need to know all the intervals before. So, we read the intervals first, then if needed, process them, to get the parametersof the representation,
//and only then construct the representations.
std::vector< std::vector< std::pair< double , double > > > read_persistence_pairs_from_file_with_names_of_files( const char* filename )
{
bool dbg = false;
std::vector< std::vector< std::pair< double , double > > > result;
std::vector< std::string > files = readFileNames( filename );
std::cout << "Here are the filenames in the file : " << filename << std::endl;
for ( size_t i = 0 ; i != files.size() ; ++i )
{
std::cout << files[i] << std::endl;
}
for ( size_t i = 0 ; i != files.size() ; ++i )
{
std::vector< std::pair< double , double > > diag = read_standard_persistence_file( files[i].c_str() );
result.push_back( diag );
if ( dbg )
{
std::cerr << "Here is a diagram from a file : " << files[i].c_str() << std::endl;
for ( size_t aa = 0 ; aa != diag.size() ; ++aa )
{
std::cout << diag[aa].first << " " << diag[aa].second << std::endl;
}
getchar();
}
}
return result;
}
//When workign with time varying data, we tpically have a collection of files with intervals to produce one process. The intervals from all the files that constitute a single process can be read with
//the read_persistence_pairs_from_file_with_names_of_files procedure.
//But then, we also may need to read the persistence intervals of collection of proesses we may want to read the intervals first. This is what the procedre
std::vector< std::vector< std::vector< std::pair< double , double > > > > read_persistence_pairs_from_files_with_names_of_files( const std::vector& filenames )
{
std::vector< std::vector< std::vector< std::pair< double , double > > > > result;
result.reserve( filenames.size() );
for ( size_t file_no = 0 ; file_no != filenames.size() ; ++file_no )
{
result.push_back( read_persistence_pairs_from_file_with_names_of_files( filenames[file_no] ) );
}
return result;
}//read_persistence_pairs_from_files_with_names_of_files
std::pair< std::pair< double,double > , std::pair< double,double > > find_x_and_y_ranges_of_intervals( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file )
{
double min_x = std::numeric_limits< double >::max();
double max_x = -std::numeric_limits< double >::max();
double min_y = std::numeric_limits< double >::max();
double max_y = -std::numeric_limits< double >::max();
for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
{
for ( size_t j = 0 ; j != intervals_from_file[i].size() ; ++j )
{
if ( min_x > intervals_from_file[i][j].first )min_x = intervals_from_file[i][j].first;
if ( max_x < intervals_from_file[i][j].first )max_x = intervals_from_file[i][j].first;
if ( min_y > intervals_from_file[i][j].second )min_y = intervals_from_file[i][j].second;
if ( max_y < intervals_from_file[i][j].second )max_y = intervals_from_file[i][j].second;
}
}
return std::make_pair( std::make_pair( min_x,max_x ) , std::make_pair( min_y,max_y ) );
}//find_x_and_y_ranges_of_intervals
std::pair< std::pair< double,double > , std::pair< double,double > > find_x_and_y_ranges_of_intervals( const std::vector< std::vector< std::vector< std::pair< double , double > > > >& intervals_from_files )
{
double min_x = std::numeric_limits< double >::max();
double max_x = -std::numeric_limits< double >::max();
double min_y = std::numeric_limits< double >::max();
double max_y = -std::numeric_limits< double >::max();
for ( size_t i = 0 ; i != intervals_from_files.size() ; ++i )
{
std::pair< std::pair< double,double > , std::pair< double,double > > ranges = find_x_and_y_ranges_of_intervals( intervals_from_files[i] );
if ( min_x > ranges.first.first ) min_x = ranges.first.first;
if ( max_x < ranges.first.second ) max_x = ranges.first.second;
if ( min_y > ranges.second.first ) min_y = ranges.second.first;
if ( max_y < ranges.second.second ) max_y = ranges.second.second;
}
return std::make_pair( std::make_pair( min_x,max_x ) , std::make_pair( min_y,max_y ) );
}
template
std::vector< Representation* > construct_representation_from_file( const char* filename )
{
std::vector< std::vector< std::pair< double , double > > > intervals_from_file = read_persistence_pairs_from_file_with_names_of_files( filename );
std::vector< Representation* > result( intervals_from_file.size() );
for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
{
Representation* l = new Representation( intervals_from_file[i] );
result[i] = l;
}
return result;
}
//this one can be use for Persistence_intervals.h and Persistence_landscape.h
template
std::vector< Representation* > construct_representation_from_file( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file)
{
std::vector< Representation* > result( intervals_from_file.size() );
for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
{
Representation* l = new Representation( intervals_from_file[i] );
result[i] = l;
}
return result;
}
//this one can be use for Persistence_heat_maps.h
template
std::vector< Representation* > construct_representation_from_file( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file ,
std::vector< std::vector > filter = create_Gaussian_filter(5,1),
bool erase_below_diagonal = false ,
size_t number_of_pixels = 1000 ,
double min_ = std::numeric_limits::max() , double max_ = std::numeric_limits::max() )
{
std::vector< Representation* > result( intervals_from_file.size() );
for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
{
Representation* l = new Representation( intervals_from_file[i] , filter , erase_below_diagonal , number_of_pixels , min_ , max_ );
result[i] = l;
}
return result;
}
//this one can be use for Persistence_landscape_on_grid.h
template
std::vector< Representation* > construct_representation_from_file( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file,
double grid_min_ , double grid_max_ , size_t number_of_points_
)
{
std::vector< Representation* > result( intervals_from_file.size() );
for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
{
Representation* l = new Representation( intervals_from_file[i] , grid_min_ , grid_max_ , number_of_points_ );
result[i] = l;
}
return result;
}
//this one can be use for Vector_distances_in_diagram.h
template
std::vector< Representation* > construct_representation_from_file( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file,
size_t where_to_cut
)
{
std::vector< Representation* > result( intervals_from_file.size() );
for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
{
Representation* l = new Representation( intervals_from_file[i] , where_to_cut );
result[i] = l;
}
return result;
}
template
class Topological_process
{
public:
Topological_process(){};
~Topological_process()
{
for ( size_t i = 0 ; i != this->data.size() ; ++i )
{
delete this->data[i];
}
}
Topological_process( const std::vector< Representation* >& data_ ):data(data_){}
double distance( const Topological_process& second , double exponent = 1 )
{
if ( this->data.size() != second.data.size() )
{
throw "Incompatible lengths of topological processes, we cannot compute the distance in this case \n";
}
double result = 0;
for ( size_t i = 0 ; i != this->data.size() ; ++i )
{
result += this->data[i]->distance( *second.data[i] , exponent );
}
return result;
}
void compute_average( const std::vector< Topological_process* >& to_average )
{
//since we will substitute whatever data we have in this object with an average, we clear the data in this object first:
this->data.clear();
//then we need to check if all the topological processes in the vector to_average have the same length.
if ( to_average.size() == 0 )return;
for ( size_t i = 1 ; i != to_average.size() ; ++i )
{
if ( to_average[0]->data.size() != to_average[i]->data.size() )
{
throw "Incompatible lengths of topological processes, the averages cannot be computed \n";
}
}
this->data.reserve( to_average[0]->data.size() );
for ( size_t level = 0 ; level != to_average[0]->data.size() ; ++level )
{
//now we will be averaging the level level:
std::vector< Representation* > to_average_at_this_level;
to_average_at_this_level.reserve( to_average.size() );
for ( size_t i = 0 ; i != to_average.size() ; ++i )
{
to_average_at_this_level.push_back( to_average[i]->data[level] );
}
Representation* average_representation_on_this_level = new Representation;
average_representation_on_this_level->compute_average( to_average_at_this_level );
this->data.push_back( average_representation_on_this_level );
}
}
std::pair< double , double > get_x_range()const
{
double min_x = std::numeric_limits< double >::max();
double max_x = -std::numeric_limits< double >::max();
for ( size_t i = 0 ; i != this->data.size() ; ++i )
{
std::pair< double , double > xrange = this->data[i]->get_x_range();
if ( min_x > xrange.first )min_x = xrange.first;
if ( max_x < xrange.second )max_x = xrange.second;
}
return std::make_pair( min_x , max_x );
}
std::pair< double , double > get_y_range()const
{
double min_y = std::numeric_limits< double >::max();
double max_y = -std::numeric_limits< double >::max();
for ( size_t i = 0 ; i != this->data.size() ; ++i )
{
std::pair< double , double > yrange = this->data[i]->get_y_range();
if ( min_y > yrange.first )min_y = yrange.first;
if ( max_y < yrange.second )max_y = yrange.second;
}
return std::make_pair( min_y , max_y );
}
/**
* The procedure checks if the ranges of data are the same for all of them.
**/
bool are_the_data_aligned()const
{
if ( this->data.size() == 0 )return true;//empty collection is aligned
std::pair< double , double > x_range = this->data[0]->get_x_range();
std::pair< double , double > y_range = this->data[0]->get_y_range();
for ( size_t i = 1 ; i != this->data.size() ; ++i )
{
if ( (x_range != this->data[i]->get_x_range()) || (y_range != this->data[i]->get_y_range()) )
{
return false;
}
}
return true;
}
//scalar products?
//confidence bounds?
void plot( const char* filename , size_t delay = 30 , double min_x = std::numeric_limits::max() , double max_x = std::numeric_limits::max() , double min_y = std::numeric_limits::max() , double max_y = std::numeric_limits::max() )
{
std::vector< std::string > filenames;
//over here we need to
for ( size_t i = 0 ; i != this->data.size() ; ++i )
{
std::stringstream ss;
ss << filename << "_" << i;
if ( ( min_x != max_x ) && ( min_y != max_y ) )
{
//in this case, we set up the uniform min and max values for pciture
//this->data[i]->plot( ss.str().c_str() , min_x , max_x , min_y , max_y );
}
else
{
//in this case, we DO NOT set up the uniform min and max values for pciture
this->data[i]->plot( ss.str().c_str() );
}
ss << "_GnuplotScript";
filenames.push_back( ss.str() );
}
//and now we have to call it somehow for all the files. We will create a script that will call all the other scripts and ceate a sequence of jpg/png files.
std::stringstream gif_file_name;
gif_file_name << filename << ".gif";
std::stringstream gif_gnuplot_script_file_name;
gif_gnuplot_script_file_name << filename << "_gif_gnuplot_script";
std::ofstream out;
out.open( gif_gnuplot_script_file_name.str().c_str() );
out << "set terminal gif animate delay " << delay << std::endl;
out << "set output '" << gif_file_name.str() << "'" << std::endl;
if ( min_x != max_x )
{
out << "set xrange [" << min_x << ":" << max_x << "]" << std::endl;
out << "set yrange [" << min_y << ":" << max_y << "]" << std::endl;
}
for ( size_t i = 0 ; i != filenames.size() ; ++i )
{
out << " load '" << filenames[i] << "'" << std::endl;
}
out.close();
std::cout << std::endl << std::endl << std::endl << "Open gnuplot terminal and type load '" << gif_gnuplot_script_file_name.str() << "' to create an animated gif \n";
}//plot
/**
* This procedure compute veliocity of the data in the topological process. The veliocity is devine as distance (of persistence information) over time. In this implementation we assume that
* the time points when the persistence information is sampled is equally distributed in time. Therefore that we may assume that time between two constitive persistence information is 1, in which
* case veliocity is just distance between two constitutitve diagrams.
* There is an obvious intuition behinf the veliocity: large veliocity means large changes in the topoogy.
**/
std::vector< double > compute_veliocity( double exponent = 1 )
{
std::vector< double > result;
result.reserve( this->data.size()-1 );
for ( size_t i = 0 ; i != this-data.size() - 1 ; ++i )
{
result.push_back( this->data[i]->distance( *this->data[i+1] , exponent ) );
}
return result;
}
/**
* This procedure compute veliocity of the data in the topological process. Accleration is defined as an increase of veliocity over time. In this implementation we assume that
* the time points when the persistence information is sampled is equally distributed in time. Therefore that we may assume that time between two constitive persistence information is 1, in which
* case acceleration is simply an increase of veliocity (the number may be negative).
* We have two versions of this procedure:
* The first one compute the acceleration based on the raw data (computations of veliocity are need to be done first, and later the informationa about veliocity is lost).
* The second one takes as an input the vector of veliocities, and return the vector of acceleration.
* There is an obvious intuition behid the acceleration: large (up to absolute value) value of acceleration implies large changes in the topology.
**/
std::vector< double > compute_acceleration( const std::vector< double >& veliocity )
{
std::vector< double > acceleration;
acceleration.reserve( veliocity.size() - 1 );
for ( size_t i = 0 ; i != veliocity.size()-1 ; ++i )
{
acceleration.push_back( veliocity[i+1]-veliocity[i] );
}
return acceleration;
}
std::vector< double > compute_acceleration( double exponent = 1 )
{
return this->compute_acceleration( this->compute_veliocity( exponent ) );
}
std::vector< Representation* > get_data(){return this->data;}
private:
std::vector< Representation* > data;
};
}//Gudhi_stat
}//Gudhi
#endif