src/Gudhi_stat/include/gudhi/topological_process.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417

/*    This file is part of the Gudhi Library. The Gudhi library
 *    (Geometric Understanding in Higher Dimensions) is a generic C++
 *    library for computational topology.
 *
 *    Author(s):       Pawel Dlotko
 *
 *    Copyright (C) 2015  INRIA (France)
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef TOPOLOGICAL_PROCESS_H
#define TOPOLOGICAL_PROCESS_H


//concretizations
#include <gudhi/persistence_representations/Vector_distances_in_diagram.h>
#include <gudhi/persistence_representations/Persistence_landscape.h>
#include <gudhi/persistence_representations/Persistence_landscape_on_grid.h>
#include <gudhi/persistence_representations/Persistence_heat_maps.h>
#include <vector>
#include <limits>

//extras
#include <gudhi/common_gudhi_stat.h>

namespace Gudhi 
{
namespace Gudhi_stat 
{
	
//over here we will need a few version of construct_representation_from_file procedure, since different representations may require different parameters. This is a procedure that in my 
//oppinion cannot be standarize, since construction of representation cannot. But, the remaining part of the code in my opinion is free from any details of representation.


//the reason I am separating the process of getting the intervals from the process of consstructing the represnetation is the following: for soem representations, we may need to ahve the same
//scale. To determine the scale, we need to know all the intervals before. So, we read the intervals first, then if needed, process them, to get the parametersof the representation,
//and only then construct the representations. 
std::vector< std::vector< std::pair< double , double > > > read_persistence_pairs_from_file_with_names_of_files( const char* filename )
{
	bool dbg = false;
	std::vector< std::vector< std::pair< double , double > > > result;
	
	std::vector< std::string > files = readFileNames( filename );	
	
	std::cout << "Here are the filenames in the file : " << filename << std::endl;
	for ( size_t i = 0 ; i != files.size() ; ++i )
	{
		std::cout << files[i] << std::endl;
	}
	
	for ( size_t i = 0 ; i != files.size() ; ++i )
	{
		std::vector< std::pair< double , double > > diag = read_standard_file( files[i].c_str() );	
		result.push_back( diag );		
		if ( dbg )
		{
			std::cerr << "Here is a diagram from a file : " << files[i].c_str() << std::endl;
			for ( size_t aa = 0  ; aa != diag.size() ; ++aa )
			{
				std::cout << diag[aa].first << " " << diag[aa].second << std::endl;
			}
			getchar();
		}
	}
	return result;
}

//When workign with time varying data, we tpically have a collection of files with intervals to produce one process. The intervals from all the files that constitute a single process can be read with 
//the read_persistence_pairs_from_file_with_names_of_files procedure. 
//But then, we also may need to read the persistence intervals of collection of proesses we may want to read the intervals first. This is what the procedre 
std::vector< std::vector< std::vector< std::pair< double , double > > > > read_persistence_pairs_from_files_with_names_of_files( const std::vector<const char*>& filenames )
{
	std::vector< std::vector< std::vector< std::pair< double , double > > > > result;
	result.reserve( filenames.size() );
	for ( size_t file_no = 0 ; file_no != filenames.size() ; ++file_no )
	{
		result.push_back( read_persistence_pairs_from_file_with_names_of_files( filenames[file_no] ) );
	}
	return result;
}//read_persistence_pairs_from_files_with_names_of_files


std::pair< std::pair< double,double > , std::pair< double,double > > find_x_and_y_ranges_of_intervals( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file )
{
	double min_x = std::numeric_limits< double >::max();
	double max_x = -std::numeric_limits< double >::max();
	double min_y = std::numeric_limits< double >::max();
	double max_y = -std::numeric_limits< double >::max();
	for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
	{	
		for ( size_t j = 0 ; j != intervals_from_file[i].size() ; ++j )
		{
			if ( min_x > intervals_from_file[i][j].first )min_x = intervals_from_file[i][j].first;
			if ( max_x < intervals_from_file[i][j].first )max_x = intervals_from_file[i][j].first;
			
			if ( min_y > intervals_from_file[i][j].second )min_y = intervals_from_file[i][j].second;
			if ( max_y < intervals_from_file[i][j].second )max_y = intervals_from_file[i][j].second;
		}		
	}
	return std::make_pair( std::make_pair( min_x,max_x ) , std::make_pair( min_y,max_y ) );
}//find_x_and_y_ranges_of_intervals


std::pair< std::pair< double,double > , std::pair< double,double > > find_x_and_y_ranges_of_intervals( const std::vector< std::vector< std::vector< std::pair< double , double > > > >& intervals_from_files )
{
	double min_x = std::numeric_limits< double >::max();
	double max_x = -std::numeric_limits< double >::max();
	double min_y = std::numeric_limits< double >::max();
	double max_y = -std::numeric_limits< double >::max();
	for ( size_t i = 0 ; i != intervals_from_files.size() ; ++i )
	{
		std::pair< std::pair< double,double > , std::pair< double,double > > ranges = find_x_and_y_ranges_of_intervals( intervals_from_files[i] );
		if ( min_x > ranges.first.first ) min_x = ranges.first.first;
		if ( max_x < ranges.first.second ) max_x = ranges.first.second;
		if ( min_y > ranges.second.first ) min_y = ranges.second.first;
		if ( max_y < ranges.second.second ) max_y = ranges.second.second;
	}
	return std::make_pair( std::make_pair( min_x,max_x ) , std::make_pair( min_y,max_y ) );
}


template <typename Representation>
std::vector< Representation* > construct_representation_from_file( const char* filename )	
{
	std::vector< std::vector< std::pair< double , double > > > intervals_from_file = read_persistence_pairs_from_file_with_names_of_files( filename );
	std::vector< Representation* > result( intervals_from_file.size() );
	for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
	{										
		Representation* l = new Representation( intervals_from_file[i] );			
		result[i] = l;		
	}
	return result;
}

//this one can be use for Persistence_intervals.h and Persistence_landscape.h
template <typename Representation>
std::vector< Representation* > construct_representation_from_file( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file)	
{
	
	std::vector< Representation* > result( intervals_from_file.size() );
	for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
	{										
		Representation* l = new Representation( intervals_from_file[i] );			
		result[i] = l;		
	}
	return result;
}

//this one can be use for Persistence_heat_maps.h
template <typename Representation>
std::vector< Representation* > construct_representation_from_file( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file , 
																   std::vector< std::vector<double> > filter = create_Gaussian_filter(5,1), 
																   bool erase_below_diagonal = false , 
																   size_t number_of_pixels = 1000 , 
																   double min_ = std::numeric_limits<double>::max() , double max_ = std::numeric_limits<double>::max()  )	
{	
	std::vector< Representation* > result( intervals_from_file.size() );
	for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
	{										
		Representation* l = new Representation( intervals_from_file[i] , filter , erase_below_diagonal , number_of_pixels , min_ , max_ );			
		result[i] = l;		
	}
	return result;
}

//this one can be use for Persistence_landscape_on_grid.h
template <typename Representation>
std::vector< Representation* > construct_representation_from_file( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file, 
																   double grid_min_ , double grid_max_ , size_t number_of_points_ 
)	
{	
	std::vector< Representation* > result( intervals_from_file.size() );
	for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
	{										
		Representation* l = new Representation( intervals_from_file[i] , grid_min_ , grid_max_ , number_of_points_ );			
		result[i] = l;		
	}
	return result;
}


//this one can be use for Vector_distances_in_diagram.h
template <typename Representation>
std::vector< Representation* > construct_representation_from_file( const std::vector< std::vector< std::pair< double , double > > >& intervals_from_file, 
																   size_t where_to_cut
)	
{
	std::vector< Representation* > result( intervals_from_file.size() );
	for ( size_t i = 0 ; i != intervals_from_file.size() ; ++i )
	{										
		Representation* l = new Representation( intervals_from_file[i] , where_to_cut );			
		result[i] = l;		
	}
	return result;
}
	

template <typename Representation>
class Topological_process
{
public:
	Topological_process(){};
	~Topological_process()
	{		
		for ( size_t i = 0  ; i != this->data.size() ; ++i )
		{
			delete this->data[i];
		}
	}
	
	Topological_process( const std::vector< Representation* >& data_ ):data(data_){}
	double distance( const Topological_process& second , double exponent = 1 )
	{
		if ( this->data.size() != second.data.size() )
		{
			throw "Incompatible lengths of topological processes, we cannot compute the distance in this case \n";
		}
		double result = 0;
		for ( size_t i = 0 ; i != this->data.size() ; ++i )
		{
			result += this->data[i]->distance( *second.data[i] , exponent );
		}
		return result;
	}	
	
	void compute_average( const std::vector< Topological_process* >& to_average )
	{
		//since we will substitute whatever data we have in this object with an average, we clear the data in this object first:
		this->data.clear();
		//then we need to check if all the topological processes in the vector to_average have the same length.
		if ( to_average.size() == 0 )return;
		for ( size_t i = 1 ; i != to_average.size() ; ++i )
		{
			if ( to_average[0]->data.size() != to_average[i]->data.size() )
			{
				throw "Incompatible lengths of topological processes, the averages cannot be computed \n";
			}
		}
		
		this->data.reserve( to_average[0]->data.size() );
		
		for ( size_t level = 0 ; level != to_average[0]->data.size() ; ++level )
		{
			//now we will be averaging the level level:
			std::vector< Representation* > to_average_at_this_level;
			to_average_at_this_level.reserve( to_average.size() );
			for ( size_t i = 0 ; i != to_average.size() ; ++i )
			{
				to_average_at_this_level.push_back( to_average[i]->data[level] );
			}
			Representation* average_representation_on_this_level = new Representation;
			average_representation_on_this_level->compute_average( to_average_at_this_level );
			this->data.push_back( average_representation_on_this_level );
		}				
	}
	
	std::pair< double , double > get_x_range()const
	{
		double min_x = std::numeric_limits< double >::max();		
		double max_x = -std::numeric_limits< double >::max();
		for ( size_t i = 0 ; i != this->data.size() ; ++i )
		{
			std::pair< double , double > xrange = this->data[i]->get_x_range();			
			if ( min_x > xrange.first )min_x = xrange.first;
			if ( max_x < xrange.second )max_x = xrange.second;			
		}
		return std::make_pair( min_x , max_x );
	}
	
	std::pair< double , double > get_y_range()const
	{
		double min_y = std::numeric_limits< double >::max();		
		double max_y = -std::numeric_limits< double >::max();
		for ( size_t i = 0 ; i != this->data.size() ; ++i )
		{
			std::pair< double , double > yrange = this->data[i]->get_y_range();			
			if ( min_y > yrange.first )min_y = yrange.first;
			if ( max_y < yrange.second )max_y = yrange.second;			
		}
		return std::make_pair( min_y , max_y );
	}
	
	/**
	 * The procedure checks if the ranges of data are the same for all of them.
	**/ 
	bool are_the_data_aligned()const
	{
		if ( this->data.size() == 0 )return true;//empty collection is aligned 
		std::pair< double , double > x_range = this->data[0]->get_x_range();
		std::pair< double , double > y_range = this->data[0]->get_y_range();
		for ( size_t i = 1 ; i != this->data.size() ; ++i )
		{
			if ( (x_range != this->data[i]->get_x_range()) || (y_range != this->data[i]->get_y_range()) )
			{
				return false;
			}
		}
		return true;
	}
	
	
	//scalar products?
	//confidence bounds?
	
	void plot( const char* filename , size_t delay = 30 , double min_x = std::numeric_limits<double>::max() , double max_x = std::numeric_limits<double>::max() , double min_y = std::numeric_limits<double>::max() , double max_y = std::numeric_limits<double>::max() )
	{				
		std::vector< std::string > filenames;		
		//over here we need to			
		for ( size_t i = 0 ; i != this->data.size() ; ++i )
		{
			std::stringstream ss;
			ss << filename << "_" << i;
			if ( ( min_x != max_x ) && ( min_y != max_y ) )
			{
				//in this case, we set up the uniform min and max values for pciture
				//this->data[i]->plot( ss.str().c_str() , min_x , max_x , min_y , max_y );
			}
			else
			{
				//in this case, we DO NOT set up the uniform min and max values for pciture
				this->data[i]->plot( ss.str().c_str() );
			}
			ss << "_GnuplotScript";
			filenames.push_back( ss.str() );									
		}
		//and now we have to call it somehow for all the files. We will create a script that will call all the other scripts and ceate a sequence of jpg/png files. 

	
		std::stringstream gif_file_name;
		gif_file_name << filename << ".gif";
		std::stringstream gif_gnuplot_script_file_name;
		gif_gnuplot_script_file_name << filename << "_gif_gnuplot_script";
		
		std::ofstream out;
		out.open( gif_gnuplot_script_file_name.str().c_str() );
		out << "set terminal gif animate delay " << delay << std::endl;
		out << "set output '" << gif_file_name.str() << "'" << std::endl;
		if ( min_x != max_x )
		{
			out << "set xrange [" << min_x << ":" << max_x << "]" << std::endl;
			out << "set yrange [" << min_y << ":" << max_y << "]" << std::endl;		
		}
		
		for ( size_t i = 0 ; i != filenames.size() ; ++i )
		{
			out << " load '" << filenames[i] << "'" << std::endl;
		}
		out.close();		
		
		std::cout << std::endl << std::endl << std::endl << "Open gnuplot terminal and type load '" << gif_gnuplot_script_file_name.str() << "' to create an animated gif \n";
	}//plot
	
	/**
	 * This procedure compute veliocity of the data in the topological process. The veliocity is devine as distance (of persistence information) over time. In this implementation we assume that
	 * the time points when the persistence information is sampled is equally distributed in time. Therefore that we may assume that time between two constitive persistence information is 1, in which
	 * case veliocity is just distance between two constitutitve diagrams. 
	 * There is an obvious intuition behinf the veliocity: large veliocity means large changes in the topoogy. 
	**/
	std::vector< double > compute_veliocity( double exponent = 1 )
	{
		std::vector< double > result;
		result.reserve( this->data.size()-1 );
		for ( size_t i = 0 ; i != this-data.size() - 1 ; ++i )
		{
			result.push_back( this->data[i]->distance( *this->data[i+1] , exponent ) );
		}
		return result;
	} 
	
	/**
	 * This procedure compute veliocity of the data in the topological process. Accleration is defined as an increase of veliocity over time. In this implementation we assume that
	 * the time points when the persistence information is sampled is equally distributed in time. Therefore that we may assume that time between two constitive persistence information is 1, in which
	 * case acceleration is simply an increase of veliocity (the number may be negative).
	 * We have two versions of this procedure:
	 * The first one compute the acceleration based on the raw data (computations of veliocity are need to be done first, and later the informationa about veliocity is lost). 
	 * The second one takes as an input the vector of veliocities, and return the vector of acceleration.
	 * There is an obvious intuition behid the acceleration: large (up to absolute value) value of acceleration implies large changes in the topology.  
	**/ 
	std::vector< double > compute_acceleration( const std::vector< double >& veliocity )
	{
		std::vector< double > acceleration;
		acceleration.reserve( veliocity.size() - 1 );
		for ( size_t i = 0 ; i != veliocity.size()-1 ; ++i )
		{
			acceleration.push_back( veliocity[i+1]-veliocity[i] );
		}
		return acceleration;
	}
	std::vector< double > compute_acceleration( double exponent = 1 )
	{		
		return this->compute_acceleration( this->compute_veliocity( exponent ) );
	}	
	
	
	std::vector< Representation* > get_data(){return this->data;}
private:
	std::vector< Representation* > data;
};


}//Gudhi_stat
}//Gudhi

#endif