diff options
Diffstat (limited to 'src')
9 files changed, 125 insertions, 59 deletions
diff --git a/src/Gudhi_stat/include/gudhi/bootstrap.h b/src/Gudhi_stat/include/gudhi/bootstrap.h index 83edb2e1..629240ed 100644 --- a/src/Gudhi_stat/include/gudhi/bootstrap.h +++ b/src/Gudhi_stat/include/gudhi/bootstrap.h @@ -26,6 +26,7 @@ #ifdef GUDHI_USE_TBB #include <tbb/parallel_sort.h> +#include <tbb/task_scheduler_init.h> #endif #include <vector> @@ -49,11 +50,16 @@ namespace Gudhi_stat **/ + template < typename PointCloudCharacteristics , typename CharacteristicFunction , typename DistanceBetweenPointsCharacteristics > -double bootstrap( size_t number_of_points , CharacteristicFunction f , DistanceBetweenPointsCharacteristics distance , size_t number_of_repetitions , size_t size_of_subsample , double quantile = 0.95 ) +double bootstrap( size_t number_of_points , CharacteristicFunction f , DistanceBetweenPointsCharacteristics distance , size_t number_of_repetitions , size_t size_of_subsample , double quantile = 0.95 , size_t maximal_number_of_threads_in_TBB = std::numeric_limits<size_t>::max() ) { bool dbg = false; + #ifdef GUDHI_USE_TBB + tbb::task_scheduler_init init(maximal_number_of_threads_in_TBB == std::numeric_limits<size_t>::max() ? tbb::task_scheduler_init::automatic : maximal_number_of_threads_in_TBB); + #endif + if ( size_of_subsample >= number_of_points ) { std::cerr << "Size of subsample is greater or equal to the number of points. The bootstrap procedure do not make sense in this case. \n"; @@ -63,34 +69,40 @@ double bootstrap( size_t number_of_points , CharacteristicFunction f , DistanceB //initialization of a random number generator: std::srand ( unsigned ( std::time(0) ) ); - //we will shuffle the vector of numbers 0,1,2,...,points.size()-1 in order to pick a subset of a size size_of_subsample - std::vector< size_t > numbers_to_sample( number_of_points ); - for ( size_t i = 0 ; i != number_of_points ; ++i ) - { - numbers_to_sample[i] = i; - } + //we will shuffle the vector of numbers 0,1,2,...,points.size()-1 in order to pick a subset of a size size_of_subsample + std::vector<size_t> numbers_to_sample_(number_of_points) ; //create vector of size_t of a size number_of_points + std::iota (std::begin(numbers_to_sample_), std::end(numbers_to_sample_), 0);//populate it with 1 2 3 ... number_of_points. //now we compute the characteristic od all the points: - PointCloudCharacteristics characteristic_of_all_points = f( numbers_to_sample ); + PointCloudCharacteristics characteristic_of_all_points = f( numbers_to_sample_ ); //vector to keep the distances between characteristic_of_points and characteristic_of_subsample: std::vector< double > vector_of_distances( number_of_repetitions , 0 ); - #ifdef GUDHI_USE_TBB - tbb::parallel_for ( tbb::blocked_range<size_t>(0, number_of_repetitions), [&](const tbb::blocked_range<size_t>& range) - { - for ( size_t it_no = range.begin() ; it_no != range.end() ; ++it_no ) - #else +//TODO- at the moment, the operations I am doing over here do not seems to be threat safe. When using TBB, I am getting wrong results. +//It is quite likelly because I am not using a method to compute persistence which is threat safe. VERIFY this as soon as I merge with +//the new metod to compute persistence. + +// #ifdef GUDHI_USE_TBB +// tbb::parallel_for ( tbb::blocked_range<size_t>(0, number_of_repetitions), [&](const tbb::blocked_range<size_t>& range) +// { +// for ( size_t it_no = range.begin() ; it_no != range.end() ; ++it_no ) +// #else for ( size_t it_no = 0 ; it_no < number_of_repetitions ; ++it_no ) - #endif +// #endif { if ( dbg ) { std::cout << "Still : " << number_of_repetitions-it_no << " tests to go. \n The subsampled vector consist of points number : "; + std::cout << "it_no : " << it_no << std::endl; + std::cout << "number_of_points : " << number_of_points << std::endl; } //do a random shuffle of vector_of_characteristics_of_poits + std::vector<size_t> numbers_to_sample(number_of_points) ; //create vector of size_t of a size number_of_points + std::iota (std::begin(numbers_to_sample), std::end(numbers_to_sample), 0);//populate it with 1 2 3 ... number_of_points. + //TODO: consider doing it in a smarter/faster way. std::random_shuffle( numbers_to_sample.begin() , numbers_to_sample.end() ); //construct a vector< PointType > of a size size_of_subsample: @@ -110,14 +122,18 @@ double bootstrap( size_t number_of_points , CharacteristicFunction f , DistanceB //and now we compute distance between characteristic_of_points and characteristic_of_subsample. Note that subsampled points go first, and this is neded, since sometimes all points are not needed. double dist = distance( characteristic_of_subsampled_points , characteristic_of_all_points ); - if ( dbg ) std::cout << "The distance between characteristic of all points and the characteristic of subsample is : " << dist << std::endl; + if ( dbg ) + { + std::cout << "The distance between characteristic of all points and the characteristic of subsample is : " << dist << std::endl; + getchar(); + } vector_of_distances[it_no] = dist; } - #ifdef GUDHI_USE_TBB - } - ); - #endif +// #ifdef GUDHI_USE_TBB +// } +// ); +// #endif size_t position_of_quantile = floor(quantile*vector_of_distances.size()); if ( position_of_quantile ) --position_of_quantile; diff --git a/src/Gudhi_stat/include/gudhi/multiplicative_bootstrap.h b/src/Gudhi_stat/include/gudhi/multiplicative_bootstrap.h index 627923fe..b38536d9 100644 --- a/src/Gudhi_stat/include/gudhi/multiplicative_bootstrap.h +++ b/src/Gudhi_stat/include/gudhi/multiplicative_bootstrap.h @@ -31,6 +31,7 @@ #ifdef GUDHI_USE_TBB #include <tbb/parallel_sort.h> +#include <tbb/task_scheduler_init.h> #endif #include <vector> @@ -58,13 +59,17 @@ template < typename TopologicalObject > class norm_of_objects { public: + norm_of_objects():power(1){} + norm_of_objects( double power_ ):power(power_){} double operator()( const TopologicalObject& obj )const { - TopologicalObject* empty = new TopologicalObject; - double dist = empty->distance( obj , -1 ); - delete empty; + TopologicalObject empty; + double dist = empty.distance( obj , power ); + //std::cerr << "dist : " << dist << std::endl;getchar(); return dist; } +private: + double power; }; @@ -73,10 +78,14 @@ public: * This is a generic function to perform multiplicative bootstrap. **/ template < typename TopologicalObject , typename OperationOnObjects , typename NormOnObjects > -double multiplicative_bootstrap( const std::vector< TopologicalObject* >& topological_objects , size_t number_of_bootstrap_operations , const OperationOnObjects& oper , const NormOnObjects& norm , double quantile = 0.95 ) +double multiplicative_bootstrap( const std::vector< TopologicalObject* >& topological_objects , size_t number_of_bootstrap_operations , const OperationOnObjects& oper , const NormOnObjects& norm , double quantile = 0.95 , size_t maximal_number_of_threads_in_TBB = std::numeric_limits<size_t>::max() ) { bool dbg = false; + #ifdef GUDHI_USE_TBB + tbb::task_scheduler_init init(maximal_number_of_threads_in_TBB == std::numeric_limits<size_t>::max() ? tbb::task_scheduler_init::automatic : maximal_number_of_threads_in_TBB); + #endif + //initialization of a random number generator: std::random_device rd; std::mt19937 generator( time(NULL) ); @@ -111,10 +120,30 @@ double multiplicative_bootstrap( const std::vector< TopologicalObject* >& topolo double rand_variable = norm_distribution( generator ); result = result + rand_variable*oper(*(topological_objects[i]) , average); } + if ( dbg ) + { + std::cerr << "Result 1 : " << result << std::endl; + getchar(); + } //HERE THE NORM SEEMS TO BE MISSING!! result = result.abs(); + if ( dbg ) + { + std::cerr << "Result 2 : " << result << std::endl; + getchar(); + } result = result*(1.0/sqrt( topological_objects.size() )); + if ( dbg ) + { + std::cerr << "Result 3 : " << result << std::endl; + getchar(); + } //NEED TO TAKE MAX + if ( dbg ) + { + std::cerr << "Result 4 : " << norm(result) << std::endl; + getchar(); + } vector_of_intermediate_characteristics[it_no] = norm(result); } #ifdef GUDHI_USE_TBB diff --git a/src/Gudhi_stat/include/gudhi/time_series_analysis/sliding_window.h b/src/Gudhi_stat/include/gudhi/time_series_analysis/sliding_window.h index 226cd809..c8d53a00 100644 --- a/src/Gudhi_stat/include/gudhi/time_series_analysis/sliding_window.h +++ b/src/Gudhi_stat/include/gudhi/time_series_analysis/sliding_window.h @@ -31,6 +31,11 @@ #include <gudhi/Simplex_tree.h> #include <gudhi/Persistent_cohomology.h> +//for alpha complexes. Should that be keept over here??? +//#include <CGAL/Epick_d.h> +#include <gudhi/Alpha_complex.h> + + using namespace std; namespace Gudhi @@ -159,14 +164,17 @@ public: out.close(); } + /** * This procedure compute persistence of the sliding window embedding point cloud by using Vietoris-Rips filtration. **/ - persistent_cohomology::Persistent_cohomology<ST, Field_Zp > compute_persistence_of_Vietoris_Rips_complex( double threshold , unsigned dim_max , unsigned field_coef = 2 , double min_persistence = 0 ) + + //persistent_cohomology::Persistent_cohomology<ST, Field_Zp > + void compute_persistence_of_Vietoris_Rips_complex( double threshold , unsigned dim_max , unsigned field_coef = 2 , double min_persistence = 0 ) { //compute points of the sliding window embedding. std::vector< std::vector< double > > points = this->create_point_cloud(); - + /* // Compute the proximity graph of the points. Graph_t prox_graph = compute_proximity_graph(points, threshold , euclidean_distance< std::vector< double > >); @@ -190,29 +198,29 @@ public: //compute persistent cohomology. pcoh.compute_persistent_cohomology(min_persistence); - return pcoh; + + //return pcoh; pcoh.output_diagram(); + */ }//compute_persistence_of_Vietoris_Rips_complex /** * This procedure compute persistence of the sliding window embedding point cloud by using Alpha complex filtration. **/ - + /*persistent_cohomology::Persistent_cohomology<ST, Field_Zp >*/ - /*void compute_persistence_of_Alpha_complex( double threshold , unsigned dim_max , unsigned field_coef = 2 , double min_persistence = 0 ) + void compute_persistence_of_Alpha_complex( double threshold , unsigned dim_max , unsigned field_coef = 2 , double min_persistence = 0 ) { - std::string off_file_points; - std::string output_file_diag; - Filtration_value alpha_square_max_value; - int coeff_field_characteristic; - Filtration_value min_persistence; - - program_options(argc, argv, off_file_points, output_file_diag, alpha_square_max_value, - coeff_field_characteristic, min_persistence); - - // ---------------------------------------------------------------------------- - // Init of an alpha complex from an OFF file - // ---------------------------------------------------------------------------- + /* + //first we need to take out points and convert it to CGAL points. + std::vector< std::vector< double > > points = this->create_point_cloud(); + typedef CGAL::Epick_d< CGAL::Dimension_tag<2> > Kernel; + std::vector< Kernel::Point_d > Vector_of_points; + Vector_of_points.reserve( points.size() ); + for ( size_t i = 0 ; i != points.size() ; ++i ) + { + Vector_of_points.push_back( Kernel::Point_d(points[i]) ); + } using Kernel = CGAL::Epick_d< CGAL::Dynamic_dimension_tag >; Gudhi::alpha_complex::Alpha_complex<Kernel> alpha_complex_from_file(off_file_points, alpha_square_max_value); @@ -233,19 +241,26 @@ public: // initializes the coefficient field for homology pcoh.init_coefficients(coeff_field_characteristic); - pcoh.compute_persistent_cohomology(min_persistence); + pcoh.compute_persistent_cohomology(min_persistence + + return pcoh; // Output the diagram in filediag - if (output_file_diag.empty()) { - pcoh.output_diagram(); - } else { - std::cout << "Result in file: " << output_file_diag << std::endl; - std::ofstream out(output_file_diag); - pcoh.output_diagram(out); - out.close(); + if (output_file_diag.empty()) + { + pcoh.output_diagram(); + } + else + { + std::cout << "Result in file: " << output_file_diag << std::endl; + std::ofstream out(output_file_diag); + pcoh.output_diagram(out); + out.close(); } + //here we shoud merge it with what Vincent did to write down the pairs, and we will simply return those pairs. + */ }//compute_persistence_of_Alpha_complex - */ + private: diff --git a/src/Gudhi_stat/utilities/CMakeLists.txt b/src/Gudhi_stat/utilities/CMakeLists.txt index 4b2f7081..b7989a96 100644 --- a/src/Gudhi_stat/utilities/CMakeLists.txt +++ b/src/Gudhi_stat/utilities/CMakeLists.txt @@ -140,5 +140,11 @@ target_link_libraries(Multiplicative_bootstrap ${Boost_SYSTEM_LIBRARY}) #add_executable ( compute_distance_between_vectors compute_distance_between_vectors.cpp ) #target_link_libraries(compute_distance_between_vectors ${Boost_SYSTEM_LIBRARY}) -add_executable ( sliding_window_embedding sliding_window_embedding.cpp ) -target_link_libraries(sliding_window_embedding ${Boost_SYSTEM_LIBRARY} ${TBB_LIBRARIES}) + +if(CGAL_FOUND) + add_executable ( sliding_window_embedding sliding_window_embedding.cpp ) + target_link_libraries(sliding_window_embedding ${Boost_SYSTEM_LIBRARY} ${CGAL_LIBRARY}) + if (TBB_FOUND) + target_link_libraries(sliding_window_embedding ${TBB_LIBRARIES}) + endif(TBB_FOUND) +endif(CGAL_FOUND) diff --git a/src/Gudhi_stat/utilities/Hausdorff_bootstrap.cpp b/src/Gudhi_stat/utilities/Hausdorff_bootstrap.cpp index a4eb73b8..78c75bd4 100644 --- a/src/Gudhi_stat/utilities/Hausdorff_bootstrap.cpp +++ b/src/Gudhi_stat/utilities/Hausdorff_bootstrap.cpp @@ -65,7 +65,7 @@ int main( int argc , char** argv ) //template < typename PointCloudCharacteristics , typename CharacteristicFunction , typename DistanceBetweenPointsCharacteristics > //In this case, the PointCloudCharacteristics is just a vector of numbers of points (in a order fixed on points vector). //CharacteristicFunction is just identity, transforming std::vector< size_t > to itself. - //DistanceBetweenPointsCharacteristics is the place were all happens. This class hace the information about the coordinates of the points, and allows to compute a Hausdorff distance between + //DistanceBetweenPointsCharacteristics is the place were all happens. This class have the information about the coordinates of the points, and allows to compute a Hausdorff distance between //the collection of all points, and the subsample. double result = bootstrap< std::vector< size_t > , //PointCloudCharacteristics diff --git a/src/Gudhi_stat/utilities/Landscape_bootstrap.cpp b/src/Gudhi_stat/utilities/Landscape_bootstrap.cpp index 1cf4de94..1b24771a 100644 --- a/src/Gudhi_stat/utilities/Landscape_bootstrap.cpp +++ b/src/Gudhi_stat/utilities/Landscape_bootstrap.cpp @@ -164,8 +164,7 @@ int main( int argc , char** argv ) std::cout << "Read : " << points.size() << " points.\n"; distance_between_landscapes distance( p );//L^p distance. - compute_persistence_landscape_of_a_point_cloud_in_certain_dimension characteristic_fun( points , dimension , threshold ); - + compute_persistence_landscape_of_a_point_cloud_in_certain_dimension characteristic_fun( points , dimension , threshold ); //and now we can run the real bootstrap. //template < typename PointCloudCharacteristics , typename CharacteristicFunction , typename DistanceBetweenPointsCharacteristics > diff --git a/src/Gudhi_stat/utilities/Multiplicative_bootstrap.cpp b/src/Gudhi_stat/utilities/Multiplicative_bootstrap.cpp index 9981d5c4..92020c21 100644 --- a/src/Gudhi_stat/utilities/Multiplicative_bootstrap.cpp +++ b/src/Gudhi_stat/utilities/Multiplicative_bootstrap.cpp @@ -51,7 +51,7 @@ int main( int argc , char** argv ) std::vector< Persistence_landscape* > collection_of_landscapes( filenames.size() ); for ( size_t i = 0 ; i != filenames.size() ; ++i ) { - std::vector< std::pair< double , double > > diag = read_standard_file( filenames[i].c_str() ); + std::vector< std::pair< double , double > > diag = read_gudhi_file( filenames[i].c_str() , 1 );//read_standard_file( filenames[i].c_str() ); collection_of_landscapes[i] = new Persistence_landscape( diag ); } @@ -61,7 +61,7 @@ int main( int argc , char** argv ) double result = multiplicative_bootstrap< Persistence_landscape , difference_of_objects<Persistence_landscape> , norm_of_objects<Persistence_landscape> > - ( collection_of_landscapes , number_of_repetitions_of_bootstrap , diff , norm , quantile ); + ( collection_of_landscapes , number_of_repetitions_of_bootstrap , diff , norm , quantile , 1 ); std::cout << "result of bootstrap : " << result << std::endl; return 0; diff --git a/src/Gudhi_stat/utilities/persistence_heat_maps/compute_distance_of_persistence_heat_maps.cpp b/src/Gudhi_stat/utilities/persistence_heat_maps/compute_distance_of_persistence_heat_maps.cpp index 631057af..cb094dff 100644 --- a/src/Gudhi_stat/utilities/persistence_heat_maps/compute_distance_of_persistence_heat_maps.cpp +++ b/src/Gudhi_stat/utilities/persistence_heat_maps/compute_distance_of_persistence_heat_maps.cpp @@ -34,9 +34,9 @@ using namespace Gudhi::Gudhi_stat; int main( int argc , char** argv ) { - std::cout << "This program compute dsitance of persistence landscapes stored in a file (the file needs to be created beforehand). \n"; - std::cout << "The first parameter of a program is an interger p. The program compute L^p distance of the two landscapes. For L^infty distance choose p = -1. \n"; - std::cout << "The remaining parameters of this programs are names of files with persistence landscapes.\n"; + std::cout << "This program compute dsitance of persistence heat maps stored in a file (the file needs to be created beforehand). \n"; + std::cout << "The first parameter of a program is an interger p. The program compute L^p distance of the two heat maps. For L^infty distance choose p = -1. \n"; + std::cout << "The remaining parameters of this programs are names of files with persistence heat maps.\n"; if ( argc < 3 ) { diff --git a/src/Gudhi_stat/utilities/sliding_window_embedding.cpp b/src/Gudhi_stat/utilities/sliding_window_embedding.cpp index 3df20125..1373b52c 100644 --- a/src/Gudhi_stat/utilities/sliding_window_embedding.cpp +++ b/src/Gudhi_stat/utilities/sliding_window_embedding.cpp @@ -32,6 +32,7 @@ int main( int argc , char** argv ) Sliding_window_embedding swe( time_series , 3 ); swe.create_point_cloud("pt_cloud"); + //persistent_cohomology::Persistent_cohomology<ST, Field_Zp >& a = swe.compute_persistence_of_Vietoris_Rips_complex( 1 , 2 ); |