author     pdlotko <pdlotko@636b058d-ea47-450e-bf9e-a15bfbe3eedb>  2017-03-17 14:00:32 +0000
committer  pdlotko <pdlotko@636b058d-ea47-450e-bf9e-a15bfbe3eedb>  2017-03-17 14:00:32 +0000
commit     5b6290423812b24cc3f67a4a8e9c9027b9d95cd6 (patch)
tree       d673da130ac54998df1121c2f1f1327f2ef0aaab /src/Gudhi_stat/include
parent     a9e78e63472264dd2baaeac196a8a74692265e22 (diff)
Adding a few corrections to gudhi_stat
git-svn-id: svn+ssh://scm.gforge.inria.fr/svnroot/gudhi/branches/gudhi_stat@2196 636b058d-ea47-450e-bf9e-a15bfbe3eedb
Former-commit-id: e5bbe287169cb6750aaadf81db65ef873186da06
Diffstat (limited to 'src/Gudhi_stat/include')
-rw-r--r--  src/Gudhi_stat/include/gudhi/bootstrap.h                             54
-rw-r--r--  src/Gudhi_stat/include/gudhi/multiplicative_bootstrap.h              37
-rw-r--r--  src/Gudhi_stat/include/gudhi/time_series_analysis/sliding_window.h   67
3 files changed, 109 insertions, 49 deletions
diff --git a/src/Gudhi_stat/include/gudhi/bootstrap.h b/src/Gudhi_stat/include/gudhi/bootstrap.h
index 83edb2e1..629240ed 100644
--- a/src/Gudhi_stat/include/gudhi/bootstrap.h
+++ b/src/Gudhi_stat/include/gudhi/bootstrap.h
@@ -26,6 +26,7 @@
#ifdef GUDHI_USE_TBB
#include <tbb/parallel_sort.h>
+#include <tbb/task_scheduler_init.h>
#endif
#include <vector>
@@ -49,11 +50,16 @@ namespace Gudhi_stat
**/
+
template < typename PointCloudCharacteristics , typename CharacteristicFunction , typename DistanceBetweenPointsCharacteristics >
-double bootstrap( size_t number_of_points , CharacteristicFunction f , DistanceBetweenPointsCharacteristics distance , size_t number_of_repetitions , size_t size_of_subsample , double quantile = 0.95 )
+double bootstrap( size_t number_of_points , CharacteristicFunction f , DistanceBetweenPointsCharacteristics distance , size_t number_of_repetitions , size_t size_of_subsample , double quantile = 0.95 , size_t maximal_number_of_threads_in_TBB = std::numeric_limits<size_t>::max() )
{
bool dbg = false;
+ #ifdef GUDHI_USE_TBB
+ tbb::task_scheduler_init init(maximal_number_of_threads_in_TBB == std::numeric_limits<size_t>::max() ? tbb::task_scheduler_init::automatic : maximal_number_of_threads_in_TBB);
+ #endif
+
if ( size_of_subsample >= number_of_points )
{
std::cerr << "Size of subsample is greater or equal to the number of points. The bootstrap procedure do not make sense in this case. \n";
@@ -63,34 +69,40 @@ double bootstrap( size_t number_of_points , CharacteristicFunction f , DistanceB
//initialization of a random number generator:
std::srand ( unsigned ( std::time(0) ) );
- //we will shuffle the vector of numbers 0,1,2,...,points.size()-1 in order to pick a subset of a size size_of_subsample
- std::vector< size_t > numbers_to_sample( number_of_points );
- for ( size_t i = 0 ; i != number_of_points ; ++i )
- {
- numbers_to_sample[i] = i;
- }
+ //we will shuffle the vector of numbers 0,1,2,...,points.size()-1 in order to pick a subset of size size_of_subsample
+ std::vector<size_t> numbers_to_sample_(number_of_points) ; //create a vector of size_t of size number_of_points
+ std::iota (std::begin(numbers_to_sample_), std::end(numbers_to_sample_), 0);//populate it with 0, 1, 2, ..., number_of_points-1.
//now we compute the characteristic of all the points:
- PointCloudCharacteristics characteristic_of_all_points = f( numbers_to_sample );
+ PointCloudCharacteristics characteristic_of_all_points = f( numbers_to_sample_ );
//vector to keep the distances between characteristic_of_points and characteristic_of_subsample:
std::vector< double > vector_of_distances( number_of_repetitions , 0 );
- #ifdef GUDHI_USE_TBB
- tbb::parallel_for ( tbb::blocked_range<size_t>(0, number_of_repetitions), [&](const tbb::blocked_range<size_t>& range)
- {
- for ( size_t it_no = range.begin() ; it_no != range.end() ; ++it_no )
- #else
+//TODO: at the moment, the operations done over here do not seem to be thread safe. When using TBB, I am getting wrong results.
+//It is quite likely because the method used to compute persistence is not thread safe. VERIFY this as soon as I merge with
+//the new method to compute persistence.
+
+// #ifdef GUDHI_USE_TBB
+// tbb::parallel_for ( tbb::blocked_range<size_t>(0, number_of_repetitions), [&](const tbb::blocked_range<size_t>& range)
+// {
+// for ( size_t it_no = range.begin() ; it_no != range.end() ; ++it_no )
+// #else
for ( size_t it_no = 0 ; it_no < number_of_repetitions ; ++it_no )
- #endif
+// #endif
{
if ( dbg )
{
std::cout << "Still : " << number_of_repetitions-it_no << " tests to go. \n The subsampled vector consist of points number : ";
+ std::cout << "it_no : " << it_no << std::endl;
+ std::cout << "number_of_points : " << number_of_points << std::endl;
}
//do a random shuffle of vector_of_characteristics_of_poits
+ std::vector<size_t> numbers_to_sample(number_of_points) ; //create a vector of size_t of size number_of_points
+ std::iota (std::begin(numbers_to_sample), std::end(numbers_to_sample), 0);//populate it with 0, 1, 2, ..., number_of_points-1.
+ //TODO: consider doing it in a smarter/faster way.
std::random_shuffle( numbers_to_sample.begin() , numbers_to_sample.end() );
//construct a vector< PointType > of a size size_of_subsample:
@@ -110,14 +122,18 @@ double bootstrap( size_t number_of_points , CharacteristicFunction f , DistanceB
//and now we compute the distance between characteristic_of_points and characteristic_of_subsample. Note that the subsampled points go first; this is needed, since sometimes not all points are needed.
double dist = distance( characteristic_of_subsampled_points , characteristic_of_all_points );
- if ( dbg ) std::cout << "The distance between characteristic of all points and the characteristic of subsample is : " << dist << std::endl;
+ if ( dbg )
+ {
+ std::cout << "The distance between characteristic of all points and the characteristic of subsample is : " << dist << std::endl;
+ getchar();
+ }
vector_of_distances[it_no] = dist;
}
- #ifdef GUDHI_USE_TBB
- }
- );
- #endif
+// #ifdef GUDHI_USE_TBB
+// }
+// );
+// #endif
size_t position_of_quantile = floor(quantile*vector_of_distances.size());
if ( position_of_quantile ) --position_of_quantile;
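
Note: the main user-visible change in bootstrap.h is the new optional last parameter that caps the number of TBB threads (std::numeric_limits<size_t>::max() keeps the automatic setting). A minimal usage sketch follows; the toy characteristic and distance functors, the include path and the namespace qualification are illustrative assumptions, not code from this commit.

#include <gudhi/bootstrap.h>   // assumed include path for this header
#include <vector>
#include <cmath>

int main()
{
    // Toy characteristic of a set of point indices: their mean
    // (a stand-in for a persistence-based characteristic).
    auto characteristic = []( const std::vector<size_t>& indices ) -> double
    {
        double sum = 0;
        for ( size_t i : indices ) sum += static_cast<double>( i );
        return indices.empty() ? 0.0 : sum / indices.size();
    };
    // Toy distance between two such characteristics.
    auto distance = []( double a , double b ){ return std::fabs( a - b ); };

    // 1000 points, 100 bootstrap repetitions, subsamples of size 50, the 0.95 quantile,
    // and at most 4 TBB threads via the new last parameter. The first template argument
    // fixes PointCloudCharacteristics, which is not deducible from the call; the
    // Gudhi_stat:: qualification is assumed from the surrounding namespace.
    double q = Gudhi_stat::bootstrap<double>( 1000 , characteristic , distance , 100 , 50 , 0.95 , 4 );
    (void)q;
    return 0;
}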
diff --git a/src/Gudhi_stat/include/gudhi/multiplicative_bootstrap.h b/src/Gudhi_stat/include/gudhi/multiplicative_bootstrap.h
index 627923fe..b38536d9 100644
--- a/src/Gudhi_stat/include/gudhi/multiplicative_bootstrap.h
+++ b/src/Gudhi_stat/include/gudhi/multiplicative_bootstrap.h
@@ -31,6 +31,7 @@
#ifdef GUDHI_USE_TBB
#include <tbb/parallel_sort.h>
+#include <tbb/task_scheduler_init.h>
#endif
#include <vector>
@@ -58,13 +59,17 @@ template < typename TopologicalObject >
class norm_of_objects
{
public:
+ norm_of_objects():power(1){}
+ norm_of_objects( double power_ ):power(power_){}
double operator()( const TopologicalObject& obj )const
{
- TopologicalObject* empty = new TopologicalObject;
- double dist = empty->distance( obj , -1 );
- delete empty;
+ TopologicalObject empty;
+ double dist = empty.distance( obj , power );
+ //std::cerr << "dist : " << dist << std::endl;getchar();
return dist;
}
+private:
+ double power;
};
@@ -73,10 +78,14 @@ public:
* This is a generic function to perform multiplicative bootstrap.
**/
template < typename TopologicalObject , typename OperationOnObjects , typename NormOnObjects >
-double multiplicative_bootstrap( const std::vector< TopologicalObject* >& topological_objects , size_t number_of_bootstrap_operations , const OperationOnObjects& oper , const NormOnObjects& norm , double quantile = 0.95 )
+double multiplicative_bootstrap( const std::vector< TopologicalObject* >& topological_objects , size_t number_of_bootstrap_operations , const OperationOnObjects& oper , const NormOnObjects& norm , double quantile = 0.95 , size_t maximal_number_of_threads_in_TBB = std::numeric_limits<size_t>::max() )
{
bool dbg = false;
+ #ifdef GUDHI_USE_TBB
+ tbb::task_scheduler_init init(maximal_number_of_threads_in_TBB == std::numeric_limits<size_t>::max() ? tbb::task_scheduler_init::automatic : maximal_number_of_threads_in_TBB);
+ #endif
+
//initialization of a random number generator:
std::random_device rd;
std::mt19937 generator( time(NULL) );
@@ -111,10 +120,30 @@ double multiplicative_bootstrap( const std::vector< TopologicalObject* >& topolo
double rand_variable = norm_distribution( generator );
result = result + rand_variable*oper(*(topological_objects[i]) , average);
}
+ if ( dbg )
+ {
+ std::cerr << "Result 1 : " << result << std::endl;
+ getchar();
+ }
//HERE THE NORM SEEMS TO BE MISSING!!
result = result.abs();
+ if ( dbg )
+ {
+ std::cerr << "Result 2 : " << result << std::endl;
+ getchar();
+ }
result = result*(1.0/sqrt( topological_objects.size() ));
+ if ( dbg )
+ {
+ std::cerr << "Result 3 : " << result << std::endl;
+ getchar();
+ }
//NEED TO TAKE MAX
+ if ( dbg )
+ {
+ std::cerr << "Result 4 : " << norm(result) << std::endl;
+ getchar();
+ }
vector_of_intermediate_characteristics[it_no] = norm(result);
}
#ifdef GUDHI_USE_TBB
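
Note: for orientation, here is a self-contained sketch of how the new power-aware norm_of_objects is meant to be used. Toy_object is a hypothetical stand-in for any Gudhi_stat representation exposing distance( other , power ); only the norm_of_objects name comes from this commit.

#include <gudhi/multiplicative_bootstrap.h>   // assumed include path for norm_of_objects
#include <cmath>
#include <iostream>

// Hypothetical stand-in for a topological object (e.g. a persistence representation).
struct Toy_object
{
    double value = 0;
    // Distance from another object, raised to the given power.
    double distance( const Toy_object& other , double power ) const
    {
        return std::pow( std::fabs( value - other.value ) , power );
    }
};

int main()
{
    Toy_object t;
    t.value = 3.0;
    // The functor measures the distance of obj from a default-constructed ("zero") object,
    // forwarding the power chosen at construction instead of the hard-coded -1 used before.
    Gudhi_stat::norm_of_objects<Toy_object> squared_norm( 2.0 );   // namespace qualification assumed
    std::cout << squared_norm( t ) << std::endl;                   // prints 9, i.e. |3 - 0|^2
    return 0;
}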
diff --git a/src/Gudhi_stat/include/gudhi/time_series_analysis/sliding_window.h b/src/Gudhi_stat/include/gudhi/time_series_analysis/sliding_window.h
index 226cd809..c8d53a00 100644
--- a/src/Gudhi_stat/include/gudhi/time_series_analysis/sliding_window.h
+++ b/src/Gudhi_stat/include/gudhi/time_series_analysis/sliding_window.h
@@ -31,6 +31,11 @@
#include <gudhi/Simplex_tree.h>
#include <gudhi/Persistent_cohomology.h>
+//for alpha complexes. Should that be kept over here?
+//#include <CGAL/Epick_d.h>
+#include <gudhi/Alpha_complex.h>
+
+
using namespace std;
namespace Gudhi
@@ -159,14 +164,17 @@ public:
out.close();
}
+
/**
* This procedure computes the persistence of the sliding window embedding point cloud using the Vietoris-Rips filtration.
**/
- persistent_cohomology::Persistent_cohomology<ST, Field_Zp > compute_persistence_of_Vietoris_Rips_complex( double threshold , unsigned dim_max , unsigned field_coef = 2 , double min_persistence = 0 )
+
+ //persistent_cohomology::Persistent_cohomology<ST, Field_Zp >
+ void compute_persistence_of_Vietoris_Rips_complex( double threshold , unsigned dim_max , unsigned field_coef = 2 , double min_persistence = 0 )
{
//compute points of the sliding window embedding.
std::vector< std::vector< double > > points = this->create_point_cloud();
-
+ /*
// Compute the proximity graph of the points.
Graph_t prox_graph = compute_proximity_graph(points, threshold , euclidean_distance< std::vector< double > >);
@@ -190,29 +198,29 @@ public:
//compute persistent cohomology.
pcoh.compute_persistent_cohomology(min_persistence);
- return pcoh;
+
+ //return pcoh;
pcoh.output_diagram();
+ */
}//compute_persistence_of_Vietoris_Rips_complex
/**
* This procedure computes the persistence of the sliding window embedding point cloud using the Alpha complex filtration.
**/
-
+
/*persistent_cohomology::Persistent_cohomology<ST, Field_Zp >*/
- /*void compute_persistence_of_Alpha_complex( double threshold , unsigned dim_max , unsigned field_coef = 2 , double min_persistence = 0 )
+ void compute_persistence_of_Alpha_complex( double threshold , unsigned dim_max , unsigned field_coef = 2 , double min_persistence = 0 )
{
- std::string off_file_points;
- std::string output_file_diag;
- Filtration_value alpha_square_max_value;
- int coeff_field_characteristic;
- Filtration_value min_persistence;
-
- program_options(argc, argv, off_file_points, output_file_diag, alpha_square_max_value,
- coeff_field_characteristic, min_persistence);
-
- // ----------------------------------------------------------------------------
- // Init of an alpha complex from an OFF file
- // ----------------------------------------------------------------------------
+ /*
+ //first we need to take out points and convert it to CGAL points.
+ std::vector< std::vector< double > > points = this->create_point_cloud();
+ typedef CGAL::Epick_d< CGAL::Dimension_tag<2> > Kernel;
+ std::vector< Kernel::Point_d > Vector_of_points;
+ Vector_of_points.reserve( points.size() );
+ for ( size_t i = 0 ; i != points.size() ; ++i )
+ {
+ Vector_of_points.push_back( Kernel::Point_d(points[i]) );
+ }
using Kernel = CGAL::Epick_d< CGAL::Dynamic_dimension_tag >;
Gudhi::alpha_complex::Alpha_complex<Kernel> alpha_complex_from_file(off_file_points, alpha_square_max_value);
@@ -233,19 +241,26 @@ public:
// initializes the coefficient field for homology
pcoh.init_coefficients(coeff_field_characteristic);
- pcoh.compute_persistent_cohomology(min_persistence);
+ pcoh.compute_persistent_cohomology(min_persistence);
+
+ return pcoh;
// Output the diagram in filediag
- if (output_file_diag.empty()) {
- pcoh.output_diagram();
- } else {
- std::cout << "Result in file: " << output_file_diag << std::endl;
- std::ofstream out(output_file_diag);
- pcoh.output_diagram(out);
- out.close();
+ if (output_file_diag.empty())
+ {
+ pcoh.output_diagram();
+ }
+ else
+ {
+ std::cout << "Result in file: " << output_file_diag << std::endl;
+ std::ofstream out(output_file_diag);
+ pcoh.output_diagram(out);
+ out.close();
}
+ //here we should merge it with what Vincent did to write down the pairs, and we will simply return those pairs.
+ */
}//compute_persistence_of_Alpha_complex
- */
+
private:
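
Note: the alpha complex variant above is left as a commented-out sketch in this commit. For orientation, below is a minimal, self-contained version of the pipeline it aims at, written against the public Alpha_complex / Simplex_tree / Persistent_cohomology API; the sample points, the alpha-square threshold and the Z/2 coefficients are illustrative, and none of this code is part of the commit.

#include <gudhi/Alpha_complex.h>
#include <gudhi/Simplex_tree.h>
#include <gudhi/Persistent_cohomology.h>
#include <CGAL/Epick_d.h>
#include <vector>

int main()
{
    using Kernel = CGAL::Epick_d< CGAL::Dynamic_dimension_tag >;
    using Point  = Kernel::Point_d;

    // Stand-in for this->create_point_cloud(): a few 2d points.
    std::vector< std::vector<double> > raw = { {0,0} , {1,0} , {0,1} , {1,1} };
    std::vector< Point > points;
    points.reserve( raw.size() );
    for ( const auto& p : raw ) points.push_back( Point( p.begin() , p.end() ) );

    // Build the alpha complex and turn it into a simplex tree up to a squared-alpha threshold.
    Gudhi::alpha_complex::Alpha_complex< Kernel > alpha( points );
    Gudhi::Simplex_tree<> simplex_tree;
    double alpha_square_max_value = 1.0;   // illustrative threshold
    if ( alpha.create_complex( simplex_tree , alpha_square_max_value ) )
    {
        // Persistent cohomology over Z/2Z, mirroring the commented-out sketch above.
        Gudhi::persistent_cohomology::Persistent_cohomology< Gudhi::Simplex_tree<> ,
            Gudhi::persistent_cohomology::Field_Zp > pcoh( simplex_tree );
        pcoh.init_coefficients( 2 );
        pcoh.compute_persistent_cohomology( 0.0 );   // min_persistence = 0
        pcoh.output_diagram();
    }
    return 0;
}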