/* This file is part of the Gudhi Library. The Gudhi library
* (Geometric Understanding in Higher Dimensions) is a generic C++
* library for computational topology.
*
* Author(s): Pawel Dlotko
*
* Copyright (C) 2015 INRIA (France)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef BOOTSTRAP_H
#define BOOTSTRAP_H
#ifdef GUDHI_USE_TBB
#include <tbb/parallel_for.h>
#include <tbb/task_scheduler_init.h>
#endif
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <limits>
#include <numeric>
#include <random>
#include <utility>
#include <vector>
namespace Gudhi
{
namespace Gudhi_stat
{
/**
 * Generic bootstrap procedure.
 *
 * The caller supplies a functor f which, given a vector of point indices (values in 0 .. number_of_points-1),
 * returns the characteristic (PointCloudCharacteristics) of the corresponding collection of points. The
 * characteristic may be the point cloud itself, its persistence diagram in a fixed dimension, or anything
 * else; the only requirement is that two characteristics can be compared with the distance functor.
 *
 * The procedure:
 *  1. calls f on all indices 0 .. number_of_points-1 to get the characteristic of the whole point cloud,
 *  2. number_of_repetitions times draws size_of_subsample distinct indices uniformly at random, calls f on
 *     them and stores distance( characteristic_of_subsample , characteristic_of_all_points ) -- the
 *     subsample is always the FIRST argument of distance,
 *  3. returns the element which would sit at position floor( quantile * number_of_repetitions ) - 1
 *     (position 0 when that expression is not positive) if the stored distances were sorted increasingly.
 *
 * @param number_of_points number of points in the whole cloud; indices passed to f are in [0, number_of_points).
 * @param f functor callable with a std::vector< size_t > of indices, returning PointCloudCharacteristics.
 * @param distance functor callable as distance( subsample_characteristic , full_characteristic ), result convertible to double.
 * @param number_of_repetitions number of random subsamples to draw.
 * @param size_of_subsample number of distinct indices in every subsample; must be smaller than number_of_points.
 * @param quantile requested quantile; values outside [0,1] (and NaN) are clamped into [0,1].
 * @param maximal_number_of_threads_in_TBB only used when GUDHI_USE_TBB is defined: number of threads handed to
 *        tbb::task_scheduler_init (the default value selects tbb::task_scheduler_init::automatic).
 * @return the requested quantile of the bootstrap distances, or 0 (after a message on std::cerr) when
 *         size_of_subsample >= number_of_points or number_of_repetitions == 0.
 *
 * Note for the Hausdorff/bottleneck bootstrap: any extra scaling of the result (e.g. multiplication by 2)
 * comes from the geometry of the distance, not from the bootstrap, and has to be applied by the caller.
 **/
template < typename PointCloudCharacteristics , typename CharacteristicFunction , typename DistanceBetweenPointsCharacteristics >
double bootstrap( size_t number_of_points , CharacteristicFunction f , DistanceBetweenPointsCharacteristics distance , size_t number_of_repetitions , size_t size_of_subsample , double quantile = 0.95 , size_t maximal_number_of_threads_in_TBB = std::numeric_limits<size_t>::max() )
{
    const bool dbg = false;

#ifdef GUDHI_USE_TBB
    //explicit conversion to int: mixing the int constant 'automatic' with size_t in one conditional
    //expression would silently convert it to a huge unsigned value first.
    const int number_of_threads =
        ( maximal_number_of_threads_in_TBB == std::numeric_limits<size_t>::max() )
            ? static_cast<int>( tbb::task_scheduler_init::automatic )
            : static_cast<int>( maximal_number_of_threads_in_TBB );
    tbb::task_scheduler_init init( number_of_threads );
#else
    (void)maximal_number_of_threads_in_TBB;//only meaningful when TBB is enabled.
#endif

    if ( size_of_subsample >= number_of_points )
    {
        std::cerr << "Size of subsample is greater or equal to the number of points. The bootstrap procedure do not make sense in this case. \n";
        return 0;
    }
    if ( number_of_repetitions == 0 )
    {
        //without any repetition there is no distance to take a quantile of.
        std::cerr << "Number of repetitions is zero. The bootstrap procedure do not make sense in this case. \n";
        return 0;
    }

    //indices 0,1,2,...,number_of_points-1 of all the points:
    std::vector< size_t > all_indices( number_of_points );
    std::iota( all_indices.begin() , all_indices.end() , static_cast<size_t>(0) );

    //characteristic of the whole point cloud:
    PointCloudCharacteristics characteristic_of_all_points = f( all_indices );

    //A private copy of the indices which is (partially) reshuffled in every repetition. It is kept
    //separate from all_indices, since we cannot tell what f does with its argument.
    std::vector< size_t > index_pool( all_indices );

    //Random engine local to this call: no global state (std::srand) is touched, and two calls made
    //within the same second do not end up with the same sequence of subsamples.
    std::mt19937 random_engine( std::random_device{}() );

    //distances between the characteristic of a subsample and the characteristic of all points:
    std::vector< double > vector_of_distances( number_of_repetitions , 0 );

    //TODO- the loop below is intentionally sequential. A tbb::parallel_for version gave wrong results,
    //most likely because the method used to compute persistence was not thread safe. Verify this again
    //with the new method to compute persistence before parallelizing (each thread will then also need
    //its own random engine and its own index pool).
    for ( size_t it_no = 0 ; it_no < number_of_repetitions ; ++it_no )
    {
        if ( dbg )
        {
            std::cout << "Still : " << number_of_repetitions-it_no << " tests to go. \n The subsampled vector consist of points number : ";
        }

        //Partial Fisher-Yates shuffle: only the first size_of_subsample positions are randomized, which
        //is all we need to obtain a uniformly distributed subset of distinct indices. The starting order
        //of index_pool does not matter, so it is not rebuilt between repetitions.
        for ( size_t i = 0 ; i != size_of_subsample ; ++i )
        {
            std::uniform_int_distribution< size_t > pick( i , number_of_points-1 );
            std::swap( index_pool[i] , index_pool[ pick( random_engine ) ] );
        }
        std::vector< size_t > subsampled_points( index_pool.begin() , index_pool.begin() + size_of_subsample );

        if ( dbg )
        {
            for ( size_t i = 0 ; i != subsampled_points.size() ; ++i )std::cout << subsampled_points[i] << " , ";
            std::cout << std::endl;
        }

        //characteristic of the subsample:
        PointCloudCharacteristics characteristic_of_subsampled_points = f( subsampled_points );

        //Note that the subsample goes first; this is needed, since for some distances not all the points
        //of the second argument are required.
        const double dist = distance( characteristic_of_subsampled_points , characteristic_of_all_points );
        if ( dbg )
        {
            std::cout << "The distance between characteristic of all points and the characteristic of subsample is : " << dist << std::endl;
        }
        vector_of_distances[it_no] = dist;
    }

    //clamp the quantile into [0,1] (NaN is treated as 0), so that the position computed below is
    //always a valid index into vector_of_distances.
    double clamped_quantile = quantile;
    if ( !( clamped_quantile > 0.0 ) ) clamped_quantile = 0.0;
    if ( clamped_quantile > 1.0 ) clamped_quantile = 1.0;

    size_t position_of_quantile = static_cast<size_t>( std::floor( clamped_quantile*vector_of_distances.size() ) );
    if ( position_of_quantile ) --position_of_quantile;
    position_of_quantile = std::min( position_of_quantile , vector_of_distances.size()-1 );

    if ( dbg )
    {
        std::cerr << "quantile : " << quantile << std::endl;
        std::cerr << "position_of_quantile : " << position_of_quantile << std::endl;
    }

    //a full sort is not needed: nth_element puts the requested order statistic in its place.
    std::nth_element( vector_of_distances.begin() , vector_of_distances.begin()+position_of_quantile , vector_of_distances.end() );

    if ( dbg )std::cout << "Result : " << vector_of_distances[ position_of_quantile ] << std::endl;
    return vector_of_distances[ position_of_quantile ];
}//bootstrap
}//namespace Gudhi_stat
}//namespace Gudhi
#endif