From 8f9e4f64a2df82205a3a4551e0443b9e2a45edae Mon Sep 17 00:00:00 2001 From: mcarrier Date: Tue, 18 Sep 2018 20:48:24 +0000 Subject: templated Kernel parameter in Persistence heat maps git-svn-id: svn+ssh://scm.gforge.inria.fr/svnroot/gudhi/branches/kernels@3896 636b058d-ea47-450e-bf9e-a15bfbe3eedb Former-commit-id: b496194cbe5447fe040140e9fbef3beb39fd9fbc --- .../doc/Persistence_representations_doc.h | 2 +- .../example/persistence_heat_maps.cpp | 21 ++- .../include/gudhi/Persistence_heat_maps.h | 157 ++++++++++----------- .../include/gudhi/Sliced_Wasserstein.h | 10 +- .../gudhi/common_persistence_representations.h | 14 -- 5 files changed, 92 insertions(+), 112 deletions(-) (limited to 'src/Persistence_representations') diff --git a/src/Persistence_representations/doc/Persistence_representations_doc.h b/src/Persistence_representations/doc/Persistence_representations_doc.h index 091612ff..668904c9 100644 --- a/src/Persistence_representations/doc/Persistence_representations_doc.h +++ b/src/Persistence_representations/doc/Persistence_representations_doc.h @@ -238,7 +238,7 @@ namespace Persistence_representations { kernel methods (see \ref sec_persistence_kernels below for more details on kernels). Names can be a bit confusing so we recall that, with this second exact method, we implicitly define a kernel representation of diagrams that is built from a kernel between points - in the plane. Hence, we have two kernels here, which are independent. One is defined between points in the plane (its type in the code is Kernel2D), and is a parameter, + in the plane. Hence, we have two kernels here, which are independent. One is defined between points in the plane (its type in the code is Kernel2D), and is a template parameter, whereas the other is defined between persistence diagrams (it is the scalar product of the infinite-dimensional representations of the diagrams). \section sec_persistence_vectors Persistence vectors diff --git a/src/Persistence_representations/example/persistence_heat_maps.cpp b/src/Persistence_representations/example/persistence_heat_maps.cpp index 5874336e..c177c009 100644 --- a/src/Persistence_representations/example/persistence_heat_maps.cpp +++ b/src/Persistence_representations/example/persistence_heat_maps.cpp @@ -27,8 +27,9 @@ #include #include +using Gaussian_kernel = Gudhi::Persistence_representations::Gaussian_kernel_2D; using constant_scaling_function = Gudhi::Persistence_representations::constant_scaling_function; -using Persistence_heat_maps = Gudhi::Persistence_representations::Persistence_heat_maps; +using Persistence_heat_maps = Gudhi::Persistence_representations::Persistence_heat_maps; int main(int argc, char** argv) { // create two simple vectors with birth--death pairs: @@ -77,16 +78,14 @@ int main(int argc, char** argv) { // to compute scalar product of hm1 and hm2: std::cout << "Scalar product is : " << hm1.compute_scalar_product(hm2) << std::endl; - Gudhi::Persistence_representations::Kernel2D k = Gudhi::Persistence_representations::Gaussian_kernel(1.0); - - Persistence_heat_maps hm1k(persistence1, k); - Persistence_heat_maps hm2k(persistence2, k); - - Persistence_heat_maps hm1i(persistence1, 20, 20, 0, 11, 0, 11, k); - Persistence_heat_maps hm2i(persistence2, 20, 20, 0, 11, 0, 11, k); - - std::cout << "Scalar product computed with exact kernel is : " << hm1i.compute_scalar_product(hm2i) << std::endl; - std::cout << "Kernel value between PDs seen as functions is : " << hm1k.compute_scalar_product(hm2k) << std::endl; + // Mathieu's code ************************************************************************************************************ + Persistence_heat_maps hm1k(persistence1); + Persistence_heat_maps hm2k(persistence2); + Persistence_heat_maps hm1i(persistence1, 20, 20, 0, 11, 0, 11); + Persistence_heat_maps hm2i(persistence2, 20, 20, 0, 11, 0, 11); + std::cout << "Scalar product computed with exact 2D kernel on grid is : " << hm1i.compute_scalar_product(hm2i) << std::endl; + std::cout << "Scalar product computed with exact 2D kernel is : " << hm1k.compute_scalar_product(hm2k) << std::endl; + // *************************************************************************************************************************** return 0; } diff --git a/src/Persistence_representations/include/gudhi/Persistence_heat_maps.h b/src/Persistence_representations/include/gudhi/Persistence_heat_maps.h index 43f10b8c..b2354432 100644 --- a/src/Persistence_representations/include/gudhi/Persistence_heat_maps.h +++ b/src/Persistence_representations/include/gudhi/Persistence_heat_maps.h @@ -174,6 +174,14 @@ class weight_by_setting_maximal_interval_to_have_length_one { double letngth_of_maximal_interval; }; +// Mathieu's code *********************************************************************************************************************** +class Gaussian_kernel_2D { + public: + double operator()(const std::pair& p, const std::pair& q) const { + return (1.0/(std::sqrt(2*pi))) * std::exp(-((p.first-q.first)*(p.first-q.first) + (p.second-q.second)*(p.second-q.second)) / 2); } +}; +// ************************************************************************************************************************************** + /** * \class Persistence_heat_maps Persistence_heat_maps.h gudhi/Persistence_heat_maps.h * \brief A class implementing persistence heat maps. @@ -183,7 +191,7 @@ class weight_by_setting_maximal_interval_to_have_length_one { // This class implements the following concepts: Vectorized_topological_data, Topological_data_with_distances, // Real_valued_topological_data, Topological_data_with_averages, Topological_data_with_scalar_product -template +template class Persistence_heat_maps { public: /** @@ -212,7 +220,7 @@ class Persistence_heat_maps { *std::numeric_limits::max(), in which case the program compute the values based on the data. **/ Persistence_heat_maps(const std::vector >& interval, - std::vector > filter = create_Gaussian_filter(5, 1), + std::vector > filter, //= create_Gaussian_filter(5, 1), Mathieu: I had to comment this default parameter otherwise there is an ambiguity with constructor defined line 263 bool erase_below_diagonal = false, size_t number_of_pixels = 1000, double min_ = std::numeric_limits::max(), double max_ = std::numeric_limits::max()); @@ -244,19 +252,20 @@ class Persistence_heat_maps { double max_ = std::numeric_limits::max(), unsigned dimension = std::numeric_limits::max()); +// Mathieu's code *********************************************************************************************************************** /** - * Construction that takes as inputs (1) the diagram, (2) grid parameters (min, max and number of samples for x and y axes), and (3) a universal kernel on the plane used - * to turn the diagram into a function. + * Construction that takes as inputs (1) the diagram, and (2) the grid parameters (min, max and number of samples for x and y axes) **/ Persistence_heat_maps(const Persistence_diagram & interval, size_t number_of_x_pixels, size_t number_of_y_pixels, - double min_x = 0, double max_x = 1, double min_y = 0, double max_y = 1, const Kernel2D & kernel = Gaussian_kernel(1.0)); + double min_x = 0, double max_x = 1, double min_y = 0, double max_y = 1); /** - * Construction that takes as inputs (1) the diagram and (2) a universal kernel on the plane used - * to turn the diagram into a function. Note that this construction is infinite dimensional so - * only compute_scalar_product() method is valid after calling this constructor. + * Construction that takes only the diagram as input (weight and 2D kernel are template parameters) **/ - Persistence_heat_maps(const Persistence_diagram & interval, const Kernel2D & kernel = Gaussian_kernel(1.0)); + Persistence_heat_maps(const Persistence_diagram & interval); +// ************************************************************************************************************************************** + + /** * Compute a mean value of a collection of heat maps and store it in the current object. Note that all the persistence @@ -529,23 +538,17 @@ class Persistence_heat_maps { bool erase_below_diagonal = false, size_t number_of_pixels = 1000, double min_ = std::numeric_limits::max(), double max_ = std::numeric_limits::max()); - void construct_image_from_exact_universal_kernel(const Persistence_diagram & interval, - size_t number_of_x_pixels = 10, size_t number_of_y_pixels = 10, - double min_x = 0, double max_x = 1, double min_y = 0, double max_y = 1, const Kernel2D & kernel = Gaussian_kernel(1.0)); - void construct_kernel_from_exact_universal_kernel(const Persistence_diagram & interval, const Kernel2D & kernel = Gaussian_kernel(1.0)); - void set_up_parameters_for_basic_classes() { this->number_of_functions_for_vectorization = 1; this->number_of_functions_for_projections_to_reals = 1; } - // Boolean indicating if we are computing persistence image (true) or persistence weighted gaussian kernel (false) - bool discrete = true; - - // PWGK +// Mathieu's code *********************************************************************************************************************** + bool discrete = true; // Boolean indicating if we are computing persistence image (true) or persistence weighted gaussian kernel (false) Kernel2D k; Persistence_diagram d; std::vector weights; +// ************************************************************************************************************************************** // data Scalling_of_kernels f; @@ -555,61 +558,46 @@ class Persistence_heat_maps { std::vector > heat_map; }; -template -void Persistence_heat_maps::construct_image_from_exact_universal_kernel(const Persistence_diagram & diagram, - size_t number_of_x_pixels, size_t number_of_y_pixels, - double min_x, double max_x, - double min_y, double max_y, const Kernel2D & kernel) { - this->discrete = true; Scalling_of_kernels f; this->f = f; this->min_ = min_x; this->max_ = max_x; + +// Mathieu's code *********************************************************************************************************************** +template +Persistence_heat_maps::Persistence_heat_maps(const Persistence_diagram & diagram, + size_t number_of_x_pixels, size_t number_of_y_pixels, + double min_x, double max_x, + double min_y, double max_y) { + + this->discrete = true; this->min_ = min_x; this->max_ = max_x; this->heat_map.resize(number_of_y_pixels); double step_x = (max_x - min_x)/(number_of_x_pixels - 1); double step_y = (max_y - min_y)/(number_of_y_pixels - 1); int num_pts = diagram.size(); - for(size_t i = 0; i < number_of_y_pixels; i++){ double y = min_y + i*step_y; this->heat_map[i].reserve(number_of_x_pixels); for(size_t j = 0; j < number_of_x_pixels; j++){ double x = min_x + j*step_x; - std::pair grid_point(x,y); double pixel_value = 0; - for(int k = 0; k < num_pts; k++){ - pixel_value += this->f(diagram[k]) * kernel(diagram[k], grid_point); - } + for(int k = 0; k < num_pts; k++) pixel_value += this->f(diagram[k]) * this->k(diagram[k], grid_point); this->heat_map[i].push_back(pixel_value); - } } - -} - - -template -Persistence_heat_maps::Persistence_heat_maps(const Persistence_diagram & diagram, - size_t number_of_x_pixels, size_t number_of_y_pixels, - double min_x, double max_x, - double min_y, double max_y, const Kernel2D & kernel) { - this->construct_image_from_exact_universal_kernel(diagram, number_of_x_pixels, number_of_y_pixels, min_x, max_x, min_y, max_y, kernel); this->set_up_parameters_for_basic_classes(); } -template -void Persistence_heat_maps::construct_kernel_from_exact_universal_kernel(const Persistence_diagram & diagram, const Kernel2D & kernel){ - this->discrete = false; Scalling_of_kernels f; this->f = f; this->k = kernel; this->d = diagram; +template +Persistence_heat_maps::Persistence_heat_maps(const Persistence_diagram& diagram) { + this->discrete = false; this->d = diagram; int num_pts = this->d.size(); for (int i = 0; i < num_pts; i++) this->weights.push_back(this->f(this->d[i])); + this->set_up_parameters_for_basic_classes(); } +// ************************************************************************************************************************************** -template -Persistence_heat_maps::Persistence_heat_maps(const Persistence_diagram& diagram, const Kernel2D & kernel) { - this->construct_kernel_from_exact_universal_kernel(diagram, kernel); - this->set_up_parameters_for_basic_classes(); -} // if min_ == max_, then the program is requested to set up the values itself based on persistence intervals -template -void Persistence_heat_maps::construct(const std::vector >& intervals_, +template +void Persistence_heat_maps::construct(const std::vector >& intervals_, std::vector > filter, bool erase_below_diagonal, size_t number_of_pixels, double min_, double max_) { @@ -713,16 +701,16 @@ void Persistence_heat_maps::construct(const std::vector -Persistence_heat_maps::Persistence_heat_maps( +template +Persistence_heat_maps::Persistence_heat_maps( const std::vector >& interval, std::vector > filter, bool erase_below_diagonal, size_t number_of_pixels, double min_, double max_) { this->construct(interval, filter, erase_below_diagonal, number_of_pixels, min_, max_); this->set_up_parameters_for_basic_classes(); } -template -Persistence_heat_maps::Persistence_heat_maps(const char* filename, +template +Persistence_heat_maps::Persistence_heat_maps(const char* filename, std::vector > filter, bool erase_below_diagonal, size_t number_of_pixels, double min_, double max_, unsigned dimension) { @@ -736,8 +724,8 @@ Persistence_heat_maps::Persistence_heat_maps(const char* fi this->set_up_parameters_for_basic_classes(); } -template -std::vector > Persistence_heat_maps::check_and_initialize_maps( +template +std::vector > Persistence_heat_maps::check_and_initialize_maps( const std::vector& maps) { // checking if all the heat maps are of the same size: for (size_t i = 0; i != maps.size(); ++i) { @@ -758,8 +746,8 @@ std::vector > Persistence_heat_maps::ch return heat_maps; } -template -void Persistence_heat_maps::compute_median(const std::vector& maps) { +template +void Persistence_heat_maps::compute_median(const std::vector& maps) { std::vector > heat_maps = this->check_and_initialize_maps(maps); std::vector to_compute_median(maps.size()); @@ -778,8 +766,8 @@ void Persistence_heat_maps::compute_median(const std::vecto this->max_ = maps[0]->max_; } -template -void Persistence_heat_maps::compute_mean(const std::vector& maps) { +template +void Persistence_heat_maps::compute_mean(const std::vector& maps) { std::vector > heat_maps = this->check_and_initialize_maps(maps); for (size_t i = 0; i != heat_maps.size(); ++i) { for (size_t j = 0; j != heat_maps[i].size(); ++j) { @@ -795,8 +783,8 @@ void Persistence_heat_maps::compute_mean(const std::vector< this->max_ = maps[0]->max_; } -template -void Persistence_heat_maps::compute_percentage_of_active( +template +void Persistence_heat_maps::compute_percentage_of_active( const std::vector& maps, size_t cutoff) { std::vector > heat_maps = this->check_and_initialize_maps(maps); @@ -818,8 +806,8 @@ void Persistence_heat_maps::compute_percentage_of_active( this->max_ = maps[0]->max_; } -template -void Persistence_heat_maps::plot(const char* filename) const { +template +void Persistence_heat_maps::plot(const char* filename) const { std::ofstream out; std::stringstream gnuplot_script; gnuplot_script << filename << "_GnuplotScript"; @@ -837,8 +825,8 @@ void Persistence_heat_maps::plot(const char* filename) cons << gnuplot_script.str().c_str() << "\'\"" << std::endl; } -template -void Persistence_heat_maps::print_to_file(const char* filename) const { +template +void Persistence_heat_maps::print_to_file(const char* filename) const { std::ofstream out; out.open(filename); @@ -853,8 +841,8 @@ void Persistence_heat_maps::print_to_file(const char* filen out.close(); } -template -void Persistence_heat_maps::load_from_file(const char* filename) { +template +void Persistence_heat_maps::load_from_file(const char* filename) { bool dbg = false; std::ifstream in; @@ -902,8 +890,8 @@ void Persistence_heat_maps::load_from_file(const char* file } // Concretizations of virtual methods: -template -std::vector Persistence_heat_maps::vectorize(int number_of_function) const { +template +std::vector Persistence_heat_maps::vectorize(int number_of_function) const { std::vector result; if(!discrete){std::cout << "No vectorize method in case of infinite dimensional vectorization" << std::endl; return result;} @@ -925,8 +913,8 @@ std::vector Persistence_heat_maps::vectorize(int nu return result; } -template -double Persistence_heat_maps::distance(const Persistence_heat_maps& second, double power) const { +template +double Persistence_heat_maps::distance(const Persistence_heat_maps& second, double power) const { if(this->discrete){ // first we need to check if (*this) and second are defined on the same domain and have the same dimensions: if (!this->check_if_the_same(second)) { @@ -962,8 +950,8 @@ double Persistence_heat_maps::distance(const Persistence_he } } -template -double Persistence_heat_maps::project_to_R(int number_of_function) const { +template +double Persistence_heat_maps::project_to_R(int number_of_function) const { double result = 0; for (size_t i = 0; i != this->heat_map.size(); ++i) { for (size_t j = 0; j != this->heat_map[i].size(); ++j) { @@ -973,14 +961,14 @@ double Persistence_heat_maps::project_to_R(int number_of_fu return result; } -template -void Persistence_heat_maps::compute_average( +template +void Persistence_heat_maps::compute_average( const std::vector& to_average) { this->compute_mean(to_average); } -template -double Persistence_heat_maps::compute_scalar_product(const Persistence_heat_maps& second) const { +template +double Persistence_heat_maps::compute_scalar_product(const Persistence_heat_maps& second) const { if(discrete){ // first we need to check if (*this) and second are defined on the same domain and have the same dimensions: @@ -1001,14 +989,19 @@ double Persistence_heat_maps::compute_scalar_product(const return scalar_prod; } +// Mathieu's code *********************************************************************************************************************** else{ int num_pts1 = this->d.size(); int num_pts2 = second.d.size(); double kernel_val = 0; - for(int i = 0; i < num_pts1; i++) - for(int j = 0; j < num_pts2; j++) - kernel_val += this->weights[i] * second.weights[j] * this->k(this->d[i], second.d[j]); + for(int i = 0; i < num_pts1; i++){ + std::pair pi = this->d[i]; + for(int j = 0; j < num_pts2; j++){ + std::pair pj = second.d[j]; + kernel_val += this->weights[i] * second.weights[j] * this->k(pi, pj); + } + } return kernel_val; } - +// ************************************************************************************************************************************** } diff --git a/src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h b/src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h index 8bdcef65..79fe8690 100644 --- a/src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h +++ b/src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h @@ -49,12 +49,14 @@ namespace Persistence_representations { * * where \f$\pi_\theta\f$ is the projection onto the line defined with angle \f$\theta\f$ in the unit circle \f$\mathbb{S}\f$, * and \f$\pi_\Delta\f$ is the projection onto the diagonal. - * The integral can be either computed exactly in \f$O(n^2{\rm log}(n))\f$ time, where \f$n\f$ is the number of points - * in the diagrams, or approximated by sampling \f$N\f$ lines in the circle in \f$O(Nn{\rm log}(n))\f$ time. The Sliced Wasserstein Kernel is then computed as: + * Assuming that the diagrams are in general position (i.e. there is no collinear triple), the integral can be computed exactly in \f$O(n^2{\rm log}(n))\f$ time, where \f$n\f$ is the number of points + * in the diagrams. We provide two approximations of the integral: one in which we slightly perturb the diagram points so that they are in general position, + * and another in which we approximate the integral by sampling \f$N\f$ lines in the circle in \f$O(Nn{\rm log}(n))\f$ time. The Sliced Wasserstein Kernel is then computed as: * * \f$ k(D_1,D_2) = {\rm exp}\left(-\frac{SW(D_1,D_2)}{2\sigma^2}\right).\f$ * - * For more details, please see \cite pmlr-v70-carriere17a . + * The first method is usually much more accurate but also + * much slower. For more details, please see \cite pmlr-v70-carriere17a . * **/ @@ -261,7 +263,7 @@ class Sliced_Wasserstein { * * @param[in] _diagram persistence diagram. * @param[in] _sigma bandwidth parameter. - * @param[in] _approx number of directions used to approximate the integral in the Sliced Wasserstein distance, set to -1 for exact computation. If positive, then projections of the diagram + * @param[in] _approx number of directions used to approximate the integral in the Sliced Wasserstein distance, set to -1 for random perturbation. If positive, then projections of the diagram * points on all directions are stored in memory to reduce computation time. * */ diff --git a/src/Persistence_representations/include/gudhi/common_persistence_representations.h b/src/Persistence_representations/include/gudhi/common_persistence_representations.h index a51fcfe5..6fed019a 100644 --- a/src/Persistence_representations/include/gudhi/common_persistence_representations.h +++ b/src/Persistence_representations/include/gudhi/common_persistence_representations.h @@ -42,20 +42,6 @@ static constexpr double pi = boost::math::constants::pi(); */ using Persistence_diagram = std::vector >; -/** - * In this module, we use the name Kernel2D for the representation of a function taking a pair of two points in the plane and returning a double. - */ -using Kernel2D = std::function, std::pair )>; - -inline Kernel2D Gaussian_kernel(double sigma){ - return [=](std::pair p, std::pair q){return (1.0 / (std::sqrt(2*pi)*sigma)) * std::exp( -((p.first-q.first)*(p.first-q.first) + (p.second-q.second)*(p.second-q.second)) / (2*sigma*sigma) );}; -} - -inline Kernel2D polynomial_kernel(double c, double d){ - return [=](std::pair p, std::pair q){return std::pow( p.first*q.first + p.second*q.second + c, d);}; -} - - // double epsi = std::numeric_limits::epsilon(); double epsi = 0.000005; -- cgit v1.2.3