summaryrefslogtreecommitdiff
path: root/src/Persistence_representations
diff options
context:
space:
mode:
Diffstat (limited to 'src/Persistence_representations')
-rw-r--r--src/Persistence_representations/doc/Persistence_representations_doc.h32
-rw-r--r--src/Persistence_representations/example/CMakeLists.txt4
-rw-r--r--src/Persistence_representations/example/persistence_heat_maps.cpp12
-rw-r--r--src/Persistence_representations/example/sliced_wasserstein.cpp59
-rw-r--r--src/Persistence_representations/include/gudhi/Persistence_heat_maps.h173
-rw-r--r--src/Persistence_representations/include/gudhi/Persistence_landscape_on_grid.h6
-rw-r--r--src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h317
-rw-r--r--src/Persistence_representations/include/gudhi/common_persistence_representations.h36
-rw-r--r--src/Persistence_representations/test/CMakeLists.txt5
-rw-r--r--src/Persistence_representations/test/kernels.cpp55
10 files changed, 654 insertions, 45 deletions
diff --git a/src/Persistence_representations/doc/Persistence_representations_doc.h b/src/Persistence_representations/doc/Persistence_representations_doc.h
index 4d850a02..3aa99315 100644
--- a/src/Persistence_representations/doc/Persistence_representations_doc.h
+++ b/src/Persistence_representations/doc/Persistence_representations_doc.h
@@ -227,6 +227,9 @@ namespace Persistence_representations {
to diagonal are given then sometimes the kernel have support that reaches the region
below the diagonal. If the value of this parameter is true, then the values below diagonal can be erased.
+ We also provide two methods to perform exact calculations. In both methods, the kernel is no longer provided as a filter (i.e. a square matrix---see parameters above), but rather as
+ a function assigning a real value to a 2D point. In the first method, calculations are done on a grid, leading to a finite-dimensional representation.
+ On the other hand, in the second method, we do not use grids, meaning that diagrams are represented as functions. Thus, only scalar products are available.
\section sec_persistence_vectors Persistence vectors
<b>Reference manual:</b> \ref Gudhi::Persistence_representations::Vector_distances_in_diagram <br>
@@ -250,6 +253,35 @@ namespace Persistence_representations {
absolute value of differences between coordinates. A scalar product is a sum of products of
values at the corresponding positions of two vectors.
+ \section sec_persistence_kernels Kernels on persistence diagrams
+ <b>Reference manual:</b> \ref Gudhi::Persistence_representations::Sliced_Wasserstein <br>
+ <b>Reference manual:</b> \ref Gudhi::Persistence_representations::Persistence_weighted_gaussian <br>
+
+ Kernels for persistence diagrams can be regarded as infinite-dimensional vectorizations. More specifically,
+ they are similarity functions whose evaluations on pairs of persistence diagrams equals the scalar products
+ between images of these pairs under a map \f$\Phi\f$ taking values in a specific (possibly non Euclidean) Hilbert space \f$k(D_i, D_j) = \langle \Phi(D_i),\Phi(D_j)\rangle\f$.
+ Reciprocally, classical results of learning theory ensure that such a \f$\Phi\f$ exists for a given similarity function \f$k\f$ if and only if \f$k\f$ is <i>positive semi-definite</i>.
+ Kernels are designed for algorithms that can be <i>kernelized</i>, i.e., algorithms that only require to know scalar products between instances in order to run.
+ Examples of such algorithms include Support Vector Machines, Principal Component Analysis and Ridge Regression.
+
+ There have been several attempts at defining kernels, i.e., positive semi-definite functions, between persistence diagrams within the last few years. We provide implementation
+ for the <i>Sliced Wasserstein Kernel</i>---see \cite pmlr-v70-carriere17a, which takes the form of a Gaussian kernel with a specific distance between persistence diagrams
+ called the <i>Sliced Wasserstein Distance</i>: \f$k(D_1,D_2)={\rm exp}\left(-\frac{SW(D_1,D_2)}{2\sigma^2}\right)\f$. Other kernels such as the Persistence Weighted Gaussian Kernel or
+ the Persistence Scale Space Kernel are implemented in Persistence_heat_maps.
+
+ When launching:
+
+ \code $> ./Sliced_Wasserstein
+ \endcode
+
+ the program output is:
+
+ \code $> Approx SW kernel: 0.0693743
+ $> Exact SW kernel: 0.0693218
+ $> Distance induced by approx SW kernel: 1.36428
+ $> Distance induced by exact SW kernel: 1.3643
+ \endcode
+
*/
/** @} */ // end defgroup Persistence_representations
diff --git a/src/Persistence_representations/example/CMakeLists.txt b/src/Persistence_representations/example/CMakeLists.txt
index 33558df3..a7c6ef39 100644
--- a/src/Persistence_representations/example/CMakeLists.txt
+++ b/src/Persistence_representations/example/CMakeLists.txt
@@ -26,3 +26,7 @@ add_test(NAME Persistence_representations_example_heat_maps
COMMAND $<TARGET_FILE:Persistence_representations_example_heat_maps>)
install(TARGETS Persistence_representations_example_heat_maps DESTINATION bin)
+add_executable ( Sliced_Wasserstein sliced_wasserstein.cpp )
+add_test(NAME Sliced_Wasserstein
+ COMMAND $<TARGET_FILE:Sliced_Wasserstein>)
+install(TARGETS Sliced_Wasserstein DESTINATION bin)
diff --git a/src/Persistence_representations/example/persistence_heat_maps.cpp b/src/Persistence_representations/example/persistence_heat_maps.cpp
index 323b57e9..5874336e 100644
--- a/src/Persistence_representations/example/persistence_heat_maps.cpp
+++ b/src/Persistence_representations/example/persistence_heat_maps.cpp
@@ -21,6 +21,7 @@
*/
#include <gudhi/Persistence_heat_maps.h>
+#include <gudhi/common_persistence_representations.h>
#include <iostream>
#include <vector>
@@ -76,5 +77,16 @@ int main(int argc, char** argv) {
// to compute scalar product of hm1 and hm2:
std::cout << "Scalar product is : " << hm1.compute_scalar_product(hm2) << std::endl;
+ Gudhi::Persistence_representations::Kernel2D k = Gudhi::Persistence_representations::Gaussian_kernel(1.0);
+
+ Persistence_heat_maps hm1k(persistence1, k);
+ Persistence_heat_maps hm2k(persistence2, k);
+
+ Persistence_heat_maps hm1i(persistence1, 20, 20, 0, 11, 0, 11, k);
+ Persistence_heat_maps hm2i(persistence2, 20, 20, 0, 11, 0, 11, k);
+
+ std::cout << "Scalar product computed with exact kernel is : " << hm1i.compute_scalar_product(hm2i) << std::endl;
+ std::cout << "Kernel value between PDs seen as functions is : " << hm1k.compute_scalar_product(hm2k) << std::endl;
+
return 0;
}
diff --git a/src/Persistence_representations/example/sliced_wasserstein.cpp b/src/Persistence_representations/example/sliced_wasserstein.cpp
new file mode 100644
index 00000000..089172a0
--- /dev/null
+++ b/src/Persistence_representations/example/sliced_wasserstein.cpp
@@ -0,0 +1,59 @@
+/* This file is part of the Gudhi Library. The Gudhi library
+ * (Geometric Understanding in Higher Dimensions) is a generic C++
+ * library for computational topology.
+ *
+ * Author(s): Mathieu Carriere
+ *
+ * Copyright (C) 2018 INRIA (France)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <gudhi/Sliced_Wasserstein.h>
+
+#include <iostream>
+#include <vector>
+#include <utility>
+
+using Persistence_diagram = Gudhi::Persistence_representations::Persistence_diagram;
+using SW = Gudhi::Persistence_representations::Sliced_Wasserstein;
+
+int main(int argc, char** argv) {
+
+ Persistence_diagram persistence1, persistence2;
+
+ persistence1.push_back(std::make_pair(1, 2));
+ persistence1.push_back(std::make_pair(6, 8));
+ persistence1.push_back(std::make_pair(0, 4));
+ persistence1.push_back(std::make_pair(3, 8));
+
+ persistence2.push_back(std::make_pair(2, 9));
+ persistence2.push_back(std::make_pair(1, 6));
+ persistence2.push_back(std::make_pair(3, 5));
+ persistence2.push_back(std::make_pair(6, 10));
+
+
+ SW sw1(persistence1, 1, 100);
+ SW sw2(persistence2, 1, 100);
+
+ SW swex1(persistence1, 1, -1);
+ SW swex2(persistence2, 1, -1);
+
+ std::cout << "Approx SW kernel: " << sw1.compute_scalar_product(sw2) << std::endl;
+ std::cout << "Exact SW kernel: " << swex1.compute_scalar_product(swex2) << std::endl;
+ std::cout << "Distance induced by approx SW kernel: " << sw1.distance(sw2) << std::endl;
+ std::cout << "Distance induced by exact SW kernel: " << swex1.distance(swex2) << std::endl;
+
+ return 0;
+}
diff --git a/src/Persistence_representations/include/gudhi/Persistence_heat_maps.h b/src/Persistence_representations/include/gudhi/Persistence_heat_maps.h
index 35e51e63..12188526 100644
--- a/src/Persistence_representations/include/gudhi/Persistence_heat_maps.h
+++ b/src/Persistence_representations/include/gudhi/Persistence_heat_maps.h
@@ -245,6 +245,20 @@ class Persistence_heat_maps {
unsigned dimension = std::numeric_limits<unsigned>::max());
/**
+ * Construction that takes as inputs (1) the diagram, (2) grid parameters (min, max and number of samples for x and y axes), and (3) a universal kernel on the plane used
+ * to turn the diagram into a function.
+ **/
+ Persistence_heat_maps(const Persistence_diagram & interval, size_t number_of_x_pixels, size_t number_of_y_pixels,
+ double min_x = 0, double max_x = 1, double min_y = 0, double max_y = 1, const Kernel2D & kernel = Gaussian_kernel(1.0));
+
+ /**
+ * Construction that takes as inputs (1) the diagram and (2) a universal kernel on the plane used
+ * to turn the diagram into a function. Note that this construction is infinite dimensional so
+ * only compute_scalar_product() method is valid after calling this constructor.
+ **/
+ Persistence_heat_maps(const Persistence_diagram & interval, const Kernel2D & kernel = Gaussian_kernel(1.0));
+
+ /**
* Compute a mean value of a collection of heat maps and store it in the current object. Note that all the persistence
*maps send in a vector to this procedure need to have the same parameters.
* If this is not the case, the program will throw an exception.
@@ -512,15 +526,27 @@ class Persistence_heat_maps {
size_t number_of_functions_for_projections_to_reals;
void construct(const std::vector<std::pair<double, double> >& intervals_,
std::vector<std::vector<double> > filter = create_Gaussian_filter(5, 1),
-
bool erase_below_diagonal = false, size_t number_of_pixels = 1000,
double min_ = std::numeric_limits<double>::max(), double max_ = std::numeric_limits<double>::max());
+ void construct_image_from_exact_universal_kernel(const Persistence_diagram & interval,
+ size_t number_of_x_pixels = 10, size_t number_of_y_pixels = 10,
+ double min_x = 0, double max_x = 1, double min_y = 0, double max_y = 1, const Kernel2D & kernel = Gaussian_kernel(1.0));
+ void construct_kernel_from_exact_universal_kernel(const Persistence_diagram & interval, const Kernel2D & kernel = Gaussian_kernel(1.0));
+
void set_up_parameters_for_basic_classes() {
this->number_of_functions_for_vectorization = 1;
this->number_of_functions_for_projections_to_reals = 1;
}
+ // Boolean indicating if we are computing persistence image (true) or persistence weighted gaussian kernel (false)
+ bool discrete = true;
+
+ // PWGK
+ Kernel2D k;
+ Persistence_diagram d;
+ std::vector<double> weights;
+
// data
Scalling_of_kernels f;
bool erase_below_diagonal;
@@ -529,6 +555,58 @@ class Persistence_heat_maps {
std::vector<std::vector<double> > heat_map;
};
+template <typename Scalling_of_kernels>
+void Persistence_heat_maps<Scalling_of_kernels>::construct_image_from_exact_universal_kernel(const Persistence_diagram & diagram,
+ size_t number_of_x_pixels, size_t number_of_y_pixels,
+ double min_x, double max_x,
+ double min_y, double max_y, const Kernel2D & kernel) {
+
+ this->discrete = true; Scalling_of_kernels f; this->f = f; this->min_ = min_x; this->max_ = max_x;
+ this->heat_map.resize(number_of_y_pixels);
+ double step_x = (max_x - min_x)/(number_of_x_pixels - 1); double step_y = (max_y - min_y)/(number_of_y_pixels - 1);
+
+ int num_pts = diagram.size();
+
+ for(size_t i = 0; i < number_of_y_pixels; i++){
+ double y = min_y + i*step_y; this->heat_map[i].reserve(number_of_x_pixels);
+ for(size_t j = 0; j < number_of_x_pixels; j++){
+ double x = min_x + j*step_x;
+
+ std::pair<double, double> grid_point(x,y); double pixel_value = 0;
+ for(int k = 0; k < num_pts; k++){
+ pixel_value += this->f(diagram[k]) * kernel(diagram[k], grid_point);
+ }
+ this->heat_map[i].push_back(pixel_value);
+
+ }
+ }
+
+}
+
+
+template <typename Scalling_of_kernels>
+Persistence_heat_maps<Scalling_of_kernels>::Persistence_heat_maps(const Persistence_diagram & diagram,
+ size_t number_of_x_pixels, size_t number_of_y_pixels,
+ double min_x, double max_x,
+ double min_y, double max_y, const Kernel2D & kernel) {
+ this->construct_image_from_exact_universal_kernel(diagram, number_of_x_pixels, number_of_y_pixels, min_x, max_x, min_y, max_y, kernel);
+ this->set_up_parameters_for_basic_classes();
+}
+
+template <typename Scalling_of_kernels>
+void Persistence_heat_maps<Scalling_of_kernels>::construct_kernel_from_exact_universal_kernel(const Persistence_diagram & diagram, const Kernel2D & kernel){
+ this->discrete = false; Scalling_of_kernels f; this->f = f; this->k = kernel; this->d = diagram;
+ int num_pts = this->d.size();
+ for (int i = 0; i < num_pts; i++) this->weights.push_back(this->f(this->d[i]));
+}
+
+
+template <typename Scalling_of_kernels>
+Persistence_heat_maps<Scalling_of_kernels>::Persistence_heat_maps(const Persistence_diagram& diagram, const Kernel2D & kernel) {
+ this->construct_kernel_from_exact_universal_kernel(diagram, kernel);
+ this->set_up_parameters_for_basic_classes();
+}
+
// if min_ == max_, then the program is requested to set up the values itself based on persistence intervals
template <typename Scalling_of_kernels>
void Persistence_heat_maps<Scalling_of_kernels>::construct(const std::vector<std::pair<double, double> >& intervals_,
@@ -826,13 +904,16 @@ void Persistence_heat_maps<Scalling_of_kernels>::load_from_file(const char* file
// Concretizations of virtual methods:
template <typename Scalling_of_kernels>
std::vector<double> Persistence_heat_maps<Scalling_of_kernels>::vectorize(int number_of_function) const {
+
+ std::vector<double> result;
+ if(!discrete){std::cout << "No vectorize method in case of infinite dimensional vectorization" << std::endl; return result;}
+
// convert this->heat_map into one large vector:
size_t size_of_result = 0;
for (size_t i = 0; i != this->heat_map.size(); ++i) {
size_of_result += this->heat_map[i].size();
}
- std::vector<double> result;
result.reserve(size_of_result);
for (size_t i = 0; i != this->heat_map.size(); ++i) {
@@ -846,34 +927,39 @@ std::vector<double> Persistence_heat_maps<Scalling_of_kernels>::vectorize(int nu
template <typename Scalling_of_kernels>
double Persistence_heat_maps<Scalling_of_kernels>::distance(const Persistence_heat_maps& second, double power) const {
- // first we need to check if (*this) and second are defined on the same domain and have the same dimensions:
- if (!this->check_if_the_same(second)) {
- std::cerr << "The persistence images are of non compatible sizes. We cannot therefore compute distance between "
- "them. The program will now terminate";
- throw "The persistence images are of non compatible sizes. The program will now terminate";
- }
+ if(this->discrete){
+ // first we need to check if (*this) and second are defined on the same domain and have the same dimensions:
+ if (!this->check_if_the_same(second)) {
+ std::cerr << "The persistence images are of non compatible sizes. We cannot therefore compute distance between "
+ "them. The program will now terminate";
+ throw "The persistence images are of non compatible sizes. The program will now terminate";
+ }
- // if we are here, we know that the two persistence images are defined on the same domain, so we can start computing
- // their distances:
+ // if we are here, we know that the two persistence images are defined on the same domain, so we can start computing their distances:
- double distance = 0;
- if (power < std::numeric_limits<double>::max()) {
- for (size_t i = 0; i != this->heat_map.size(); ++i) {
- for (size_t j = 0; j != this->heat_map[i].size(); ++j) {
- distance += pow(fabs(this->heat_map[i][j] - second.heat_map[i][j]), power);
+ double distance = 0;
+ if (power < std::numeric_limits<double>::max()) {
+ for (size_t i = 0; i != this->heat_map.size(); ++i) {
+ for (size_t j = 0; j != this->heat_map[i].size(); ++j) {
+ distance += pow(fabs(this->heat_map[i][j] - second.heat_map[i][j]), power);
+ }
}
- }
- } else {
- // in this case, we compute max norm distance
- for (size_t i = 0; i != this->heat_map.size(); ++i) {
- for (size_t j = 0; j != this->heat_map[i].size(); ++j) {
- if (distance < fabs(this->heat_map[i][j] - second.heat_map[i][j])) {
- distance = fabs(this->heat_map[i][j] - second.heat_map[i][j]);
+ } else {
+ // in this case, we compute max norm distance
+ for (size_t i = 0; i != this->heat_map.size(); ++i) {
+ for (size_t j = 0; j != this->heat_map[i].size(); ++j) {
+ if (distance < fabs(this->heat_map[i][j] - second.heat_map[i][j])) {
+ distance = fabs(this->heat_map[i][j] - second.heat_map[i][j]);
+ }
}
}
}
+ return distance;
+ } else {
+
+ return std::sqrt(this->compute_scalar_product(*this) + second.compute_scalar_product(second) -2 * this->compute_scalar_product(second));
+
}
- return distance;
}
template <typename Scalling_of_kernels>
@@ -895,22 +981,37 @@ void Persistence_heat_maps<Scalling_of_kernels>::compute_average(
template <typename Scalling_of_kernels>
double Persistence_heat_maps<Scalling_of_kernels>::compute_scalar_product(const Persistence_heat_maps& second) const {
- // first we need to check if (*this) and second are defined on the same domain and have the same dimensions:
- if (!this->check_if_the_same(second)) {
- std::cerr << "The persistence images are of non compatible sizes. We cannot therefore compute distance between "
- "them. The program will now terminate";
- throw "The persistence images are of non compatible sizes. The program will now terminate";
- }
- // if we are here, we know that the two persistence images are defined on the same domain, so we can start computing
- // their scalar product:
- double scalar_prod = 0;
- for (size_t i = 0; i != this->heat_map.size(); ++i) {
- for (size_t j = 0; j != this->heat_map[i].size(); ++j) {
- scalar_prod += this->heat_map[i][j] * second.heat_map[i][j];
+ if(discrete){
+ // first we need to check if (*this) and second are defined on the same domain and have the same dimensions:
+ if (!this->check_if_the_same(second)) {
+ std::cerr << "The persistence images are of non compatible sizes. We cannot therefore compute distance between "
+ "them. The program will now terminate";
+ throw "The persistence images are of non compatible sizes. The program will now terminate";
+ }
+
+ // if we are here, we know that the two persistence images are defined on the same domain, so we can start computing
+ // their scalar product:
+ double scalar_prod = 0;
+ for (size_t i = 0; i != this->heat_map.size(); ++i) {
+ for (size_t j = 0; j != this->heat_map[i].size(); ++j) {
+ scalar_prod += this->heat_map[i][j] * second.heat_map[i][j];
+ }
}
+ return scalar_prod;
+ }
+
+ else{
+ GUDHI_CHECK(this->approx != second.approx || this->f != second.f, std::invalid_argument("Error: different values for representations"));
+
+ int num_pts1 = this->d.size(); int num_pts2 = second.d.size(); double kernel_val = 0;
+ for(int i = 0; i < num_pts1; i++)
+ for(int j = 0; j < num_pts2; j++)
+ kernel_val += this->weights[i] * second.weights[j] * this->k(this->d[i], second.d[j]);
+ return kernel_val;
}
- return scalar_prod;
+
+
}
} // namespace Persistence_representations
diff --git a/src/Persistence_representations/include/gudhi/Persistence_landscape_on_grid.h b/src/Persistence_representations/include/gudhi/Persistence_landscape_on_grid.h
index fd8a181c..db0e362a 100644
--- a/src/Persistence_representations/include/gudhi/Persistence_landscape_on_grid.h
+++ b/src/Persistence_representations/include/gudhi/Persistence_landscape_on_grid.h
@@ -986,7 +986,7 @@ void Persistence_landscape_on_grid::set_up_values_of_landscapes(const std::vecto
for (size_t int_no = 0; int_no != p.size(); ++int_no) {
size_t grid_interval_begin = (p[int_no].first - grid_min_) / dx;
size_t grid_interval_end = (p[int_no].second - grid_min_) / dx;
- size_t grid_interval_midpoint = (size_t)(0.5 * (grid_interval_begin + grid_interval_end));
+ size_t grid_interval_midpoint = (size_t)(0.5 * (p[int_no].first + p[int_no].second) - grid_min + 1);
if (dbg) {
std::cerr << "Considering an interval : " << p[int_no].first << "," << p[int_no].second << std::endl;
@@ -996,7 +996,7 @@ void Persistence_landscape_on_grid::set_up_values_of_landscapes(const std::vecto
std::cerr << "grid_interval_midpoint : " << grid_interval_midpoint << std::endl;
}
- double landscape_value = dx;
+ double landscape_value = grid_min + dx * (grid_interval_begin + 1) - p[int_no].first;
for (size_t i = grid_interval_begin + 1; i < grid_interval_midpoint; ++i) {
if (dbg) {
std::cerr << "Adding landscape value (going up) for a point : " << i << " equal : " << landscape_value
@@ -1030,6 +1030,8 @@ void Persistence_landscape_on_grid::set_up_values_of_landscapes(const std::vecto
}
landscape_value += dx;
}
+
+ landscape_value = p[int_no].second - grid_min - dx * grid_interval_midpoint;
for (size_t i = grid_interval_midpoint; i <= grid_interval_end; ++i) {
if (landscape_value > 0) {
if (number_of_levels != std::numeric_limits<unsigned>::max()) {
diff --git a/src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h b/src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h
new file mode 100644
index 00000000..a3c0dc2f
--- /dev/null
+++ b/src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h
@@ -0,0 +1,317 @@
+/* This file is part of the Gudhi Library. The Gudhi library
+ * (Geometric Understanding in Higher Dimensions) is a generic C++
+ * library for computational topology.
+ *
+ * Author(s): Mathieu Carriere
+ *
+ * Copyright (C) 2018 INRIA (France)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef SLICED_WASSERSTEIN_H_
+#define SLICED_WASSERSTEIN_H_
+
+// gudhi include
+#include <gudhi/read_persistence_from_file.h>
+#include <gudhi/common_persistence_representations.h>
+#include <gudhi/Debug_utils.h>
+
+namespace Gudhi {
+namespace Persistence_representations {
+
+/**
+ * \class Sliced_Wasserstein gudhi/Sliced_Wasserstein.h
+ * \brief A class implementing the Sliced Wasserstein kernel.
+ *
+ * \ingroup Persistence_representations
+ *
+ * \details
+ * The Sliced Wasserstein kernel is defined as a Gaussian-like kernel between persistence diagrams, where the distance used for
+ * comparison is the Sliced Wasserstein distance \f$SW\f$ between persistence diagrams, defined as the integral of the 1-norm
+ * between the sorted projections of the diagrams onto all lines passing through the origin:
+ *
+ * \f$ SW(D_1,D_2)=\int_{\theta\in\mathbb{S}}\,\|\pi_\theta(D_1\cup\pi_\Delta(D_2))-\pi_\theta(D_2\cup\pi_\Delta(D_1))\|_1{\rm d}\theta\f$,
+ *
+ * where \f$\pi_\theta\f$ is the projection onto the line defined with angle \f$\theta\f$ in the unit circle \f$\mathbb{S}\f$,
+ * and \f$\pi_\Delta\f$ is the projection onto the diagonal.
+ * The integral can be either computed exactly in \f$O(n^2{\rm log}(n))\f$ time, where \f$n\f$ is the number of points
+ * in the diagrams, or approximated by sampling \f$N\f$ lines in the circle in \f$O(Nn{\rm log}(n))\f$ time. The Sliced Wasserstein Kernel is then computed as:
+ *
+ * \f$ k(D_1,D_2) = {\rm exp}\left(-\frac{SW(D_1,D_2)}{2\sigma^2}\right).\f$
+ *
+ * For more details, please see \cite pmlr-v70-carriere17a .
+ *
+**/
+
+class Sliced_Wasserstein {
+
+ protected:
+
+ Persistence_diagram diagram;
+ int approx;
+ double sigma;
+ std::vector<std::vector<double> > projections, projections_diagonal;
+
+ // **********************************
+ // Utils.
+ // **********************************
+
+ void build_rep(){
+
+ if(approx > 0){
+
+ double step = pi/this->approx;
+ int n = diagram.size();
+
+ for (int i = 0; i < this->approx; i++){
+ std::vector<double> l,l_diag;
+ for (int j = 0; j < n; j++){
+
+ double px = diagram[j].first; double py = diagram[j].second;
+ double proj_diag = (px+py)/2;
+
+ l.push_back ( px * cos(-pi/2+i*step) + py * sin(-pi/2+i*step) );
+ l_diag.push_back ( proj_diag * cos(-pi/2+i*step) + proj_diag * sin(-pi/2+i*step) );
+ }
+
+ std::sort(l.begin(), l.end()); std::sort(l_diag.begin(), l_diag.end());
+ projections.push_back(std::move(l)); projections_diagonal.push_back(std::move(l_diag));
+
+ }
+
+ diagram.clear();
+ }
+ }
+
+ // Compute the angle formed by two points of a PD
+ double compute_angle(const Persistence_diagram & diag, int i, int j) const {
+ std::pair<double,double> vect; double x1,y1, x2,y2;
+ x1 = diag[i].first; y1 = diag[i].second;
+ x2 = diag[j].first; y2 = diag[j].second;
+ if (y1 - y2 > 0){
+ vect.first = y1 - y2;
+ vect.second = x2 - x1;}
+ else{
+ if(y1 - y2 < 0){
+ vect.first = y2 - y1;
+ vect.second = x1 - x2;
+ }
+ else{
+ vect.first = 0;
+ vect.second = abs(x1 - x2);}
+ }
+ double norm = std::sqrt(vect.first*vect.first + vect.second*vect.second);
+ return asin(vect.second/norm);
+ }
+
+ // Compute the integral of |cos()| between alpha and beta, valid only if alpha is in [-pi,pi] and beta-alpha is in [0,pi]
+ double compute_int_cos(double alpha, double beta) const {
+ double res = 0;
+ if (alpha >= 0 && alpha <= pi){
+ if (cos(alpha) >= 0){
+ if(pi/2 <= beta){res = 2-sin(alpha)-sin(beta);}
+ else{res = sin(beta)-sin(alpha);}
+ }
+ else{
+ if(1.5*pi <= beta){res = 2+sin(alpha)+sin(beta);}
+ else{res = sin(alpha)-sin(beta);}
+ }
+ }
+ if (alpha >= -pi && alpha <= 0){
+ if (cos(alpha) <= 0){
+ if(-pi/2 <= beta){res = 2+sin(alpha)+sin(beta);}
+ else{res = sin(alpha)-sin(beta);}
+ }
+ else{
+ if(pi/2 <= beta){res = 2-sin(alpha)-sin(beta);}
+ else{res = sin(beta)-sin(alpha);}
+ }
+ }
+ return res;
+ }
+
+ double compute_int(double theta1, double theta2, int p, int q, const Persistence_diagram & diag1, const Persistence_diagram & diag2) const {
+ double norm = std::sqrt( (diag1[p].first-diag2[q].first)*(diag1[p].first-diag2[q].first) + (diag1[p].second-diag2[q].second)*(diag1[p].second-diag2[q].second) );
+ double angle1;
+ if (diag1[p].first > diag2[q].first)
+ angle1 = theta1 - asin( (diag1[p].second-diag2[q].second)/norm );
+ else
+ angle1 = theta1 - asin( (diag2[q].second-diag1[p].second)/norm );
+ double angle2 = angle1 + theta2 - theta1;
+ double integral = compute_int_cos(angle1,angle2);
+ return norm*integral;
+ }
+
+ // Evaluation of the Sliced Wasserstein Distance between a pair of diagrams.
+ double compute_sliced_wasserstein_distance(const Sliced_Wasserstein & second) const {
+
+ GUDHI_CHECK(this->approx != second.approx, std::invalid_argument("Error: different approx values for representations"));
+
+ Persistence_diagram diagram1 = this->diagram; Persistence_diagram diagram2 = second.diagram; double sw = 0;
+
+ if(this->approx == -1){
+
+ // Add projections onto diagonal.
+ int n1, n2; n1 = diagram1.size(); n2 = diagram2.size(); double max_ordinate = std::numeric_limits<double>::lowest();
+ for (int i = 0; i < n2; i++){
+ max_ordinate = std::max(max_ordinate, diagram2[i].second);
+ diagram1.emplace_back( (diagram2[i].first+diagram2[i].second)/2, (diagram2[i].first+diagram2[i].second)/2 );
+ }
+ for (int i = 0; i < n1; i++){
+ max_ordinate = std::max(max_ordinate, diagram1[i].second);
+ diagram2.emplace_back( (diagram1[i].first+diagram1[i].second)/2, (diagram1[i].first+diagram1[i].second)/2 );
+ }
+ int num_pts_dgm = diagram1.size();
+
+ // Slightly perturb the points so that the PDs are in generic positions.
+ int mag = 0; while(max_ordinate > 10){mag++; max_ordinate/=10;}
+ double thresh = pow(10,-5+mag);
+ srand(time(NULL));
+ for (int i = 0; i < num_pts_dgm; i++){
+ diagram1[i].first += thresh*(1.0-2.0*rand()/RAND_MAX); diagram1[i].second += thresh*(1.0-2.0*rand()/RAND_MAX);
+ diagram2[i].first += thresh*(1.0-2.0*rand()/RAND_MAX); diagram2[i].second += thresh*(1.0-2.0*rand()/RAND_MAX);
+ }
+
+ // Compute all angles in both PDs.
+ std::vector<std::pair<double, std::pair<int,int> > > angles1, angles2;
+ for (int i = 0; i < num_pts_dgm; i++){
+ for (int j = i+1; j < num_pts_dgm; j++){
+ double theta1 = compute_angle(diagram1,i,j); double theta2 = compute_angle(diagram2,i,j);
+ angles1.emplace_back(theta1, std::pair<int,int>(i,j));
+ angles2.emplace_back(theta2, std::pair<int,int>(i,j));
+ }
+ }
+
+ // Sort angles.
+ std::sort(angles1.begin(), angles1.end(), [](const std::pair<double, std::pair<int,int> >& p1, const std::pair<double, std::pair<int,int> >& p2){return (p1.first < p2.first);});
+ std::sort(angles2.begin(), angles2.end(), [](const std::pair<double, std::pair<int,int> >& p1, const std::pair<double, std::pair<int,int> >& p2){return (p1.first < p2.first);});
+
+ // Initialize orders of the points of both PDs (given by ordinates when theta = -pi/2).
+ std::vector<int> orderp1, orderp2;
+ for (int i = 0; i < num_pts_dgm; i++){ orderp1.push_back(i); orderp2.push_back(i); }
+ std::sort( orderp1.begin(), orderp1.end(), [=](int i, int j){ if(diagram1[i].second != diagram1[j].second) return (diagram1[i].second < diagram1[j].second); else return (diagram1[i].first > diagram1[j].first); } );
+ std::sort( orderp2.begin(), orderp2.end(), [=](int i, int j){ if(diagram2[i].second != diagram2[j].second) return (diagram2[i].second < diagram2[j].second); else return (diagram2[i].first > diagram2[j].first); } );
+
+ // Find the inverses of the orders.
+ std::vector<int> order1(num_pts_dgm); std::vector<int> order2(num_pts_dgm);
+ for(int i = 0; i < num_pts_dgm; i++) for (int j = 0; j < num_pts_dgm; j++) if(orderp1[j] == i){ order1[i] = j; break; }
+ for(int i = 0; i < num_pts_dgm; i++) for (int j = 0; j < num_pts_dgm; j++) if(orderp2[j] == i){ order2[i] = j; break; }
+
+ // Record all inversions of points in the orders as theta varies along the positive half-disk.
+ std::vector<std::vector<std::pair<int,double> > > anglePerm1(num_pts_dgm);
+ std::vector<std::vector<std::pair<int,double> > > anglePerm2(num_pts_dgm);
+
+ int m1 = angles1.size();
+ for (int i = 0; i < m1; i++){
+ double theta = angles1[i].first; int p = angles1[i].second.first; int q = angles1[i].second.second;
+ anglePerm1[order1[p]].emplace_back(p,theta);
+ anglePerm1[order1[q]].emplace_back(q,theta);
+ int a = order1[p]; int b = order1[q]; order1[p] = b; order1[q] = a;
+ }
+
+ int m2 = angles2.size();
+ for (int i = 0; i < m2; i++){
+ double theta = angles2[i].first; int p = angles2[i].second.first; int q = angles2[i].second.second;
+ anglePerm2[order2[p]].emplace_back(p,theta);
+ anglePerm2[order2[q]].emplace_back(q,theta);
+ int a = order2[p]; int b = order2[q]; order2[p] = b; order2[q] = a;
+ }
+
+ for (int i = 0; i < num_pts_dgm; i++){
+ anglePerm1[order1[i]].emplace_back(i,pi/2);
+ anglePerm2[order2[i]].emplace_back(i,pi/2);
+ }
+
+ // Compute the SW distance with the list of inversions.
+ for (int i = 0; i < num_pts_dgm; i++){
+ std::vector<std::pair<int,double> > u,v; u = anglePerm1[i]; v = anglePerm2[i];
+ double theta1, theta2; theta1 = -pi/2;
+ unsigned int ku, kv; ku = 0; kv = 0; theta2 = std::min(u[ku].second,v[kv].second);
+ while(theta1 != pi/2){
+ if(diagram1[u[ku].first].first != diagram2[v[kv].first].first || diagram1[u[ku].first].second != diagram2[v[kv].first].second)
+ if(theta1 != theta2)
+ sw += compute_int(theta1, theta2, u[ku].first, v[kv].first, diagram1, diagram2);
+ theta1 = theta2;
+ if ( (theta2 == u[ku].second) && ku < u.size()-1 ) ku++;
+ if ( (theta2 == v[kv].second) && kv < v.size()-1 ) kv++;
+ theta2 = std::min(u[ku].second, v[kv].second);
+ }
+ }
+ }
+
+
+ else{
+
+ double step = pi/this->approx; std::vector<double> v1, v2;
+ for (int i = 0; i < this->approx; i++){
+
+ v1.clear(); v2.clear();
+ std::merge(this->projections[i].begin(), this->projections[i].end(), second.projections_diagonal[i].begin(), second.projections_diagonal[i].end(), std::back_inserter(v1));
+ std::merge(second.projections[i].begin(), second.projections[i].end(), this->projections_diagonal[i].begin(), this->projections_diagonal[i].end(), std::back_inserter(v2));
+
+ int n = v1.size(); double f = 0;
+ for (int j = 0; j < n; j++) f += std::abs(v1[j] - v2[j]);
+ sw += f*step;
+
+ }
+ }
+
+ return sw/pi;
+ }
+
+ public:
+
+ /** \brief Sliced Wasserstein kernel constructor.
+ * \ingroup Sliced_Wasserstein
+ *
+ * @param[in] _diagram persistence diagram.
+ * @param[in] _sigma bandwidth parameter.
+ * @param[in] _approx number of directions used to approximate the integral in the Sliced Wasserstein distance, set to -1 for exact computation. If positive, then projections of the diagram
+ * points on all directions are stored in memory to reduce computation time.
+ *
+ */
+ // This class implements the following concepts: Topological_data_with_distances, Real_valued_topological_data, Topological_data_with_scalar_product
+ Sliced_Wasserstein(const Persistence_diagram & _diagram, double _sigma = 1.0, int _approx = 10):diagram(_diagram), approx(_approx), sigma(_sigma) {build_rep();}
+
+ /** \brief Evaluation of the kernel on a pair of diagrams.
+ * \ingroup Sliced_Wasserstein
+ *
+ * @pre approx and sigma attributes need to be the same for both instances.
+ * @param[in] second other instance of class Sliced_Wasserstein.
+ *
+ */
+ double compute_scalar_product(const Sliced_Wasserstein & second) const {
+ GUDHI_CHECK(this->sigma != second.sigma, std::invalid_argument("Error: different sigma values for representations"));
+ return std::exp(-compute_sliced_wasserstein_distance(second)/(2*this->sigma*this->sigma));
+ }
+
+ /** \brief Evaluation of the distance between images of diagrams in the Hilbert space of the kernel.
+ * \ingroup Sliced_Wasserstein
+ *
+ * @pre approx and sigma attributes need to be the same for both instances.
+ * @param[in] second other instance of class Sliced_Wasserstein.
+ *
+ */
+ double distance(const Sliced_Wasserstein & second) const {
+ GUDHI_CHECK(this->sigma != second.sigma, std::invalid_argument("Error: different sigma values for representations"));
+ return std::pow(this->compute_scalar_product(*this) + second.compute_scalar_product(second)-2*this->compute_scalar_product(second), 0.5);
+ }
+
+
+}; // class Sliced_Wasserstein
+} // namespace Persistence_representations
+} // namespace Gudhi
+
+#endif // SLICED_WASSERSTEIN_H_
diff --git a/src/Persistence_representations/include/gudhi/common_persistence_representations.h b/src/Persistence_representations/include/gudhi/common_persistence_representations.h
index 3d03f1f6..f0cd8146 100644
--- a/src/Persistence_representations/include/gudhi/common_persistence_representations.h
+++ b/src/Persistence_representations/include/gudhi/common_persistence_representations.h
@@ -26,17 +26,42 @@
#include <utility>
#include <string>
#include <cmath>
+#include <boost/math/constants/constants.hpp>
+
+
namespace Gudhi {
namespace Persistence_representations {
// this file contain an implementation of some common procedures used in Persistence_representations.
+static constexpr double pi = boost::math::constants::pi<double>();
+
+
+/**
+ * In this module, we use the name Persistence_diagram for the representation of a diagram in a vector of pairs of two double.
+ */
+using Persistence_diagram = std::vector<std::pair<double, double> >;
+
+/**
+ * In this module, we use the name Kernel for the representation of a function taking a pair of two points in the plane and returning a double.
+ */
+using Kernel2D = std::function<double (std::pair<double, double>, std::pair<double, double> )>;
+
+inline Kernel2D Gaussian_kernel(double sigma){
+ return [=](std::pair<double, double> p, std::pair<double, double> q){return (1.0 / (std::sqrt(2*pi)*sigma)) * std::exp( -((p.first-q.first)*(p.first-q.first) + (p.second-q.second)*(p.second-q.second)) / (2*sigma*sigma) );};
+}
+
+inline Kernel2D polynomial_kernel(double c, double d){
+ return [=](std::pair<double, double> p, std::pair<double, double> q){return std::pow( p.first*q.first + p.second*q.second + c, d);};
+}
+
+
// double epsi = std::numeric_limits<double>::epsilon();
double epsi = 0.000005;
/**
* A procedure used to compare doubles. Typically given two doubles A and B, comparing A == B is not good idea. In this
- *case, we use the procedure almostEqual with the epsi defined at
+ * case, we use the procedure almostEqual with the epsi defined at
* the top of the file. Setting up the epsi gives the user a tolerance on what should be consider equal.
**/
inline bool almost_equal(double a, double b) {
@@ -53,8 +78,7 @@ double birth_plus_deaths(std::pair<double, double> a) { return a.first + a.secon
// landscapes
/**
- * Given two points in R^2, the procedure compute the parameters A and B of the line y = Ax + B that crosses those two
- *points.
+ * Given two points in R^2, the procedure compute the parameters A and B of the line y = Ax + B that crosses those two points.
**/
std::pair<double, double> compute_parameters_of_a_line(std::pair<double, double> p1, std::pair<double, double> p2) {
double a = (p2.second - p1.second) / (p2.first - p1.first);
@@ -64,8 +88,7 @@ std::pair<double, double> compute_parameters_of_a_line(std::pair<double, double>
// landscapes
/**
- * This procedure given two points which lies on the opposite sides of x axis, compute x for which the line connecting
- *those two points crosses x axis.
+ * This procedure given two points which lies on the opposite sides of x axis, compute x for which the line connecting those two points crosses x axis.
**/
double find_zero_of_a_line_segment_between_those_two_points(std::pair<double, double> p1,
std::pair<double, double> p2) {
@@ -89,8 +112,7 @@ double find_zero_of_a_line_segment_between_those_two_points(std::pair<double, do
// landscapes
/**
* This method provides a comparison of points that is used in construction of persistence landscapes. The ordering is
- *lexicographical for the first coordinate, and reverse-lexicographical for the
- * second coordinate.
+ * lexicographical for the first coordinate, and reverse-lexicographical for the second coordinate.
**/
bool compare_points_sorting(std::pair<double, double> f, std::pair<double, double> s) {
if (f.first < s.first) {
diff --git a/src/Persistence_representations/test/CMakeLists.txt b/src/Persistence_representations/test/CMakeLists.txt
index 5e2b6910..fb650485 100644
--- a/src/Persistence_representations/test/CMakeLists.txt
+++ b/src/Persistence_representations/test/CMakeLists.txt
@@ -34,6 +34,11 @@ target_link_libraries(Read_persistence_from_file_test_unit ${Boost_UNIT_TEST_FRA
gudhi_add_coverage_test(Read_persistence_from_file_test_unit)
+add_executable ( kernels_unit kernels.cpp )
+target_link_libraries(kernels_unit ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY})
+
+gudhi_add_coverage_test(kernels_unit)
+
if (NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.8.1)
add_executable (Persistence_intervals_with_distances_test_unit persistence_intervals_with_distances_test.cpp )
target_link_libraries(Persistence_intervals_with_distances_test_unit ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY})
diff --git a/src/Persistence_representations/test/kernels.cpp b/src/Persistence_representations/test/kernels.cpp
new file mode 100644
index 00000000..c95e8086
--- /dev/null
+++ b/src/Persistence_representations/test/kernels.cpp
@@ -0,0 +1,55 @@
+/* This file is part of the Gudhi Library. The Gudhi library
+ * (Geometric Understanding in Higher Dimensions) is a generic C++
+ * library for computational topology.
+ *
+ * Author(s): Mathieu Carrière
+ *
+ * Copyright (C) 2018 INRIA
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define BOOST_TEST_DYN_LINK
+#define BOOST_TEST_MODULE "kernel"
+
+#include <boost/test/unit_test.hpp>
+#include <cmath> // float comparison
+#include <limits>
+#include <string>
+#include <vector>
+#include <algorithm> // std::max
+#include <gudhi/Persistence_heat_maps.h>
+#include <gudhi/common_persistence_representations.h>
+#include <gudhi/Sliced_Wasserstein.h>
+#include <gudhi/distance_functions.h>
+#include <gudhi/reader_utils.h>
+
+using constant_scaling_function = Gudhi::Persistence_representations::constant_scaling_function;
+using SW = Gudhi::Persistence_representations::Sliced_Wasserstein;
+using PWG = Gudhi::Persistence_representations::Persistence_heat_maps<constant_scaling_function>;
+using Persistence_diagram = std::vector<std::pair<double,double> >;
+
+BOOST_AUTO_TEST_CASE(check_PWG) {
+ Persistence_diagram v1, v2; v1.emplace_back(0,1); v2.emplace_back(0,2);
+ PWG pwg1(v1, Gudhi::Persistence_representations::Gaussian_kernel(1.0));
+ PWG pwg2(v2, Gudhi::Persistence_representations::Gaussian_kernel(1.0));
+ BOOST_CHECK(std::abs(pwg1.compute_scalar_product(pwg2) - std::exp(-0.5)/(std::sqrt(2*Gudhi::Persistence_representations::pi))) <= 1e-3);
+}
+
+BOOST_AUTO_TEST_CASE(check_SW) {
+ Persistence_diagram v1, v2; v1.emplace_back(0,1); v2.emplace_back(0,2);
+ SW sw1(v1, 1.0, 100); SW swex1(v1, 1.0, -1);
+ SW sw2(v2, 1.0, 100); SW swex2(v2, 1.0, -1);
+ BOOST_CHECK(std::abs(sw1.compute_scalar_product(sw2) - swex1.compute_scalar_product(swex2)) <= 1e-1);
+}