Diffstat (limited to 'src/Persistence_representations/include/gudhi')
-rw-r--r--  src/Persistence_representations/include/gudhi/Persistence_heat_maps.h              | 316
-rw-r--r--  src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h                 | 392
-rw-r--r--  src/Persistence_representations/include/gudhi/common_persistence_representations.h |  22
3 files changed, 608 insertions(+), 122 deletions(-)
diff --git a/src/Persistence_representations/include/gudhi/Persistence_heat_maps.h b/src/Persistence_representations/include/gudhi/Persistence_heat_maps.h
index 35e51e63..a8458bda 100644
--- a/src/Persistence_representations/include/gudhi/Persistence_heat_maps.h
+++ b/src/Persistence_representations/include/gudhi/Persistence_heat_maps.h
@@ -2,9 +2,12 @@
* (Geometric Understanding in Higher Dimensions) is a generic C++
* library for computational topology.
*
- * Author(s): Pawel Dlotko
+ * Author(s): Pawel Dlotko and Mathieu Carriere
*
- * Copyright (C) 2016 Inria
+ * Modifications:
+ * - 2018/04 MC: Add discrete/non-discrete mechanism and non-discrete version
+ *
+ * Copyright (C) 2019 Inria
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -44,7 +47,7 @@ namespace Persistence_representations {
/**
* This is a simple procedure to create an n by n (or 2*pixel_radius by 2*pixel_radius) cubical approximation of a
* Gaussian kernel.
-**/
+ **/
std::vector<std::vector<double> > create_Gaussian_filter(size_t pixel_radius, double sigma) {
bool dbg = false;
// we are computing the kernel mask to 2 standard deviations away from the center. We discretize it in a grid of a
@@ -74,7 +77,7 @@ std::vector<std::vector<double> > create_Gaussian_filter(size_t pixel_radius, do
for (int y = -pixel_radius; y <= static_cast<int>(pixel_radius); y++) {
double real_x = 2 * sigma * x / pixel_radius;
double real_y = 2 * sigma * y / pixel_radius;
- r = sqrt(real_x * real_x + real_y * real_y);
+ r = std::sqrt(real_x * real_x + real_y * real_y);
kernel[x + pixel_radius][y + pixel_radius] = (exp(-(r * r) / sigma_sqr)) / (3.141592 * sigma_sqr);
sum += kernel[x + pixel_radius][y + pixel_radius];
}
@@ -100,18 +103,18 @@ std::vector<std::vector<double> > create_Gaussian_filter(size_t pixel_radius, do
}
/*
-* There are various options to scale the points depending on their location. One can for instance:
-* (1) do nothing (scale all of them with the weight 1), as in the function constant_function
-* (2) Scale them by the distance to the diagonal. This is implemented in function
-* (3) Scale them with the square of their distance to diagonal. This is implemented in function
-* (4) Scale them with
-*/
+ * There are various options to scale the points depending on their location. One can for instance:
+ * (1) do nothing (scale all of them with the weight 1), as in the class constant_scaling_function
+ * (2) scale them by their distance to the diagonal, as in the class distance_from_diagonal_scaling
+ * (3) scale them by the square of their distance to the diagonal, as in the class squared_distance_from_diagonal_scaling
+ * (4) scale them by the arctangent of their persistence, as in the class arc_tan_of_persistence_of_point
+ */
/**
* This is one of the scaling functions used to weight points depending on their persistence and/or location in the
* diagram.
* This particular functionality is a function which always assigns the value 1 to a point in the diagram.
-**/
+ **/
class constant_scaling_function {
public:
double operator()(const std::pair<double, double>& point_in_diagram) { return 1; }
@@ -121,13 +124,13 @@ class constant_scaling_function {
* This is one of the scaling functions used to weight points depending on their persistence and/or location in the
* diagram.
* The scaling given by this function to a point (b,d) is the Euclidean distance of (b,d) from the diagonal.
-**/
+ **/
class distance_from_diagonal_scaling {
public:
double operator()(const std::pair<double, double>& point_in_diagram) {
// (point_in_diagram.first+point_in_diagram.second)/2.0
- return sqrt(pow((point_in_diagram.first - (point_in_diagram.first + point_in_diagram.second) / 2.0), 2) +
- pow((point_in_diagram.second - (point_in_diagram.first + point_in_diagram.second) / 2.0), 2));
+ return std::sqrt(std::pow((point_in_diagram.first - (point_in_diagram.first + point_in_diagram.second) / 2.0), 2) +
+ std::pow((point_in_diagram.second - (point_in_diagram.first + point_in_diagram.second) / 2.0), 2));
}
};
@@ -135,12 +138,12 @@ class distance_from_diagonal_scaling {
* This is one of the scaling functions used to weight points depending on their persistence and/or location in the
* diagram.
* The scaling given by this function to a point (b,d) is the squared Euclidean distance of (b,d) from the diagonal.
-**/
+ **/
class squared_distance_from_diagonal_scaling {
public:
double operator()(const std::pair<double, double>& point_in_diagram) {
- return pow((point_in_diagram.first - (point_in_diagram.first + point_in_diagram.second) / 2.0), 2) +
- pow((point_in_diagram.second - (point_in_diagram.first + point_in_diagram.second) / 2.0), 2);
+ return std::pow((point_in_diagram.first - (point_in_diagram.first + point_in_diagram.second) / 2.0), 2) +
+ std::pow((point_in_diagram.second - (point_in_diagram.first + point_in_diagram.second) / 2.0), 2);
}
};
@@ -148,7 +151,7 @@ class squared_distance_from_diagonal_scaling {
* This is one of the scaling functions used to weight points depending on their persistence and/or location in the
* diagram.
* The scaling given by this function to a point (b,d) is the arctangent of the persistence of the point, i.e. atan(d-b).
-**/
+ **/
class arc_tan_of_persistence_of_point {
public:
double operator()(const std::pair<double, double>& point_in_diagram) {
@@ -162,16 +165,16 @@ class arc_tan_of_persistence_of_point {
* This scaling function does not only depend on a point (b,d) in the diagram, but on the whole diagram.
* The pair with the longest persistence gets a scaling of 1. Any other pair gets a scaling in [0,1], proportional
* to the persistence of that pair.
-**/
+ **/
class weight_by_setting_maximal_interval_to_have_length_one {
public:
- weight_by_setting_maximal_interval_to_have_length_one(double len) : letngth_of_maximal_interval(len) {}
+ weight_by_setting_maximal_interval_to_have_length_one(double len) : length_of_maximal_interval(len) {}
double operator()(const std::pair<double, double>& point_in_diagram) {
- return (point_in_diagram.second - point_in_diagram.first) / this->letngth_of_maximal_interval;
+ return (point_in_diagram.second - point_in_diagram.first) / this->length_of_maximal_interval;
}
private:
- double letngth_of_maximal_interval;
+ double length_of_maximal_interval;
};
/**
@@ -179,7 +182,7 @@ class weight_by_setting_maximal_interval_to_have_length_one {
* \brief A class implementing persistence heat maps.
*
* \ingroup Persistence_representations
-**/
+ **/
// This class implements the following concepts: Vectorized_topological_data, Topological_data_with_distances,
// Real_valued_topological_data, Topological_data_with_averages, Topological_data_with_scalar_product
@@ -189,7 +192,7 @@ class Persistence_heat_maps {
/**
* The default constructor. The scaling function is set to a constant function. The image is not
* erased below the diagonal. The Gaussian has diameter 5.
- **/
+ **/
Persistence_heat_maps() {
Scalling_of_kernels f;
this->f = f;
@@ -210,7 +213,7 @@ class Persistence_heat_maps {
*std::numeric_limits<double>::max(), in which case the program computes the values based on the data,
* (6) a max x and y value of points that are to be taken into account. By default it is set to
*std::numeric_limits<double>::max(), in which case the program computes the values based on the data.
- **/
+ **/
Persistence_heat_maps(const std::vector<std::pair<double, double> >& interval,
std::vector<std::vector<double> > filter = create_Gaussian_filter(5, 1),
bool erase_below_diagonal = false, size_t number_of_pixels = 1000,
@@ -218,12 +221,12 @@ class Persistence_heat_maps {
double max_ = std::numeric_limits<double>::max());
/**
- * Construction that takes at the input a name of a file with persistence intervals, a filter (radius 5 by
- *default), a scaling function (constant by default), a boolean value which determines if the area of image below
- *diagonal should, or should not be erased (should by default). The next parameter is the number of pixels in each
- *direction (set to 1000 by default) and min and max values of images (both set to std::numeric_limits<double>::max()
- *by default. If this is the case, the program will pick the right values based on the data).
- **/
+ * Construction that takes at the input a name of a file with persistence intervals, a filter (radius 5 by
+ *default), a scaling function (constant by default), a boolean value which determines if the area of image below
+ *diagonal should, or should not be erased (should by default). The next parameter is the number of pixels in each
+ *direction (set to 1000 by default) and min and max values of images (both set to std::numeric_limits<double>::max()
+ *by default. If this is the case, the program will pick the right values based on the data).
+ **/
/**
* Construction that takes at the input the following parameters:
* (1) A name of a file with persistence intervals. The file should be readable by the function
@@ -237,7 +240,7 @@ class Persistence_heat_maps {
*std::numeric_limits<double>::max(), in which case the program computes the values based on the data,
* (6) a max x and y value of points that are to be taken into account. By default it is set to
*std::numeric_limits<double>::max(), in which case the program computes the values based on the data.
- **/
+ **/
Persistence_heat_maps(const char* filename, std::vector<std::vector<double> > filter = create_Gaussian_filter(5, 1),
bool erase_below_diagonal = false, size_t number_of_pixels = 1000,
double min_ = std::numeric_limits<double>::max(),
@@ -245,22 +248,37 @@ class Persistence_heat_maps {
unsigned dimension = std::numeric_limits<unsigned>::max());
/**
+ * Construction that takes as inputs (1) the diagram, and (2) the grid parameters (min, max and number of samples for
+ * x and y axes)
+ **/
+ Persistence_heat_maps(const std::vector<std::pair<double, double> >& interval,
+ const std::function<double(std::pair<double, double>, std::pair<double, double>)>& kernel,
+ size_t number_of_x_pixels, size_t number_of_y_pixels, double min_x = 0, double max_x = 1,
+ double min_y = 0, double max_y = 1);
+
+ /**
+ * Construction that takes only the diagram as input (weight and 2D kernel are template parameters)
+ **/
+ Persistence_heat_maps(const std::vector<std::pair<double, double> >& interval,
+ const std::function<double(std::pair<double, double>, std::pair<double, double>)>& kernel);
+
+ /**
* Compute a mean value of a collection of heat maps and store it in the current object. Note that all the persistence
* maps sent in a vector to this procedure need to have the same parameters.
* If this is not the case, the program will throw an exception.
- **/
+ **/
void compute_mean(const std::vector<Persistence_heat_maps*>& maps);
/**
* Compute a median value of a collection of heat maps and store it in the current object. Note that all the
* persistence maps sent in a vector to this procedure need to have the same parameters.
* If this is not the case, the program will throw an exception.
- **/
+ **/
void compute_median(const std::vector<Persistence_heat_maps*>& maps);
/**
* Compute the percentage of active values (i.e. values above the cutoff) of a collection of heat maps.
- **/
+ **/
void compute_percentage_of_active(const std::vector<Persistence_heat_maps*>& maps, size_t cutoff = 1);
// put to file subroutine
@@ -268,18 +286,18 @@ class Persistence_heat_maps {
* The function outputs the persistence image to a text file. The format is as follows:
* In the first line, the min and max values of the image are stored.
* In the next lines, we have the persistence image in the form of a bitmap.
- **/
+ **/
void print_to_file(const char* filename) const;
/**
* A function that loads a heat map from a file to the current object (and erases whatever was stored in the current
* object before).
- **/
+ **/
void load_from_file(const char* filename);
/**
* The procedure checks whether min_, max_ and the sizes of heat_map are the same for the two compared objects.
- **/
+ **/
inline bool check_if_the_same(const Persistence_heat_maps& second) const {
bool dbg = false;
if (this->heat_map.size() != second.heat_map.size()) {
@@ -302,17 +320,17 @@ class Persistence_heat_maps {
/**
* Return the minimal range value of the persistence image.
- **/
+ **/
inline double get_min() const { return this->min_; }
/**
* Return the maximal range value of the persistence image.
- **/
+ **/
inline double get_max() const { return this->max_; }
/**
* Operator == to check if two persistence heat maps are the same.
- **/
+ **/
bool operator==(const Persistence_heat_maps& rhs) const {
bool dbg = false;
if (!this->check_if_the_same(rhs)) {
@@ -335,12 +353,12 @@ class Persistence_heat_maps {
/**
* Operator != to check if two persistence heat maps are different.
- **/
+ **/
bool operator!=(const Persistence_heat_maps& rhs) const { return !((*this) == rhs); }
/**
* A function to generate a gnuplot script to visualize the persistence image.
- **/
+ **/
void plot(const char* filename) const;
template <typename Operation_type>
@@ -370,7 +388,7 @@ class Persistence_heat_maps {
/**
* Multiplication of Persistence_heat_maps by a scalar (so that all values of the heat map get multiplied by that
* scalar).
- **/
+ **/
Persistence_heat_maps multiply_by_scalar(double scalar) const {
Persistence_heat_maps result;
result.min_ = this->min_;
@@ -389,56 +407,56 @@ class Persistence_heat_maps {
/**
* This function computes a sum of two objects of a type Persistence_heat_maps.
- **/
+ **/
friend Persistence_heat_maps operator+(const Persistence_heat_maps& first, const Persistence_heat_maps& second) {
return operation_on_pair_of_heat_maps(first, second, std::plus<double>());
}
/**
-* This function computes a difference of two objects of a type Persistence_heat_maps.
-**/
+ * This function computes the difference of two objects of type Persistence_heat_maps.
+ **/
friend Persistence_heat_maps operator-(const Persistence_heat_maps& first, const Persistence_heat_maps& second) {
return operation_on_pair_of_heat_maps(first, second, std::minus<double>());
}
/**
-* This function computes a product of an object of a type Persistence_heat_maps with real number.
-**/
+ * This function computes the product of an object of type Persistence_heat_maps with a real number.
+ **/
friend Persistence_heat_maps operator*(double scalar, const Persistence_heat_maps& A) {
return A.multiply_by_scalar(scalar);
}
/**
-* This function computes a product of an object of a type Persistence_heat_maps with real number.
-**/
+ * This function computes the product of an object of type Persistence_heat_maps with a real number.
+ **/
friend Persistence_heat_maps operator*(const Persistence_heat_maps& A, double scalar) {
return A.multiply_by_scalar(scalar);
}
/**
-* This function computes a product of an object of a type Persistence_heat_maps with real number.
-**/
+ * This function computes the product of an object of type Persistence_heat_maps with a real number.
+ **/
Persistence_heat_maps operator*(double scalar) { return this->multiply_by_scalar(scalar); }
/**
* += operator for Persistence_heat_maps.
- **/
+ **/
Persistence_heat_maps operator+=(const Persistence_heat_maps& rhs) {
*this = *this + rhs;
return *this;
}
/**
- * -= operator for Persistence_heat_maps.
- **/
+ * -= operator for Persistence_heat_maps.
+ **/
Persistence_heat_maps operator-=(const Persistence_heat_maps& rhs) {
*this = *this - rhs;
return *this;
}
/**
- * *= operator for Persistence_heat_maps.
- **/
+ * *= operator for Persistence_heat_maps.
+ **/
Persistence_heat_maps operator*=(double x) {
*this = *this * x;
return *this;
}
/**
- * /= operator for Persistence_heat_maps.
- **/
+ * /= operator for Persistence_heat_maps.
+ **/
Persistence_heat_maps operator/=(double x) {
if (x == 0) throw("In operator /=, division by 0. Program terminated.");
*this = *this * (1 / x);
@@ -448,14 +466,14 @@ class Persistence_heat_maps {
// Implementations of functions for various concepts.
/**
- * This function produce a vector of doubles based on a persistence heat map. It is required in a concept
+ * This function produces a vector of doubles based on a persistence heat map. It is required by the concept
* Vectorized_topological_data
- */
+ */
std::vector<double> vectorize(int number_of_function) const;
/**
- * This function return the number of functions that allows vectorization of persistence heat map. It is required
- *in a concept Vectorized_topological_data.
- **/
+ * This function returns the number of functions that allow vectorization of the persistence heat map. It is required
+ * by the concept Vectorized_topological_data.
+ **/
size_t number_of_vectorize_functions() const { return this->number_of_functions_for_vectorization; }
/**
@@ -464,45 +482,45 @@ class Persistence_heat_maps {
* At the moment this function is not tested, since it is quite likely to be changed in the future. Given this, when
*using it, keep in mind that it
* will be most likely changed in the next versions.
- **/
+ **/
double project_to_R(int number_of_function) const;
/**
* The function gives the number of possible projections to R. This function is required by the
*Real_valued_topological_data concept.
- **/
+ **/
size_t number_of_projections_to_R() const { return this->number_of_functions_for_projections_to_reals; }
/**
- * A function to compute distance between persistence heat maps.
- * The parameter of this function is a const reference to an object of a class Persistence_heat_maps.
- * This function is required in Topological_data_with_distances concept.
-* For max norm distance, set power to std::numeric_limits<double>::max()
- **/
+ * A function to compute distance between persistence heat maps.
+ * The parameter of this function is a const reference to an object of a class Persistence_heat_maps.
+ * This function is required in Topological_data_with_distances concept.
+ * For max norm distance, set power to std::numeric_limits<double>::max()
+ **/
double distance(const Persistence_heat_maps& second_, double power = 1) const;
/**
* A function to compute the averaged persistence heat map, based on a vector of persistence heat maps.
* This function is required by Topological_data_with_averages concept.
- **/
+ **/
void compute_average(const std::vector<Persistence_heat_maps*>& to_average);
/**
- * A function to compute scalar product of persistence heat maps.
- * The parameter of this function is a const reference to an object of a class Persistence_heat_maps.
- * This function is required in Topological_data_with_scalar_product concept.
- **/
+ * A function to compute scalar product of persistence heat maps.
+ * The parameter of this function is a const reference to an object of a class Persistence_heat_maps.
+ * This function is required in Topological_data_with_scalar_product concept.
+ **/
double compute_scalar_product(const Persistence_heat_maps& second_) const;
// end of implementation of functions needed for concepts.
/**
* The x-range of the persistence heat map.
- **/
+ **/
std::pair<double, double> get_x_range() const { return std::make_pair(this->min_, this->max_); }
/**
* The y-range of the persistence heat map.
- **/
+ **/
std::pair<double, double> get_y_range() const { return this->get_x_range(); }
protected:
@@ -512,7 +530,6 @@ class Persistence_heat_maps {
size_t number_of_functions_for_projections_to_reals;
void construct(const std::vector<std::pair<double, double> >& intervals_,
std::vector<std::vector<double> > filter = create_Gaussian_filter(5, 1),
-
bool erase_below_diagonal = false, size_t number_of_pixels = 1000,
double min_ = std::numeric_limits<double>::max(), double max_ = std::numeric_limits<double>::max());
@@ -521,6 +538,12 @@ class Persistence_heat_maps {
this->number_of_functions_for_projections_to_reals = 1;
}
+ // Boolean indicating if we are computing persistence image (true) or persistence weighted gaussian kernel (false)
+ bool discrete = true;
+ std::function<double(std::pair<double, double>, std::pair<double, double>)> kernel;
+ std::vector<std::pair<double, double> > interval;
+ std::vector<double> weights;
+
// data
Scalling_of_kernels f;
bool erase_below_diagonal;
@@ -529,6 +552,45 @@ class Persistence_heat_maps {
std::vector<std::vector<double> > heat_map;
};
+template <typename Scalling_of_kernels>
+Persistence_heat_maps<Scalling_of_kernels>::Persistence_heat_maps(
+ const std::vector<std::pair<double, double> >& interval,
+ const std::function<double(std::pair<double, double>, std::pair<double, double>)>& kernel,
+ size_t number_of_x_pixels, size_t number_of_y_pixels, double min_x, double max_x, double min_y, double max_y) {
+ this->discrete = true;
+ this->min_ = min_x;
+ this->max_ = max_x;
+ this->heat_map.resize(number_of_y_pixels);
+ double step_x = (max_x - min_x) / (number_of_x_pixels - 1);
+ double step_y = (max_y - min_y) / (number_of_y_pixels - 1);
+
+ int num_pts = interval.size();
+ for (size_t i = 0; i < number_of_y_pixels; i++) {
+ double y = min_y + i * step_y;
+ this->heat_map[i].reserve(number_of_x_pixels);
+ for (size_t j = 0; j < number_of_x_pixels; j++) {
+ double x = min_x + j * step_x;
+ std::pair<double, double> grid_point(x, y);
+ double pixel_value = 0;
+ for (int k = 0; k < num_pts; k++) pixel_value += this->f(interval[k]) * kernel(interval[k], grid_point);
+ this->heat_map[i].push_back(pixel_value);
+ }
+ }
+ this->set_up_parameters_for_basic_classes();
+}
+
+template <typename Scalling_of_kernels>
+Persistence_heat_maps<Scalling_of_kernels>::Persistence_heat_maps(
+ const std::vector<std::pair<double, double> >& interval,
+ const std::function<double(std::pair<double, double>, std::pair<double, double>)>& kernel) {
+ this->discrete = false;
+ this->interval = interval;
+ this->kernel = kernel;
+ int num_pts = this->interval.size();
+ for (int i = 0; i < num_pts; i++) this->weights.push_back(this->f(this->interval[i]));
+ this->set_up_parameters_for_basic_classes();
+}
+
// if min_ == max_, then the program is requested to set up the values itself based on persistence intervals
template <typename Scalling_of_kernels>
void Persistence_heat_maps<Scalling_of_kernels>::construct(const std::vector<std::pair<double, double> >& intervals_,
@@ -826,13 +888,18 @@ void Persistence_heat_maps<Scalling_of_kernels>::load_from_file(const char* file
// Concretizations of virtual methods:
template <typename Scalling_of_kernels>
std::vector<double> Persistence_heat_maps<Scalling_of_kernels>::vectorize(int number_of_function) const {
+ std::vector<double> result;
+ if (!discrete) {
+ std::cout << "No vectorize method in case of infinite dimensional vectorization" << std::endl;
+ return result;
+ }
+
// convert this->heat_map into one large vector:
size_t size_of_result = 0;
for (size_t i = 0; i != this->heat_map.size(); ++i) {
size_of_result += this->heat_map[i].size();
}
- std::vector<double> result;
result.reserve(size_of_result);
for (size_t i = 0; i != this->heat_map.size(); ++i) {
@@ -846,34 +913,39 @@ std::vector<double> Persistence_heat_maps<Scalling_of_kernels>::vectorize(int nu
template <typename Scalling_of_kernels>
double Persistence_heat_maps<Scalling_of_kernels>::distance(const Persistence_heat_maps& second, double power) const {
- // first we need to check if (*this) and second are defined on the same domain and have the same dimensions:
- if (!this->check_if_the_same(second)) {
- std::cerr << "The persistence images are of non compatible sizes. We cannot therefore compute distance between "
- "them. The program will now terminate";
- throw "The persistence images are of non compatible sizes. The program will now terminate";
- }
+ if (this->discrete) {
+ // first we need to check if (*this) and second are defined on the same domain and have the same dimensions:
+ if (!this->check_if_the_same(second)) {
+ std::cerr << "The persistence images are of non compatible sizes. We cannot therefore compute distance between "
+ "them. The program will now terminate";
+ throw "The persistence images are of non compatible sizes. The program will now terminate";
+ }
- // if we are here, we know that the two persistence images are defined on the same domain, so we can start computing
- // their distances:
+ // if we are here, we know that the two persistence images are defined on the same domain, so we can start
+ // computing their distances:
- double distance = 0;
- if (power < std::numeric_limits<double>::max()) {
- for (size_t i = 0; i != this->heat_map.size(); ++i) {
- for (size_t j = 0; j != this->heat_map[i].size(); ++j) {
- distance += pow(fabs(this->heat_map[i][j] - second.heat_map[i][j]), power);
+ double distance = 0;
+ if (power < std::numeric_limits<double>::max()) {
+ for (size_t i = 0; i != this->heat_map.size(); ++i) {
+ for (size_t j = 0; j != this->heat_map[i].size(); ++j) {
+ distance += std::pow(std::fabs(this->heat_map[i][j] - second.heat_map[i][j]), power);
+ }
}
- }
- } else {
- // in this case, we compute max norm distance
- for (size_t i = 0; i != this->heat_map.size(); ++i) {
- for (size_t j = 0; j != this->heat_map[i].size(); ++j) {
- if (distance < fabs(this->heat_map[i][j] - second.heat_map[i][j])) {
- distance = fabs(this->heat_map[i][j] - second.heat_map[i][j]);
+ } else {
+ // in this case, we compute max norm distance
+ for (size_t i = 0; i != this->heat_map.size(); ++i) {
+ for (size_t j = 0; j != this->heat_map[i].size(); ++j) {
+ if (distance < std::fabs(this->heat_map[i][j] - second.heat_map[i][j])) {
+ distance = std::fabs(this->heat_map[i][j] - second.heat_map[i][j]);
+ }
}
}
}
+ return distance;
+ } else {
+ return std::sqrt(this->compute_scalar_product(*this) + second.compute_scalar_product(second) -
+ 2 * this->compute_scalar_product(second));
}
- return distance;
}
template <typename Scalling_of_kernels>
@@ -895,22 +967,36 @@ void Persistence_heat_maps<Scalling_of_kernels>::compute_average(
template <typename Scalling_of_kernels>
double Persistence_heat_maps<Scalling_of_kernels>::compute_scalar_product(const Persistence_heat_maps& second) const {
- // first we need to check if (*this) and second are defined on the same domain and have the same dimensions:
- if (!this->check_if_the_same(second)) {
- std::cerr << "The persistence images are of non compatible sizes. We cannot therefore compute distance between "
- "them. The program will now terminate";
- throw "The persistence images are of non compatible sizes. The program will now terminate";
- }
+ if (discrete) {
+ // first we need to check if (*this) and second are defined on the same domain and have the same dimensions:
+ if (!this->check_if_the_same(second)) {
+ std::cerr << "The persistence images are of non compatible sizes. We cannot therefore compute distance between "
+ "them. The program will now terminate";
+ throw "The persistence images are of non compatible sizes. The program will now terminate";
+ }
- // if we are here, we know that the two persistence images are defined on the same domain, so we can start computing
- // their scalar product:
- double scalar_prod = 0;
- for (size_t i = 0; i != this->heat_map.size(); ++i) {
- for (size_t j = 0; j != this->heat_map[i].size(); ++j) {
- scalar_prod += this->heat_map[i][j] * second.heat_map[i][j];
+ // if we are here, we know that the two persistence images are defined on the same domain, so we can start computing
+ // their scalar product:
+ double scalar_prod = 0;
+ for (size_t i = 0; i != this->heat_map.size(); ++i) {
+ for (size_t j = 0; j != this->heat_map[i].size(); ++j) {
+ scalar_prod += this->heat_map[i][j] * second.heat_map[i][j];
+ }
+ }
+ return scalar_prod;
+ } else {
+ int num_pts1 = this->interval.size();
+ int num_pts2 = second.interval.size();
+ double kernel_val = 0;
+ for (int i = 0; i < num_pts1; i++) {
+ std::pair<double, double> pi = this->interval[i];
+ for (int j = 0; j < num_pts2; j++) {
+ std::pair<double, double> pj = second.interval[j];
+ kernel_val += this->weights[i] * second.weights[j] * this->kernel(pi, pj);
+ }
}
+ return kernel_val;
}
- return scalar_prod;
}
} // namespace Persistence_representations
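For reference, a minimal usage sketch of the two constructors added above. The toy diagram, the hand-rolled Gaussian kernel lambda and all numeric parameters are illustrative assumptions, not part of this patch:

#include <gudhi/Persistence_heat_maps.h>

#include <cmath>
#include <functional>
#include <iostream>
#include <utility>
#include <vector>

int main() {
  using Gudhi::Persistence_representations::constant_scaling_function;
  using Gudhi::Persistence_representations::Persistence_heat_maps;

  // Toy diagram and an illustrative 2D Gaussian kernel.
  std::vector<std::pair<double, double> > diagram = {{0.1, 0.6}, {0.2, 0.9}, {0.4, 0.7}};
  double sigma = 0.1;
  std::function<double(std::pair<double, double>, std::pair<double, double>)> gaussian =
      [sigma](std::pair<double, double> p, std::pair<double, double> q) {
        double dx = p.first - q.first, dy = p.second - q.second;
        return std::exp(-(dx * dx + dy * dy) / (2 * sigma * sigma));
      };

  // Discrete version: a persistence image sampled on a 20 x 20 grid over [0,1] x [0,1].
  Persistence_heat_maps<constant_scaling_function> image(diagram, gaussian, 20, 20, 0, 1, 0, 1);
  std::cout << "number of pixels: " << image.vectorize(0).size() << std::endl;

  // Non-discrete version: only scalar products and distances are available.
  Persistence_heat_maps<constant_scaling_function> pwg1(diagram, gaussian);
  Persistence_heat_maps<constant_scaling_function> pwg2(diagram, gaussian);
  std::cout << "scalar product: " << pwg1.compute_scalar_product(pwg2) << std::endl;
  std::cout << "distance:       " << pwg1.distance(pwg2) << std::endl;
  return 0;
}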
diff --git a/src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h b/src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h
new file mode 100644
index 00000000..18165c5f
--- /dev/null
+++ b/src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h
@@ -0,0 +1,392 @@
+/* This file is part of the Gudhi Library. The Gudhi library
+ * (Geometric Understanding in Higher Dimensions) is a generic C++
+ * library for computational topology.
+ *
+ * Author(s): Mathieu Carriere
+ *
+ * Copyright (C) 2018 Inria
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef SLICED_WASSERSTEIN_H_
+#define SLICED_WASSERSTEIN_H_
+
+// gudhi include
+#include <gudhi/read_persistence_from_file.h>
+#include <gudhi/common_persistence_representations.h>
+#include <gudhi/Debug_utils.h>
+
+#include <vector> // for std::vector<>
+#include <utility> // for std::pair<>, std::move
+#include <algorithm> // for std::sort, std::max, std::merge
+#include <cmath> // for std::abs, std::sqrt
+#include <stdexcept> // for std::invalid_argument
+#include <random> // for std::random_device
+
+namespace Gudhi {
+namespace Persistence_representations {
+
+/**
+ * \class Sliced_Wasserstein gudhi/Sliced_Wasserstein.h
+ * \brief A class implementing the Sliced Wasserstein kernel.
+ *
+ * \ingroup Persistence_representations
+ *
+ * \details
+ * In this class, we compute infinite-dimensional representations of persistence diagrams by using the
+ * Sliced Wasserstein kernel (see \ref sec_persistence_kernels for more details on kernels). We recall that
+ * infinite-dimensional representations are defined implicitly, so only scalar products and distances are available for
+ * the representations defined in this class.
+ * The Sliced Wasserstein kernel is defined as a Gaussian-like kernel between persistence diagrams, where the distance
+ * used for comparison is the Sliced Wasserstein distance \f$SW\f$ between persistence diagrams, defined as the
+ * integral of the 1-norm between the sorted projections of the diagrams onto all lines passing through the origin:
+ *
+ * \f$ SW(D_1,D_2)=\int_{\theta\in\mathbb{S}}\,\|\pi_\theta(D_1\cup\pi_\Delta(D_2))-\pi_\theta(D_2\cup\pi_\Delta(D_1))\
+ * |_1{\rm d}\theta\f$,
+ *
+ * where \f$\pi_\theta\f$ is the projection onto the line defined with angle \f$\theta\f$ in the unit circle
+ * \f$\mathbb{S}\f$, and \f$\pi_\Delta\f$ is the projection onto the diagonal.
+ * Assuming that the diagrams are in general position (i.e. there is no collinear triple), the integral can be computed
+ * exactly in \f$O(n^2{\rm log}(n))\f$ time, where \f$n\f$ is the number of points in the diagrams. We provide two
+ * approximations of the integral: one in which we slightly perturb the diagram points so that they are in general
+ * position, and another in which we approximate the integral by sampling \f$N\f$ lines in the circle in
+ * \f$O(Nn{\rm log}(n))\f$ time. The Sliced Wasserstein Kernel is then computed as:
+ *
+ * \f$ k(D_1,D_2) = {\rm exp}\left(-\frac{SW(D_1,D_2)}{2\sigma^2}\right).\f$
+ *
+ * The first method is usually much more accurate but also
+ * much slower. For more details, please see \cite pmlr-v70-carriere17a .
+ *
+ **/
+
+class Sliced_Wasserstein {
+ protected:
+ Persistence_diagram diagram;
+ int approx;
+ double sigma;
+ std::vector<std::vector<double> > projections, projections_diagonal;
+
+ // **********************************
+ // Utils.
+ // **********************************
+
+ void build_rep() {
+ if (approx > 0) {
+ double step = pi / this->approx;
+ int n = diagram.size();
+
+ for (int i = 0; i < this->approx; i++) {
+ std::vector<double> l, l_diag;
+ for (int j = 0; j < n; j++) {
+ double px = diagram[j].first;
+ double py = diagram[j].second;
+ double proj_diag = (px + py) / 2;
+
+ l.push_back(px * cos(-pi / 2 + i * step) + py * sin(-pi / 2 + i * step));
+ l_diag.push_back(proj_diag * cos(-pi / 2 + i * step) + proj_diag * sin(-pi / 2 + i * step));
+ }
+
+ std::sort(l.begin(), l.end());
+ std::sort(l_diag.begin(), l_diag.end());
+ projections.push_back(std::move(l));
+ projections_diagonal.push_back(std::move(l_diag));
+ }
+
+ diagram.clear();
+ }
+ }
+
+ // Compute the angle formed by two points of a PD
+ double compute_angle(const Persistence_diagram& diag, int i, int j) const {
+ if (diag[i].second == diag[j].second)
+ return pi / 2;
+ else
+ return atan((diag[j].first - diag[i].first) / (diag[i].second - diag[j].second));
+ }
+
+ // Compute the integral of |cos()| between alpha and beta, valid only if alpha is in [-pi,pi] and beta-alpha is in
+ // [0,pi]
+ double compute_int_cos(double alpha, double beta) const {
+ double res = 0;
+ if (alpha >= 0 && alpha <= pi) {
+ if (cos(alpha) >= 0) {
+ if (pi / 2 <= beta) {
+ res = 2 - sin(alpha) - sin(beta);
+ } else {
+ res = sin(beta) - sin(alpha);
+ }
+ } else {
+ if (1.5 * pi <= beta) {
+ res = 2 + sin(alpha) + sin(beta);
+ } else {
+ res = sin(alpha) - sin(beta);
+ }
+ }
+ }
+ if (alpha >= -pi && alpha <= 0) {
+ if (cos(alpha) <= 0) {
+ if (-pi / 2 <= beta) {
+ res = 2 + sin(alpha) + sin(beta);
+ } else {
+ res = sin(alpha) - sin(beta);
+ }
+ } else {
+ if (pi / 2 <= beta) {
+ res = 2 - sin(alpha) - sin(beta);
+ } else {
+ res = sin(beta) - sin(alpha);
+ }
+ }
+ }
+ return res;
+ }
+
+ double compute_int(double theta1, double theta2, int p, int q, const Persistence_diagram& diag1,
+ const Persistence_diagram& diag2) const {
+ double norm = std::sqrt((diag1[p].first - diag2[q].first) * (diag1[p].first - diag2[q].first) +
+ (diag1[p].second - diag2[q].second) * (diag1[p].second - diag2[q].second));
+ double angle1;
+ if (diag1[p].first == diag2[q].first)
+ angle1 = theta1 - pi / 2;
+ else
+ angle1 = theta1 - atan((diag1[p].second - diag2[q].second) / (diag1[p].first - diag2[q].first));
+ double angle2 = angle1 + theta2 - theta1;
+ double integral = compute_int_cos(angle1, angle2);
+ return norm * integral;
+ }
+
+ // Evaluation of the Sliced Wasserstein Distance between a pair of diagrams.
+ double compute_sliced_wasserstein_distance(const Sliced_Wasserstein& second) const {
+ GUDHI_CHECK(this->approx == second.approx,
+ std::invalid_argument("Error: different approx values for representations"));
+
+ Persistence_diagram diagram1 = this->diagram;
+ Persistence_diagram diagram2 = second.diagram;
+ double sw = 0;
+
+ if (this->approx == -1) {
+ // Add projections onto diagonal.
+ int n1, n2;
+ n1 = diagram1.size();
+ n2 = diagram2.size();
+ double min_ordinate = std::numeric_limits<double>::max();
+ double min_abscissa = std::numeric_limits<double>::max();
+ double max_ordinate = std::numeric_limits<double>::lowest();
+ double max_abscissa = std::numeric_limits<double>::lowest();
+ for (int i = 0; i < n2; i++) {
+ min_ordinate = std::min(min_ordinate, diagram2[i].second);
+ min_abscissa = std::min(min_abscissa, diagram2[i].first);
+ max_ordinate = std::max(max_ordinate, diagram2[i].second);
+ max_abscissa = std::max(max_abscissa, diagram2[i].first);
+ diagram1.emplace_back((diagram2[i].first + diagram2[i].second) / 2,
+ (diagram2[i].first + diagram2[i].second) / 2);
+ }
+ for (int i = 0; i < n1; i++) {
+ min_ordinate = std::min(min_ordinate, diagram1[i].second);
+ min_abscissa = std::min(min_abscissa, diagram1[i].first);
+ max_ordinate = std::max(max_ordinate, diagram1[i].second);
+ max_abscissa = std::max(max_abscissa, diagram1[i].first);
+ diagram2.emplace_back((diagram1[i].first + diagram1[i].second) / 2,
+ (diagram1[i].first + diagram1[i].second) / 2);
+ }
+ int num_pts_dgm = diagram1.size();
+
+ // Slightly perturb the points so that the PDs are in generic positions.
+ double epsilon = 0.0001;
+ double thresh_y = (max_ordinate - min_ordinate) * epsilon;
+ double thresh_x = (max_abscissa - min_abscissa) * epsilon;
+ std::random_device rd;
+ std::default_random_engine re(rd());
+ std::uniform_real_distribution<double> uni(-1, 1);
+ for (int i = 0; i < num_pts_dgm; i++) {
+ double u = uni(re);
+ diagram1[i].first += u * thresh_x;
+ diagram1[i].second += u * thresh_y;
+ diagram2[i].first += u * thresh_x;
+ diagram2[i].second += u * thresh_y;
+ }
+
+ // Compute all angles in both PDs.
+ std::vector<std::pair<double, std::pair<int, int> > > angles1, angles2;
+ for (int i = 0; i < num_pts_dgm; i++) {
+ for (int j = i + 1; j < num_pts_dgm; j++) {
+ double theta1 = compute_angle(diagram1, i, j);
+ double theta2 = compute_angle(diagram2, i, j);
+ angles1.emplace_back(theta1, std::pair<int, int>(i, j));
+ angles2.emplace_back(theta2, std::pair<int, int>(i, j));
+ }
+ }
+
+ // Sort angles.
+ std::sort(angles1.begin(), angles1.end(),
+ [](const std::pair<double, std::pair<int, int> >& p1,
+ const std::pair<double, std::pair<int, int> >& p2) { return (p1.first < p2.first); });
+ std::sort(angles2.begin(), angles2.end(),
+ [](const std::pair<double, std::pair<int, int> >& p1,
+ const std::pair<double, std::pair<int, int> >& p2) { return (p1.first < p2.first); });
+
+ // Initialize orders of the points of both PDs (given by ordinates when theta = -pi/2).
+ std::vector<int> orderp1, orderp2;
+ for (int i = 0; i < num_pts_dgm; i++) {
+ orderp1.push_back(i);
+ orderp2.push_back(i);
+ }
+ std::sort(orderp1.begin(), orderp1.end(), [&](int i, int j) {
+ if (diagram1[i].second != diagram1[j].second)
+ return (diagram1[i].second < diagram1[j].second);
+ else
+ return (diagram1[i].first > diagram1[j].first);
+ });
+ std::sort(orderp2.begin(), orderp2.end(), [&](int i, int j) {
+ if (diagram2[i].second != diagram2[j].second)
+ return (diagram2[i].second < diagram2[j].second);
+ else
+ return (diagram2[i].first > diagram2[j].first);
+ });
+
+ // Find the inverses of the orders.
+ std::vector<int> order1(num_pts_dgm);
+ std::vector<int> order2(num_pts_dgm);
+ for (int i = 0; i < num_pts_dgm; i++) {
+ order1[orderp1[i]] = i;
+ order2[orderp2[i]] = i;
+ }
+
+ // Record all inversions of points in the orders as theta varies along the positive half-disk.
+ std::vector<std::vector<std::pair<int, double> > > anglePerm1(num_pts_dgm);
+ std::vector<std::vector<std::pair<int, double> > > anglePerm2(num_pts_dgm);
+
+ int m1 = angles1.size();
+ for (int i = 0; i < m1; i++) {
+ double theta = angles1[i].first;
+ int p = angles1[i].second.first;
+ int q = angles1[i].second.second;
+ anglePerm1[order1[p]].emplace_back(p, theta);
+ anglePerm1[order1[q]].emplace_back(q, theta);
+ int a = order1[p];
+ int b = order1[q];
+ order1[p] = b;
+ order1[q] = a;
+ }
+
+ int m2 = angles2.size();
+ for (int i = 0; i < m2; i++) {
+ double theta = angles2[i].first;
+ int p = angles2[i].second.first;
+ int q = angles2[i].second.second;
+ anglePerm2[order2[p]].emplace_back(p, theta);
+ anglePerm2[order2[q]].emplace_back(q, theta);
+ int a = order2[p];
+ int b = order2[q];
+ order2[p] = b;
+ order2[q] = a;
+ }
+
+ for (int i = 0; i < num_pts_dgm; i++) {
+ anglePerm1[order1[i]].emplace_back(i, pi / 2);
+ anglePerm2[order2[i]].emplace_back(i, pi / 2);
+ }
+
+ // Compute the SW distance with the list of inversions.
+ for (int i = 0; i < num_pts_dgm; i++) {
+ std::vector<std::pair<int, double> > u, v;
+ u = anglePerm1[i];
+ v = anglePerm2[i];
+ double theta1, theta2;
+ theta1 = -pi / 2;
+ unsigned int ku, kv;
+ ku = 0;
+ kv = 0;
+ theta2 = std::min(u[ku].second, v[kv].second);
+ while (theta1 != pi / 2) {
+ if (diagram1[u[ku].first].first != diagram2[v[kv].first].first ||
+ diagram1[u[ku].first].second != diagram2[v[kv].first].second)
+ if (theta1 != theta2) sw += compute_int(theta1, theta2, u[ku].first, v[kv].first, diagram1, diagram2);
+ theta1 = theta2;
+ if ((theta2 == u[ku].second) && ku < u.size() - 1) ku++;
+ if ((theta2 == v[kv].second) && kv < v.size() - 1) kv++;
+ theta2 = std::min(u[ku].second, v[kv].second);
+ }
+ }
+ } else {
+ double step = pi / this->approx;
+ std::vector<double> v1, v2;
+ for (int i = 0; i < this->approx; i++) {
+ v1.clear();
+ v2.clear();
+ std::merge(this->projections[i].begin(), this->projections[i].end(), second.projections_diagonal[i].begin(),
+ second.projections_diagonal[i].end(), std::back_inserter(v1));
+ std::merge(second.projections[i].begin(), second.projections[i].end(), this->projections_diagonal[i].begin(),
+ this->projections_diagonal[i].end(), std::back_inserter(v2));
+
+ int n = v1.size();
+ double f = 0;
+ for (int j = 0; j < n; j++) f += std::abs(v1[j] - v2[j]);
+ sw += f * step;
+ }
+ }
+
+ return sw / pi;
+ }
+
+ public:
+ /** \brief Sliced Wasserstein kernel constructor.
+ * \implements Topological_data_with_distances, Real_valued_topological_data, Topological_data_with_scalar_product
+ * \ingroup Sliced_Wasserstein
+ *
+ * @param[in] _diagram persistence diagram.
+ * @param[in] _sigma bandwidth parameter.
+ * @param[in] _approx number of directions used to approximate the integral in the Sliced Wasserstein distance, set
+ * to -1 for random perturbation. If positive, then projections of the diagram points on all
+ * directions are stored in memory to reduce computation time.
+ *
+ */
+ Sliced_Wasserstein(const Persistence_diagram& _diagram, double _sigma = 1.0, int _approx = 10)
+ : diagram(_diagram), approx(_approx), sigma(_sigma) {
+ build_rep();
+ }
+
+ /** \brief Evaluation of the kernel on a pair of diagrams.
+ * \ingroup Sliced_Wasserstein
+ *
+ * @pre approx and sigma attributes need to be the same for both instances.
+ * @param[in] second other instance of class Sliced_Wasserstein.
+ *
+ */
+ double compute_scalar_product(const Sliced_Wasserstein& second) const {
+ GUDHI_CHECK(this->sigma == second.sigma,
+ std::invalid_argument("Error: different sigma values for representations"));
+ return std::exp(-compute_sliced_wasserstein_distance(second) / (2 * this->sigma * this->sigma));
+ }
+
+ /** \brief Evaluation of the distance between images of diagrams in the Hilbert space of the kernel.
+ * \ingroup Sliced_Wasserstein
+ *
+ * @pre approx and sigma attributes need to be the same for both instances.
+ * @param[in] second other instance of class Sliced_Wasserstein.
+ *
+ */
+ double distance(const Sliced_Wasserstein& second) const {
+ GUDHI_CHECK(this->sigma == second.sigma,
+ std::invalid_argument("Error: different sigma values for representations"));
+ return std::sqrt(this->compute_scalar_product(*this) + second.compute_scalar_product(second) -
+ 2 * this->compute_scalar_product(second));
+ }
+
+}; // class Sliced_Wasserstein
+} // namespace Persistence_representations
+} // namespace Gudhi
+
+#endif // SLICED_WASSERSTEIN_H_
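For reference, a minimal usage sketch of the new Sliced_Wasserstein class. The two toy diagrams and the sigma/approx values are illustrative assumptions; as documented above, both objects must share the same sigma and approx:

#include <gudhi/Sliced_Wasserstein.h>

#include <iostream>
#include <utility>
#include <vector>

int main() {
  using Gudhi::Persistence_representations::Persistence_diagram;
  using Gudhi::Persistence_representations::Sliced_Wasserstein;

  Persistence_diagram d1 = {{0.0, 1.0}, {0.5, 2.0}};
  Persistence_diagram d2 = {{0.1, 1.2}, {0.4, 1.8}, {0.7, 0.9}};

  // Approximate version: the integral over the half-circle is sampled on 100 directions.
  Sliced_Wasserstein sw1(d1, /* sigma */ 1.0, /* approx */ 100);
  Sliced_Wasserstein sw2(d2, 1.0, 100);
  std::cout << "kernel value:     " << sw1.compute_scalar_product(sw2) << std::endl;
  std::cout << "Hilbert distance: " << sw1.distance(sw2) << std::endl;

  // Exact version (approx = -1): the points are slightly perturbed; slower but more accurate.
  Sliced_Wasserstein exact1(d1, 1.0, -1);
  Sliced_Wasserstein exact2(d2, 1.0, -1);
  std::cout << "exact kernel value: " << exact1.compute_scalar_product(exact2) << std::endl;
  return 0;
}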
diff --git a/src/Persistence_representations/include/gudhi/common_persistence_representations.h b/src/Persistence_representations/include/gudhi/common_persistence_representations.h
index 3d03f1f6..6fed019a 100644
--- a/src/Persistence_representations/include/gudhi/common_persistence_representations.h
+++ b/src/Persistence_representations/include/gudhi/common_persistence_representations.h
@@ -26,17 +26,28 @@
#include <utility>
#include <string>
#include <cmath>
+#include <boost/math/constants/constants.hpp>
+
+
namespace Gudhi {
namespace Persistence_representations {
// this file contain an implementation of some common procedures used in Persistence_representations.
+static constexpr double pi = boost::math::constants::pi<double>();
+
+
+/**
+ * In this module, we use the name Persistence_diagram for the representation of a diagram as a vector of pairs of doubles.
+ */
+using Persistence_diagram = std::vector<std::pair<double, double> >;
+
// double epsi = std::numeric_limits<double>::epsilon();
double epsi = 0.000005;
/**
* A procedure used to compare doubles. Typically, given two doubles A and B, comparing A == B is not a good idea. In this
- *case, we use the procedure almostEqual with the epsi defined at
+ * case, we use the procedure almostEqual with the epsi defined at
* the top of the file. Setting up epsi gives the user a tolerance on what should be considered equal.
**/
inline bool almost_equal(double a, double b) {
@@ -53,8 +64,7 @@ double birth_plus_deaths(std::pair<double, double> a) { return a.first + a.secon
// landscapes
/**
- * Given two points in R^2, the procedure compute the parameters A and B of the line y = Ax + B that crosses those two
- *points.
+ * Given two points in R^2, the procedure computes the parameters A and B of the line y = Ax + B that passes through those two points.
**/
std::pair<double, double> compute_parameters_of_a_line(std::pair<double, double> p1, std::pair<double, double> p2) {
double a = (p2.second - p1.second) / (p2.first - p1.first);
@@ -64,8 +74,7 @@ std::pair<double, double> compute_parameters_of_a_line(std::pair<double, double>
// landscapes
/**
- * This procedure given two points which lies on the opposite sides of x axis, compute x for which the line connecting
- *those two points crosses x axis.
+ * Given two points which lie on opposite sides of the x axis, this procedure computes the x for which the line connecting those two points crosses the x axis.
**/
double find_zero_of_a_line_segment_between_those_two_points(std::pair<double, double> p1,
std::pair<double, double> p2) {
@@ -89,8 +98,7 @@ double find_zero_of_a_line_segment_between_those_two_points(std::pair<double, do
// landscapes
/**
* This method provides a comparison of points that is used in construction of persistence landscapes. The ordering is
- *lexicographical for the first coordinate, and reverse-lexicographical for the
- * second coordinate.
+ * lexicographical for the first coordinate, and reverse-lexicographical for the second coordinate.
**/
bool compare_points_sorting(std::pair<double, double> f, std::pair<double, double> s) {
if (f.first < s.first) {