diff options
-rw-r--r-- | CMakeLists.txt | 1 | ||||
-rw-r--r-- | biblio/bibliography.bib | 22 | ||||
-rw-r--r-- | data/persistence_diagram/PD1.pers | 3 | ||||
-rw-r--r-- | data/persistence_diagram/PD2.pers | 2 | ||||
-rw-r--r-- | src/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/Doxyfile | 3 | ||||
-rw-r--r-- | src/Kernels/doc/COPYRIGHT | 19 | ||||
-rw-r--r-- | src/Kernels/doc/Intro_kernels.h | 108 | ||||
-rw-r--r-- | src/Kernels/example/CMakeLists.txt | 10 | ||||
-rw-r--r-- | src/Kernels/example/kernel.txt | 8 | ||||
-rw-r--r-- | src/Kernels/example/kernel_basic_example.cpp | 65 | ||||
-rw-r--r-- | src/Kernels/include/gudhi/kernel.h | 365 | ||||
-rw-r--r-- | src/Kernels/test/CMakeLists.txt | 12 | ||||
-rw-r--r-- | src/Kernels/test/test_kernel.cpp | 56 | ||||
-rw-r--r-- | src/Persistence_representations/example/persistence_weighted_gaussian.cpp | 96 | ||||
-rw-r--r-- | src/Persistence_representations/example/sliced_wasserstein.cpp | 55 | ||||
-rw-r--r-- | src/Persistence_representations/include/gudhi/Persistence_weighted_gaussian.h | 143 | ||||
-rw-r--r-- | src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h | 285 | ||||
-rw-r--r-- | src/cmake/modules/GUDHI_modules.cmake | 4 |
19 files changed, 1255 insertions, 3 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 10373f75..b28dcbf2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,6 +50,7 @@ add_gudhi_module(Subsampling) add_gudhi_module(Tangential_complex) add_gudhi_module(Witness_complex) add_gudhi_module(Nerve_GIC) +add_gudhi_module(Kernels) message("++ GUDHI_MODULES list is:\"${GUDHI_MODULES}\"") diff --git a/biblio/bibliography.bib b/biblio/bibliography.bib index b101cb76..e56734e4 100644 --- a/biblio/bibliography.bib +++ b/biblio/bibliography.bib @@ -1072,3 +1072,25 @@ language={English} + +@InProceedings{pmlr-v70-carriere17a, + title = {Sliced {W}asserstein Kernel for Persistence Diagrams}, + author = {Mathieu Carri{\`e}re and Marco Cuturi and Steve Oudot}, + booktitle = {Proceedings of the 34th International Conference on Machine Learning}, + pages = {664--673}, + year = {2017}, + editor = {Doina Precup and Yee Whye Teh}, + volume = {70}, + series = {Proceedings of Machine Learning Research}, + address = {International Convention Centre, Sydney, Australia}, + month = {06--11 Aug}, + publisher = {PMLR}, +} + +@INPROCEEDINGS{Rahimi07randomfeatures, + author = {Ali Rahimi and Ben Recht}, + title = {Random features for large-scale kernel machines}, + booktitle = {In Neural Information Processing Systems}, + year = {2007} +} + diff --git a/data/persistence_diagram/PD1.pers b/data/persistence_diagram/PD1.pers new file mode 100644 index 00000000..404199b4 --- /dev/null +++ b/data/persistence_diagram/PD1.pers @@ -0,0 +1,3 @@ +2.7 3.7 +9.6 14 +34.2 34.974
\ No newline at end of file diff --git a/data/persistence_diagram/PD2.pers b/data/persistence_diagram/PD2.pers new file mode 100644 index 00000000..125d8e4b --- /dev/null +++ b/data/persistence_diagram/PD2.pers @@ -0,0 +1,2 @@ +2.8 4.45 +9.5 14.1
\ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 94587044..0ae26081 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -26,6 +26,7 @@ add_gudhi_module(Subsampling) add_gudhi_module(Tangential_complex) add_gudhi_module(Witness_complex) add_gudhi_module(Nerve_GIC) +add_gudhi_module(Kernels) message("++ GUDHI_MODULES list is:\"${GUDHI_MODULES}\"") diff --git a/src/Doxyfile b/src/Doxyfile index f1981e2e..2348b290 100644 --- a/src/Doxyfile +++ b/src/Doxyfile @@ -854,7 +854,8 @@ IMAGE_PATH = doc/Skeleton_blocker/ \ doc/Tangential_complex/ \ doc/Bottleneck_distance/ \ doc/Nerve_GIC/ \ - doc/Persistence_representations/ + doc/Persistence_representations/ \ + doc/Kernels/ # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program diff --git a/src/Kernels/doc/COPYRIGHT b/src/Kernels/doc/COPYRIGHT new file mode 100644 index 00000000..0c36a526 --- /dev/null +++ b/src/Kernels/doc/COPYRIGHT @@ -0,0 +1,19 @@ +The files of this directory are part of the Gudhi Library. The Gudhi library +(Geometric Understanding in Higher Dimensions) is a generic C++ library for +computational topology. + +Author(s): Mathieu Carrière + +Copyright (C) 2017 INRIA + +This program is free software: you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. 
diff --git a/src/Kernels/doc/Intro_kernels.h b/src/Kernels/doc/Intro_kernels.h new file mode 100644 index 00000000..163690b1 --- /dev/null +++ b/src/Kernels/doc/Intro_kernels.h @@ -0,0 +1,108 @@ +/* This file is part of the Gudhi Library. The Gudhi library + * (Geometric Understanding in Higher Dimensions) is a generic C++ + * library for computational topology. + * + * Author(s): Mathieu Carriere + * + * Copyright (C) 2017 INRIA + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef DOC_KERNEL_INTRO_KERNEL_H_ +#define DOC_KERNEL_INTRO_KERNEL_H_ + +namespace Gudhi { + +namespace kernel { + +/** \defgroup kernel Kernels + * + * \author Mathieu Carrière + * + * @{ + * + * Kernels are generalized scalar products. They take the form of functions whose evaluations on pairs of persistence diagrams are equal + * to the scalar products of the images of the diagrams under some feature map into a (generally unknown and infinite dimensional) + * Hilbert space. Kernels are + * very useful to handle any type of data for algorithms that require at least a Hilbert structure, such as Principal Component Analysis + * or Support Vector Machines. 
In this package, we implement three kernels for persistence diagrams: + * the Persistence Scale Space Kernel (PSSK)---see \cite Reininghaus_Huber_ALL_PSSK, + * the Persistence Weighted Gaussian Kernel (PWGK)---see \cite Kusano_Fukumizu_Hiraoka_PWGK, + * and the Sliced Wasserstein Kernel (SWK)---see \cite pmlr-v70-carriere17a. + * + * \section pwg Persistence Weighted Gaussian Kernel and Persistence Scale Space Kernel + * + * The PWGK is built with Gaussian Kernel Mean Embedding, meaning that each persistence diagram is first + * sent to the Hilbert space of a Gaussian kernel with bandwidth parameter \f$\sigma >0\f$ using a weighted mean embedding \f$\Phi\f$: + * + * \f$ \Phi\,:\,D\,\rightarrow\,\sum_{p\in D}\,w(p)\,{\rm exp}\left(-\frac{\|p-\cdot\|_2^2}{2\sigma^2}\right) \f$, + * + * Usually, the weight function is chosen to be an arctan function of the distance of the point to the diagonal: + * \f$w(p) = {\rm arctan}(C\,|y-x|^\alpha)\f$, for some parameters \f$C,\alpha >0\f$. + * Then, either their scalar product in this space is + * computed (Linear Persistence Weighted Gaussian Kernel): + * + * \f$ LPWGK(D_1,D_2)=\langle\Phi(D_1),\Phi(D_2)\rangle + * \,=\,\sum_{p\in D_1}\,\sum_{q\in D_2}\,w(p)\,w(q)\,{\rm exp}\left(-\frac{\|p-q\|_2^2}{2\sigma^2}\right)\f$, + * + * or a second Gaussian kernel with bandwidth parameter \f$\tau >0\f$ is applied to their distance in this space + * (Gaussian Persistence Weighted Gaussian Kernel): + * + * \f$ GPWGK(D_1,D_2)={\rm exp}\left(-\frac{\|\Phi(D_1)-\Phi(D_2)\|^2}{2\tau^2} \right)\f$, + * where \f$\|\Phi(D_1)-\Phi(D_2)\|^2 = \langle\Phi(D_1)-\Phi(D_2),\Phi(D_1)-\Phi(D_2)\rangle\f$. + * + * It follows that the computation time is \f$O(n^2)\f$ where \f$n\f$ is the number of points + * in the diagrams. This time can be improved by computing approximations of the kernel + * with \f$m\f$ Fourier features \cite Rahimi07randomfeatures. In that case, the computation time becomes \f$O(mn)\f$. 
+ * + * The PSSK is a Linear Persistence Weighted Gaussian Kernel between modified diagrams: + * the reflection of each point across the diagonal is first added to each diagram, and then the weight function + * is set to be +1 if the point is above the diagonal and -1 otherwise. + * + * \section sw Sliced Wasserstein Kernel + * + * The Sliced Wasserstein Kernel is defined as a Gaussian-like Kernel between persistence diagrams, where the distance used for + * comparison is the Sliced Wasserstein distance \f$SW\f$ between persistence diagrams, defined as the integral of the 1-norm + * between the sorted projections of the diagrams onto all lines passing through the origin: + * + * \f$ SW(D_1,D_2)=\int_{\theta\in\mathbb{S}}\,\|\pi_\theta(D_1\cup\pi_\Delta(D_2))-\pi_\theta(D_2\cup\pi_\Delta(D_1))\|_1{\rm d}\theta\f$, + * + * where \f$\pi_\theta\f$ is the projection onto the line defined with angle \f$\theta\f$ in the unit circle \f$\mathbb{S}\f$, + * and \f$\pi_\Delta\f$ is the projection onto the diagonal. + * The integral can be either computed exactly in \f$O(n^2{\rm log}(n))\f$ time, where \f$n\f$ is the number of points + * in the diagrams, or approximated by sampling \f$m\f$ lines in the circle in \f$O(mn{\rm log}(n))\f$ time. The SWK is then computed as: + * + * \f$ SWK(D_1,D_2) = {\rm exp}\left(-\frac{SW(D_1,D_2)}{2\sigma^2}\right).\f$ + * + * When launching: + * + * \code $> ./BasicEx ../../../../data/persistence_diagram/PD1.pers ../../../../data/persistence_diagram/PD2.pers + * \endcode + * + * the program output is: + * + * \include Kernels/kernel.txt + * + * + * \copyright GNU General Public License v3. 
* \verbatim Contact: gudhi-users@lists.gforge.inria.fr \endverbatim
 */
/** @} */ // end defgroup kernel

} // namespace kernel

} // namespace Gudhi

#endif // DOC_KERNEL_INTRO_KERNEL_H_
diff --git a/src/Kernels/example/CMakeLists.txt b/src/Kernels/example/CMakeLists.txt
new file mode 100644
index 00000000..d8ad4b42
--- /dev/null
+++ b/src/Kernels/example/CMakeLists.txt
@@ -0,0 +1,10 @@
cmake_minimum_required(VERSION 2.6)
project(Kernels_examples)

# Basic example: prints every kernel (exact and approximate) for two diagrams.
add_executable ( BasicEx kernel_basic_example.cpp )

if (TBB_FOUND)
  target_link_libraries(BasicEx ${TBB_LIBRARIES})
endif()

# The example opens its two arguments verbatim, so the paths must carry the
# ".pers" extension of the data files (data/persistence_diagram/PD1.pers and
# PD2.pers); without it the streams fail to open and both diagrams are empty.
add_test(NAME Kernels_example_basicex COMMAND $<TARGET_FILE:BasicEx>
         "${CMAKE_SOURCE_DIR}/data/persistence_diagram/PD1.pers"
         "${CMAKE_SOURCE_DIR}/data/persistence_diagram/PD2.pers")
\ No newline at end of file diff --git a/src/Kernels/example/kernel.txt b/src/Kernels/example/kernel.txt new file mode 100644 index 00000000..5fb8b504 --- /dev/null +++ b/src/Kernels/example/kernel.txt @@ -0,0 +1,8 @@ +SWK exact = 0.875446 +SWK approx = 0.875204 +PSSK exact = 0.0218669 +PSSK approx = 0.0213766 +LPWGK exact = 2.57351 +LPWGK approx = 2.49102 +GPWGK exact = 0.98783 +GPWGK approx = 0.987591
\ No newline at end of file diff --git a/src/Kernels/example/kernel_basic_example.cpp b/src/Kernels/example/kernel_basic_example.cpp new file mode 100644 index 00000000..7ecbe401 --- /dev/null +++ b/src/Kernels/example/kernel_basic_example.cpp @@ -0,0 +1,65 @@ +/* This file is part of the Gudhi Library. The Gudhi library + * (Geometric Understanding in Higher Dimensions) is a generic C++ + * library for computational topology. + * + * Authors: Mathieu Carrière + * + * Copyright (C) 2017 INRIA + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <gudhi/kernel.h> +#include <iostream> +#include <string> +#include <fstream> +#include <sstream> + + +void usage(int nbArgs, char *const progName) { + std::cerr << "Error: Number of arguments (" << nbArgs << ") is not correct\n"; + std::cerr << "Usage: " << progName << " PD1 PD2 \n"; + std::cerr << " i.e.: " << progName << " ../../../../data/persistence_diagram/PD1.pers ../../../../data/persistence_diagram/PD2.pers \n"; + exit(-1); // ----- >> +} + +int main(int argc, char **argv) { + + if (argc != 3) usage(argc, argv[0]); + + double sigma = 2; double tau = 5; + + std::string PDname1(argv[1]); std::string PDname2(argv[2]); + std::vector< std::pair<double, double> > v1, v2; std::string line; double b,d; + + std::ifstream input1(PDname1); + while(std::getline(input1,line)){ + std::stringstream stream(line); stream >> b; stream >> d; v1.push_back(std::pair<double,double>(b,d)); + } + + std::ifstream input2(PDname2); + while(std::getline(input2,line)){ + std::stringstream stream(line); stream >> b; stream >> d; v2.push_back(std::pair<double,double>(b,d)); + } + + std::cout << "SWK exact = " << Gudhi::kernel::sliced_wasserstein_kernel (v1,v2,sigma,-1) << std::endl; + std::cout << "SWK approx = " << Gudhi::kernel::sliced_wasserstein_kernel (v1,v2,sigma) << std::endl; + std::cout << "PSSK exact = " << Gudhi::kernel::persistence_scale_space_kernel (v1,v2,sigma,-1) << std::endl; + std::cout << "PSSK approx = " << Gudhi::kernel::persistence_scale_space_kernel (v1,v2,sigma) << std::endl; + std::cout << "LPWGK exact = " << Gudhi::kernel::linear_persistence_weighted_gaussian_kernel (v1,v2,sigma,Gudhi::kernel::arctan_weight,-1) << std::endl; + std::cout << "LPWGK approx = " << Gudhi::kernel::linear_persistence_weighted_gaussian_kernel (v1,v2,sigma,Gudhi::kernel::arctan_weight) << std::endl; + std::cout << "GPWGK exact = " << Gudhi::kernel::gaussian_persistence_weighted_gaussian_kernel (v1,v2,sigma,tau,Gudhi::kernel::arctan_weight,-1) << std::endl; + std::cout << 
"GPWGK approx = " << Gudhi::kernel::gaussian_persistence_weighted_gaussian_kernel (v1,v2,sigma,tau,Gudhi::kernel::arctan_weight) << std::endl; + +} diff --git a/src/Kernels/include/gudhi/kernel.h b/src/Kernels/include/gudhi/kernel.h new file mode 100644 index 00000000..3293cc62 --- /dev/null +++ b/src/Kernels/include/gudhi/kernel.h @@ -0,0 +1,365 @@ +/* This file is part of the Gudhi Library. The Gudhi library + * (Geometric Understanding in Higher Dimensions) is a generic C++ + * library for computational topology. + * + * Author(s): Mathieu Carrière + * + * Copyright (C) 2018 INRIA (France) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef KERNEL_H_ +#define KERNEL_H_ + +#include <cstdlib> +#include <vector> +#include <algorithm> +#include <cmath> +#include <random> +#include <limits> //for numeric_limits<> +#include <utility> //for pair<> + +#include <boost/math/constants/constants.hpp> + + +namespace Gudhi { +namespace kernel { + +using PD = std::vector<std::pair<double,double> >; +double pi = boost::math::constants::pi<double>(); + + + + +// ******************************************************************** +// Utils. 
+// ******************************************************************** + +bool sortAngle(const std::pair<double, std::pair<int,int> >& p1, const std::pair<double, std::pair<int,int> >& p2){return (p1.first < p2.first);} +bool myComp(const std::pair<int,double> & P1, const std::pair<int,double> & P2){return P1.second < P2.second;} + +double pss_weight(std::pair<double,double> P){ + if(P.second > P.first) return 1; + else return -1; +} + +double arctan_weight(std::pair<double,double> P){ + return atan(P.second - P.first); +} + +// Compute the angle formed by two points of a PD +double compute_angle(const PD & PersDiag, const int & i, const int & j){ + std::pair<double,double> vect; double x1,y1, x2,y2; + x1 = PersDiag[i].first; y1 = PersDiag[i].second; + x2 = PersDiag[j].first; y2 = PersDiag[j].second; + if (y1 - y2 > 0){ + vect.first = y1 - y2; + vect.second = x2 - x1;} + else{ + if(y1 - y2 < 0){ + vect.first = y2 - y1; + vect.second = x1 - x2; + } + else{ + vect.first = 0; + vect.second = abs(x1 - x2);} + } + double norm = std::sqrt(vect.first*vect.first + vect.second*vect.second); + return asin(vect.second/norm); +} + +// Compute the integral of |cos()| between alpha and beta, valid only if alpha is in [-pi,pi] and beta-alpha is in [0,pi] +double compute_int_cos(const double & alpha, const double & beta){ + double res = 0; + if (alpha >= 0 && alpha <= pi){ + if (cos(alpha) >= 0){ + if(pi/2 <= beta){res = 2-sin(alpha)-sin(beta);} + else{res = sin(beta)-sin(alpha);} + } + else{ + if(1.5*pi <= beta){res = 2+sin(alpha)+sin(beta);} + else{res = sin(alpha)-sin(beta);} + } + } + if (alpha >= -pi && alpha <= 0){ + if (cos(alpha) <= 0){ + if(-pi/2 <= beta){res = 2+sin(alpha)+sin(beta);} + else{res = sin(alpha)-sin(beta);} + } + else{ + if(pi/2 <= beta){res = 2-sin(alpha)-sin(beta);} + else{res = sin(beta)-sin(alpha);} + } + } + return res; +} + +double compute_int(const double & theta1, const double & theta2, const int & p, const int & q, const PD & PD1, const PD & PD2){ 
+ double norm = std::sqrt( (PD1[p].first-PD2[q].first)*(PD1[p].first-PD2[q].first) + (PD1[p].second-PD2[q].second)*(PD1[p].second-PD2[q].second) ); + double angle1; + if (PD1[p].first > PD2[q].first) + angle1 = theta1 - asin( (PD1[p].second-PD2[q].second)/norm ); + else + angle1 = theta1 - asin( (PD2[q].second-PD1[p].second)/norm ); + double angle2 = angle1 + theta2 - theta1; + double integral = compute_int_cos(angle1,angle2); + return norm*integral; +} + +template<class Weight = std::function<double (std::pair<double,double>) > > +std::vector<std::pair<double,double> > Fourier_feat(PD D, std::vector<std::pair<double,double> > Z, Weight weight = arctan_weight){ + int m = D.size(); std::vector<std::pair<double,double> > B; int M = Z.size(); + for(int i = 0; i < M; i++){ + double d1 = 0; double d2 = 0; double zx = Z[i].first; double zy = Z[i].second; + for(int j = 0; j < m; j++){ + double x = D[j].first; double y = D[j].second; + d1 += weight(D[j])*cos(x*zx + y*zy); + d2 += weight(D[j])*sin(x*zx + y*zy); + } + B.emplace_back(d1,d2); + } + return B; +} + +std::vector<std::pair<double,double> > random_Fourier(double sigma, int M = 1000){ + std::normal_distribution<double> distrib(0,1); std::vector<std::pair<double,double> > Z; std::random_device rd; + for(int i = 0; i < M; i++){ + std::mt19937 e1(rd()); std::mt19937 e2(rd()); + double zx = distrib(e1); double zy = distrib(e2); + Z.emplace_back(zx/sigma,zy/sigma); + } + return Z; +} + + + + + + + + + + +// ******************************************************************** +// Kernel computation. +// ******************************************************************** + + + + + +/** \brief Computes the Linear Persistence Weighted Gaussian Kernel between two persistence diagrams with random Fourier features. + * \ingroup kernel + * + * @param[in] PD1 first persistence diagram. + * @param[in] PD2 second persistence diagram. 
+ * @param[in] sigma bandwidth parameter of the Gaussian Kernel used for the Kernel Mean Embedding of the diagrams. + * @param[in] weight weight function for the points in the diagrams. + * @param[in] M number of Fourier features (set -1 for exact computation). + * + */ +template<class Weight = std::function<double (std::pair<double,double>) > > +double linear_persistence_weighted_gaussian_kernel(const PD & PD1, const PD & PD2, double sigma, Weight weight = arctan_weight, int M = 1000){ + + if(M == -1){ + int num_pts1 = PD1.size(); int num_pts2 = PD2.size(); double k = 0; + for(int i = 0; i < num_pts1; i++) + for(int j = 0; j < num_pts2; j++) + k += weight(PD1[i])*weight(PD2[j])*exp(-((PD1[i].first-PD2[j].first)*(PD1[i].first-PD2[j].first) + (PD1[i].second-PD2[j].second)*(PD1[i].second-PD2[j].second))/(2*sigma*sigma)); + return k; + } + else{ + std::vector<std::pair<double,double> > Z = random_Fourier(sigma, M); + std::vector<std::pair<double,double> > B1 = Fourier_feat(PD1,Z,weight); + std::vector<std::pair<double,double> > B2 = Fourier_feat(PD2,Z,weight); + double d = 0; for(int i = 0; i < M; i++) d += B1[i].first*B2[i].first + B1[i].second*B2[i].second; + return d/M; + } +} + +/** \brief Computes the Persistence Scale Space Kernel between two persistence diagrams with random Fourier features. + * \ingroup kernel + * + * @param[in] PD1 first persistence diagram. + * @param[in] PD2 second persistence diagram. + * @param[in] sigma bandwidth parameter of the Gaussian Kernel used for the Kernel Mean Embedding of the diagrams. + * @param[in] M number of Fourier features (set -1 for exact computation). 
+ * + */ +double persistence_scale_space_kernel(const PD & PD1, const PD & PD2, double sigma, int M = 1000){ + PD pd1 = PD1; int numpts = PD1.size(); for(int i = 0; i < numpts; i++) pd1.emplace_back(PD1[i].second,PD1[i].first); + PD pd2 = PD2; numpts = PD2.size(); for(int i = 0; i < numpts; i++) pd2.emplace_back(PD2[i].second,PD2[i].first); + return linear_persistence_weighted_gaussian_kernel(pd1, pd2, 2*sqrt(sigma), pss_weight, M) / (2*8*pi*sigma); +} + + +/** \brief Computes the Gaussian Persistence Weighted Gaussian Kernel between two persistence diagrams with random Fourier features. + * \ingroup kernel + * + * @param[in] PD1 first persistence diagram. + * @param[in] PD2 second persistence diagram. + * @param[in] sigma bandwidth parameter of the Gaussian Kernel used for the Kernel Mean Embedding of the diagrams. + * @param[in] tau bandwidth parameter of the Gaussian Kernel used between the embeddings. + * @param[in] weight weight function for the points in the diagrams. + * @param[in] M number of Fourier features (set -1 for exact computation). + * + */ +template<class Weight = std::function<double (std::pair<double,double>) > > +double gaussian_persistence_weighted_gaussian_kernel(const PD & PD1, const PD & PD2, double sigma, double tau, Weight weight = arctan_weight, int M = 1000){ + double k1 = linear_persistence_weighted_gaussian_kernel(PD1,PD1,sigma,weight,M); + double k2 = linear_persistence_weighted_gaussian_kernel(PD2,PD2,sigma,weight,M); + double k3 = linear_persistence_weighted_gaussian_kernel(PD1,PD2,sigma,weight,M); + return exp( - (k1+k2-2*k3) / (2*tau*tau) ); +} + + +/** \brief Computes the Sliced Wasserstein Kernel between two persistence diagrams with sampled directions. + * \ingroup kernel + * + * @param[in] PD1 first persistence diagram. + * @param[in] PD2 second persistence diagram. + * @param[in] sigma bandwidth parameter. + * @param[in] N number of points sampled on the circle (set -1 for exact computation). 
+ * + */ +double sliced_wasserstein_kernel(PD PD1, PD PD2, double sigma, int N = 100){ + + if(N == -1){ + + // Add projections onto diagonal. + int n1, n2; n1 = PD1.size(); n2 = PD2.size(); double max_ordinate = std::numeric_limits<double>::lowest(); + for (int i = 0; i < n2; i++){ + max_ordinate = std::max(max_ordinate, PD2[i].second); + PD1.emplace_back( (PD2[i].first+PD2[i].second)/2, (PD2[i].first+PD2[i].second)/2 ); + } + for (int i = 0; i < n1; i++){ + max_ordinate = std::max(max_ordinate, PD1[i].second); + PD2.emplace_back( (PD1[i].first+PD1[i].second)/2, (PD1[i].first+PD1[i].second)/2 ); + } + int num_pts_dgm = PD1.size(); + + // Slightly perturb the points so that the PDs are in generic positions. + int mag = 0; while(max_ordinate > 10){mag++; max_ordinate/=10;} + double thresh = pow(10,-5+mag); + srand(time(NULL)); + for (int i = 0; i < num_pts_dgm; i++){ + PD1[i].first += thresh*(1.0-2.0*rand()/RAND_MAX); PD1[i].second += thresh*(1.0-2.0*rand()/RAND_MAX); + PD2[i].first += thresh*(1.0-2.0*rand()/RAND_MAX); PD2[i].second += thresh*(1.0-2.0*rand()/RAND_MAX); + } + + // Compute all angles in both PDs. + std::vector<std::pair<double, std::pair<int,int> > > angles1, angles2; + for (int i = 0; i < num_pts_dgm; i++){ + for (int j = i+1; j < num_pts_dgm; j++){ + double theta1 = compute_angle(PD1,i,j); double theta2 = compute_angle(PD2,i,j); + angles1.emplace_back(theta1, std::pair<int,int>(i,j)); + angles2.emplace_back(theta2, std::pair<int,int>(i,j)); + } + } + + // Sort angles. + std::sort(angles1.begin(), angles1.end(), sortAngle); std::sort(angles2.begin(), angles2.end(), sortAngle); + + // Initialize orders of the points of both PDs (given by ordinates when theta = -pi/2). 
+ std::vector<int> orderp1, orderp2; + for (int i = 0; i < num_pts_dgm; i++){ orderp1.push_back(i); orderp2.push_back(i); } + std::sort( orderp1.begin(), orderp1.end(), [=](int i, int j){ if(PD1[i].second != PD1[j].second) return (PD1[i].second < PD1[j].second); else return (PD1[i].first > PD1[j].first); } ); + std::sort( orderp2.begin(), orderp2.end(), [=](int i, int j){ if(PD2[i].second != PD2[j].second) return (PD2[i].second < PD2[j].second); else return (PD2[i].first > PD2[j].first); } ); + + // Find the inverses of the orders. + std::vector<int> order1(num_pts_dgm); std::vector<int> order2(num_pts_dgm); + for(int i = 0; i < num_pts_dgm; i++) for (int j = 0; j < num_pts_dgm; j++) if(orderp1[j] == i){ order1[i] = j; break; } + for(int i = 0; i < num_pts_dgm; i++) for (int j = 0; j < num_pts_dgm; j++) if(orderp2[j] == i){ order2[i] = j; break; } + + // Record all inversions of points in the orders as theta varies along the positive half-disk. + std::vector<std::vector<std::pair<int,double> > > anglePerm1(num_pts_dgm); + std::vector<std::vector<std::pair<int,double> > > anglePerm2(num_pts_dgm); + + int M1 = angles1.size(); + for (int i = 0; i < M1; i++){ + double theta = angles1[i].first; int p = angles1[i].second.first; int q = angles1[i].second.second; + anglePerm1[order1[p]].emplace_back(p,theta); + anglePerm1[order1[q]].emplace_back(q,theta); + int a = order1[p]; int b = order1[q]; order1[p] = b; order1[q] = a; + } + + int M2 = angles2.size(); + for (int i = 0; i < M2; i++){ + double theta = angles2[i].first; int p = angles2[i].second.first; int q = angles2[i].second.second; + anglePerm2[order2[p]].emplace_back(p,theta); + anglePerm2[order2[q]].emplace_back(q,theta); + int a = order2[p]; int b = order2[q]; order2[p] = b; order2[q] = a; + } + + for (int i = 0; i < num_pts_dgm; i++){ + anglePerm1[order1[i]].emplace_back(i,pi/2); + anglePerm2[order2[i]].emplace_back(i,pi/2); + } + + // Compute the SW distance with the list of inversions. 
+ double sw = 0; + for (int i = 0; i < num_pts_dgm; i++){ + std::vector<std::pair<int,double> > U,V; U = anglePerm1[i]; V = anglePerm2[i]; + double theta1, theta2; theta1 = -pi/2; + unsigned int ku, kv; ku = 0; kv = 0; theta2 = std::min(U[ku].second,V[kv].second); + while(theta1 != pi/2){ + if(PD1[U[ku].first].first != PD2[V[kv].first].first || PD1[U[ku].first].second != PD2[V[kv].first].second) + if(theta1 != theta2) + sw += compute_int(theta1, theta2, U[ku].first, V[kv].first, PD1, PD2); + theta1 = theta2; + if ( (theta2 == U[ku].second) && ku < U.size()-1 ) ku++; + if ( (theta2 == V[kv].second) && kv < V.size()-1 ) kv++; + theta2 = std::min(U[ku].second, V[kv].second); + } + } + + return exp( -(sw/pi)/(2*sigma*sigma) ); + + } + + + else{ + double step = pi/N; double sw = 0; + + // Add projections onto diagonal. + int n1, n2; n1 = PD1.size(); n2 = PD2.size(); + for (int i = 0; i < n2; i++) + PD1.emplace_back( (PD2[i].first + PD2[i].second)/2, (PD2[i].first + PD2[i].second)/2 ); + for (int i = 0; i < n1; i++) + PD2.emplace_back( (PD1[i].first + PD1[i].second)/2, (PD1[i].first + PD1[i].second)/2 ); + int n = PD1.size(); + + // Sort and compare all projections. 
+ //#pragma omp parallel for + for (int i = 0; i < N; i++){ + std::vector<std::pair<int,double> > L1, L2; + for (int j = 0; j < n; j++){ + L1.emplace_back( j, PD1[j].first*cos(-pi/2+i*step) + PD1[j].second*sin(-pi/2+i*step) ); + L2.emplace_back( j, PD2[j].first*cos(-pi/2+i*step) + PD2[j].second*sin(-pi/2+i*step) ); + } + std::sort(L1.begin(),L1.end(), myComp); std::sort(L2.begin(),L2.end(), myComp); + double f = 0; for (int j = 0; j < n; j++) f += std::abs(L1[j].second - L2[j].second); + sw += f*step; + } + return exp( -(sw/pi)/(2*sigma*sigma) ); + } +} + + +} // namespace kernel + +} // namespace Gudhi + +#endif //KERNEL_H_ diff --git a/src/Kernels/test/CMakeLists.txt b/src/Kernels/test/CMakeLists.txt new file mode 100644 index 00000000..95c72a7f --- /dev/null +++ b/src/Kernels/test/CMakeLists.txt @@ -0,0 +1,12 @@ +cmake_minimum_required(VERSION 2.6) +project(kernel_tests) + +include(GUDHI_test_coverage) + +add_executable ( kernel_test_unit test_kernel.cpp ) +target_link_libraries(kernel_test_unit ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY}) +if (TBB_FOUND) + target_link_libraries(kernel_test_unit ${TBB_LIBRARIES}) +endif() + +gudhi_add_coverage_test(kernel_test_unit) diff --git a/src/Kernels/test/test_kernel.cpp b/src/Kernels/test/test_kernel.cpp new file mode 100644 index 00000000..db05fd28 --- /dev/null +++ b/src/Kernels/test/test_kernel.cpp @@ -0,0 +1,56 @@ +/* This file is part of the Gudhi Library. The Gudhi library + * (Geometric Understanding in Higher Dimensions) is a generic C++ + * library for computational topology. + * + * Author(s): Mathieu Carrière + * + * Copyright (C) 2017 INRIA + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#define BOOST_TEST_DYN_LINK +#define BOOST_TEST_MODULE "kernel" + +#include <boost/test/unit_test.hpp> +#include <cmath> // float comparison +#include <limits> +#include <string> +#include <vector> +#include <algorithm> // std::max +#include <gudhi/kernel.h> +#include <gudhi/distance_functions.h> +#include <gudhi/reader_utils.h> + +BOOST_AUTO_TEST_CASE(check_PSS) { + std::vector< std::pair<double, double> > v1, v2; + v1.emplace_back(std::pair<double,double>(0,1)); + v2.emplace_back(std::pair<double,double>(0,2)); + BOOST_CHECK(std::abs(Gudhi::kernel::pssk(v1,v2,1) - Gudhi::kernel::approx_pssk(v1,v2,1)) <= 1e-1); +} + +BOOST_AUTO_TEST_CASE(check_PWG) { + std::vector< std::pair<double, double> > v1, v2; + v1.emplace_back(std::pair<double,double>(0,1)); + v2.emplace_back(std::pair<double,double>(0,2)); + BOOST_CHECK(std::abs(Gudhi::kernel::lpwgk(v1,v2,1) - Gudhi::kernel::approx_lpwgk(v1,v2,1)) <= 1e-1); + BOOST_CHECK(std::abs(Gudhi::kernel::gpwgk(v1,v2,1,1) - Gudhi::kernel::approx_gpwgk(v1,v2,1,1)) <= 1e-1); +} + +BOOST_AUTO_TEST_CASE(check_SW) { + std::vector< std::pair<double, double> > v1, v2; + v2.emplace_back(std::pair<double,double>(0,2)); + BOOST_CHECK(std::abs(Gudhi::kernel::sw(v1,v2) - Gudhi::kernel::approx_sw(v1,v2)) <= 1e-3); + BOOST_CHECK(std::abs(Gudhi::kernel::sw(v1,v2) - 2*std::sqrt(2)/3.1415) <= 1e-3); +} diff --git a/src/Persistence_representations/example/persistence_weighted_gaussian.cpp b/src/Persistence_representations/example/persistence_weighted_gaussian.cpp new file mode 100644 index 00000000..e95b9445 --- /dev/null +++ 
b/src/Persistence_representations/example/persistence_weighted_gaussian.cpp
@@ -0,0 +1,96 @@
+/* This file is part of the Gudhi Library. The Gudhi library
+ * (Geometric Understanding in Higher Dimensions) is a generic C++
+ * library for computational topology.
+ *
+ * Author(s): Mathieu Carriere
+ *
+ * Copyright (C) 2018 INRIA (France)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <gudhi/Persistence_weighted_gaussian.h>
+
+#include <cmath>     // std::exp, std::sqrt
+#include <iostream>
+#include <vector>
+#include <utility>
+
+using PD = std::vector<std::pair<double,double> >;
+using PWG = Gudhi::Persistence_representations::Persistence_weighted_gaussian;
+
+// Example program: builds two four-point persistence diagrams and prints the
+// linear PWG, Gaussian PWG and PSS quantities, each once with m = 1000 random
+// Fourier features and once exactly (m = -1).
+int main(int argc, char** argv) {
+
+  std::vector<std::pair<double, double> > persistence1;
+  std::vector<std::pair<double, double> > persistence2;
+
+  persistence1.push_back(std::make_pair(1, 2));
+  persistence1.push_back(std::make_pair(6, 8));
+  persistence1.push_back(std::make_pair(0, 4));
+  persistence1.push_back(std::make_pair(3, 8));
+
+  persistence2.push_back(std::make_pair(2, 9));
+  persistence2.push_back(std::make_pair(1, 6));
+  persistence2.push_back(std::make_pair(3, 5));
+  persistence2.push_back(std::make_pair(6, 10));
+
+  PWG PWG1(persistence1);
+  PWG PWG2(persistence2);
+  double sigma = 1;
+  double tau = 1;
+  int m = 1000;
+
+
+  // Linear PWG
+
+  std::cout << PWG1.compute_scalar_product (PWG2, sigma, PWG::arctan_weight, m) << std::endl;
+  std::cout << PWG1.compute_scalar_product (PWG2, sigma, PWG::arctan_weight, -1) << std::endl;
+
+  std::cout << PWG1.distance (PWG2, sigma, PWG::arctan_weight, m) << std::endl;
+  std::cout << PWG1.distance (PWG2, sigma, PWG::arctan_weight, -1) << std::endl;
+
+
+  // Gaussian PWG
+
+  // distance(..., power = 2) returns the squared distance d^2. The Gaussian
+  // kernel is exp(-d^2 / (2 tau^2)), so the division by 2*tau*tau belongs
+  // inside the exponential, not after it.
+  std::cout << std::exp( -PWG1.distance (PWG2, sigma, PWG::arctan_weight, m, 2) / (2*tau*tau) ) << std::endl;
+  std::cout << std::exp( -PWG1.distance (PWG2, sigma, PWG::arctan_weight, -1, 2) / (2*tau*tau) ) << std::endl;
+
+
+  // PSS
+
+  // Append the mirror image (death, birth) of every point; pss_weight then
+  // gives +1 to the original points and -1 to the mirrored ones.
+  PD pd1 = persistence1; int numpts = persistence1.size();
+  for(int i = 0; i < numpts; i++) pd1.emplace_back(persistence1[i].second,persistence1[i].first);
+  PD pd2 = persistence2; numpts = persistence2.size();
+  for(int i = 0; i < numpts; i++) pd2.emplace_back(persistence2[i].second,persistence2[i].first);
+
+  PWG pwg1(pd1);
+  PWG pwg2(pd2);
+
+  // `pi` is the global constant defined by Persistence_weighted_gaussian.h.
+  std::cout << pwg1.compute_scalar_product (pwg2, 2*std::sqrt(sigma), PWG::pss_weight, m) / (16*pi*sigma) << std::endl;
+  std::cout << pwg1.compute_scalar_product (pwg2, 2*std::sqrt(sigma), PWG::pss_weight, -1) / (16*pi*sigma) << std::endl;
+
+  // NOTE(review): the distance induced by the scaled kernel k/(16*pi*sigma)
+  // would be distance/sqrt(16*pi*sigma); the scaling below is kept as
+  // committed -- confirm the intended normalisation.
+  std::cout << pwg1.distance (pwg2, 2*std::sqrt(sigma), PWG::pss_weight, m) / (16*pi*sigma) << std::endl;
+  std::cout << pwg1.distance (pwg2, 2*std::sqrt(sigma), PWG::pss_weight, -1) / (16*pi*sigma) << std::endl;
+
+
+  return 0;
+}
diff --git a/src/Persistence_representations/example/sliced_wasserstein.cpp b/src/Persistence_representations/example/sliced_wasserstein.cpp
new file mode 100644
index 00000000..673d8474
--- /dev/null
+++ b/src/Persistence_representations/example/sliced_wasserstein.cpp
@@ -0,0 +1,55 @@
+/* This file is part of the Gudhi Library. The Gudhi library
+ * (Geometric Understanding in Higher Dimensions) is a generic C++
+ * library for computational topology.
+ *
+ * Author(s): Mathieu Carriere
+ *
+ * Copyright (C) 2018 INRIA (France)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <gudhi/Sliced_Wasserstein.h>
+
+#include <iostream>
+#include <vector>
+#include <utility>
+
+using SW = Gudhi::Persistence_representations::Sliced_Wasserstein;
+
+// Example program: wraps two four-point diagrams in Sliced_Wasserstein objects
+// and prints, in order, the SW distance with 100 directions, the SW distance
+// with approx = -1, the kernel value (sigma = 1, 100 directions) and the
+// kernel-induced distance (sigma = 1, 100 directions, power = 1).
+int main(int argc, char** argv) {
+
+  const std::vector<std::pair<double, double> > first_diagram = {
+    {1.0, 2.0}, {6.0, 8.0}, {0.0, 4.0}, {3.0, 8.0}
+  };
+  const std::vector<std::pair<double, double> > second_diagram = {
+    {2.0, 9.0}, {1.0, 6.0}, {3.0, 5.0}, {6.0, 10.0}
+  };
+
+  SW sw_first(first_diagram);
+  SW sw_second(second_diagram);
+
+  const double approximate_distance = sw_first.compute_sliced_wasserstein_distance(sw_second,100);
+  std::cout << approximate_distance << std::endl;
+
+  const double exact_distance = sw_first.compute_sliced_wasserstein_distance(sw_second,-1);
+  std::cout << exact_distance << std::endl;
+
+  const double kernel_value = sw_first.compute_scalar_product(sw_second,1,100);
+  std::cout << kernel_value << std::endl;
+
+  const double kernel_distance = sw_first.distance(sw_second,1,100,1);
+  std::cout << kernel_distance << std::endl;
+
+  return 0;
+}
diff --git a/src/Persistence_representations/include/gudhi/Persistence_weighted_gaussian.h b/src/Persistence_representations/include/gudhi/Persistence_weighted_gaussian.h
new file mode 100644
index
00000000..2884885c
--- /dev/null
+++ b/src/Persistence_representations/include/gudhi/Persistence_weighted_gaussian.h
@@ -0,0 +1,143 @@
+/* This file is part of the Gudhi Library. The Gudhi library
+ * (Geometric Understanding in Higher Dimensions) is a generic C++
+ * library for computational topology.
+ *
+ * Author(s): Mathieu Carriere
+ *
+ * Copyright (C) 2018 INRIA (France)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef PERSISTENCE_WEIGHTED_GAUSSIAN_H_
+#define PERSISTENCE_WEIGHTED_GAUSSIAN_H_
+
+#ifdef GUDHI_USE_TBB
+#include <tbb/parallel_for.h>
+#endif
+
+// gudhi include
+#include <gudhi/read_persistence_from_file.h>
+
+// standard include
+#include <cmath>
+#include <iostream>
+#include <vector>
+#include <limits>
+#include <fstream>
+#include <sstream>
+#include <algorithm>
+#include <string>
+#include <utility>
+#include <functional>
+#include <random>  // std::normal_distribution, std::mt19937, std::random_device
+#include <boost/math/constants/constants.hpp>
+
+// Declared const so the definition has internal linkage: a mutable global
+// defined in a header yields one external symbol per translation unit and
+// fails at link time as soon as two sources include this file.
+const double pi = boost::math::constants::pi<double>();
+using PD = std::vector<std::pair<double,double> >;
+
+namespace Gudhi {
+namespace Persistence_representations {
+
+// Holds one persistence diagram (a vector of coordinate pairs) and provides
+// the weighted-Gaussian scalar product and distance defined further down.
+class Persistence_weighted_gaussian{
+
+ protected:
+  PD diagram;
+
+ public:
+
+  // Stores the diagram; the by-value argument is moved into the member
+  // rather than default-constructing and then copy-assigning.
+  Persistence_weighted_gaussian(PD _diagram) : diagram(std::move(_diagram)) {}
+
+  // Returns a copy of the stored diagram.
+  PD get_diagram() const {return this->diagram;}
+
+
+  // **********************************
+  // Utils.
+  // **********************************
+
+
+  // Sign weight: +1 when the second coordinate is strictly greater than the
+  // first, -1 otherwise.
+  static double pss_weight(std::pair<double,double> P){
+    if(P.second > P.first) return 1;
+    else return -1;
+  }
+
+  // Weight equal to the arctangent of (second - first).
+  static double arctan_weight(std::pair<double,double> P){
+    return std::atan(P.second - P.first);
+  }
+
+  // For every frequency z in Z, returns the pair
+  //   ( sum_p weight(p) * cos(<p,z>),  sum_p weight(p) * sin(<p,z>) )
+  // where p ranges over the points of D. The result has Z.size() entries.
+  // Inputs are taken by const reference (the original copied both vectors on
+  // every call) and weight and phase are evaluated once per point, not twice.
+  template<class Weight = std::function<double (std::pair<double,double>) > >
+  std::vector<std::pair<double,double> > Fourier_feat(const PD & D, const std::vector<std::pair<double,double> > & Z, Weight weight = arctan_weight){
+    const int m = D.size(); const int M = Z.size();
+    std::vector<std::pair<double,double> > B; B.reserve(Z.size());
+    for(int i = 0; i < M; i++){
+      double d1 = 0; double d2 = 0; const double zx = Z[i].first; const double zy = Z[i].second;
+      for(int j = 0; j < m; j++){
+        const double w = weight(D[j]);
+        const double phase = D[j].first*zx + D[j].second*zy;
+        d1 += w*std::cos(phase);
+        d2 += w*std::sin(phase);
+      }
+      B.emplace_back(d1,d2);
+    }
+    return B;
+  }
+
+  // Draws M frequency pairs; each coordinate is an independent standard
+  // normal sample divided by sigma. A non-positive M yields an empty vector.
+  // One engine is seeded once per call: the original built and seeded two
+  // std::mt19937 engines from std::random_device for every single sample.
+  std::vector<std::pair<double,double> > random_Fourier(double sigma, int M = 1000){
+    std::normal_distribution<double> distrib(0,1);
+    std::random_device rd;
+    std::mt19937 engine(rd());
+    std::vector<std::pair<double,double> > Z;
+    if(M > 0) Z.reserve(M);
+    for(int i = 0; i < M; i++){
+      const double zx = distrib(engine); const double zy = distrib(engine);
+      Z.emplace_back(zx/sigma,zy/sigma);
+    }
+    return Z;
+  }
+
+
+
+  // **********************************
+  // Scalar product + distance.
+  // **********************************
+
+
+  // Weighted-Gaussian scalar product between this diagram and `second`.
+  //   m == -1 : exact double sum over all point pairs of
+  //             weight(p) * weight(q) * exp(-|p - q|^2 / (2 sigma^2)).
+  //   otherwise: estimate from m random Fourier features drawn by
+  //             random_Fourier(sigma, m); the value changes from call to call.
+  // m is expected to be -1 or strictly positive; the random-feature branch
+  // divides by m.
+  template<class Weight = std::function<double (std::pair<double,double>) > >
+  double compute_scalar_product(Persistence_weighted_gaussian second, double sigma, Weight weight = arctan_weight, int m = 1000){
+
+    // Read-only access: bind references instead of copying both diagrams.
+    const PD & diagram1 = this->diagram; const PD & diagram2 = second.diagram;
+
+    if(m == -1){
+      const int num_pts1 = diagram1.size(); const int num_pts2 = diagram2.size(); double k = 0;
+      for(int i = 0; i < num_pts1; i++)
+        for(int j = 0; j < num_pts2; j++){
+          const double dx = diagram1[i].first - diagram2[j].first;
+          const double dy = diagram1[i].second - diagram2[j].second;
+          k += weight(diagram1[i])*weight(diagram2[j])*std::exp(-(dx*dx + dy*dy)/(2*sigma*sigma));
+        }
+      return k;
+    }
+    else{
+      std::vector<std::pair<double,double> > z = random_Fourier(sigma, m);
+      std::vector<std::pair<double,double> > b1 = Fourier_feat(diagram1,z,weight);
+      std::vector<std::pair<double,double> > b2 = Fourier_feat(diagram2,z,weight);
+      double d = 0; for(int i = 0; i < m; i++) d += b1[i].first*b2[i].first + b1[i].second*b2[i].second;
+      return d/m;
+    }
+  }
+
+  // Kernel-induced distance raised to `power`:
+  //   (k(this,this) + k(second,second) - 2 k(this,second)) ^ (power / 2).
+  // In random-feature mode the three terms are built from ONE shared set of
+  // frequencies, so the bracket equals |b1 - b2|^2 / m and is non-negative.
+  // The original called compute_scalar_product three times, each with its own
+  // random frequencies; the bracket could then be negative and std::pow
+  // returned NaN for fractional exponents.
+  template<class Weight = std::function<double (std::pair<double,double>) > >
+  double distance(Persistence_weighted_gaussian second, double sigma, Weight weight = arctan_weight, int m = 1000, double power = 1) {
+    double squared = 0;
+    if(m == -1){
+      squared = this->compute_scalar_product(*this, sigma, weight, m) + second.compute_scalar_product(second, sigma, weight, m)-2*this->compute_scalar_product(second, sigma, weight, m);
+    }
+    else{
+      std::vector<std::pair<double,double> > z = random_Fourier(sigma, m);
+      std::vector<std::pair<double,double> > b1 = Fourier_feat(this->diagram,z,weight);
+      std::vector<std::pair<double,double> > b2 = Fourier_feat(second.diagram,z,weight);
+      for(int i = 0; i < m; i++){
+        const double dx = b1[i].first - b2[i].first; const double dy = b1[i].second - b2[i].second;
+        squared += dx*dx + dy*dy;
+      }
+      squared /= m;
+    }
+    // Guard against a tiny negative value produced by rounding in the exact sum.
+    return std::pow(std::max(squared, 0.0), power/2.0);
+  }
+
+
+};
+
+} // namespace Persistence_representations
+} // namespace Gudhi
+
+#endif // PERSISTENCE_WEIGHTED_GAUSSIAN_H_
diff --git a/src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h b/src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h
new file mode 100644
index 00000000..4fa6151f
--- /dev/null
+++ b/src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h
@@ -0,0 +1,285 @@
+/* This file is part of the Gudhi Library.
The Gudhi library
+ * (Geometric Understanding in Higher Dimensions) is a generic C++
+ * library for computational topology.
+ *
+ * Author(s): Mathieu Carriere
+ *
+ * Copyright (C) 2018 INRIA (France)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef SLICED_WASSERSTEIN_H_
+#define SLICED_WASSERSTEIN_H_
+
+#ifdef GUDHI_USE_TBB
+#include <tbb/parallel_for.h>
+#endif
+
+// gudhi include
+#include <gudhi/read_persistence_from_file.h>
+
+// standard include
+#include <cmath>
+#include <cstdlib>  // srand, rand, RAND_MAX
+#include <ctime>    // time
+#include <iostream>
+#include <vector>
+#include <limits>
+#include <fstream>
+#include <sstream>
+#include <algorithm>
+#include <string>
+#include <utility>
+#include <functional>
+#include <boost/math/constants/constants.hpp>
+
+// Declared const so the definition has internal linkage: a mutable global
+// defined in a header yields one external symbol per translation unit and
+// fails at link time as soon as two sources include this file.
+const double pi = boost::math::constants::pi<double>();
+using PD = std::vector<std::pair<double,double> >;
+
+namespace Gudhi {
+namespace Persistence_representations {
+
+// Holds one persistence diagram (a vector of coordinate pairs) and provides
+// the sliced Wasserstein distance, kernel and kernel distance defined below.
+class Sliced_Wasserstein {
+
+ protected:
+  PD diagram;
+
+ public:
+
+  // Stores the diagram; the by-value argument is moved into the member
+  // rather than default-constructing and then copy-assigning.
+  Sliced_Wasserstein(PD _diagram) : diagram(std::move(_diagram)) {}
+
+  // Returns a copy of the stored diagram.
+  PD get_diagram() const {return this->diagram;}
+
+
+  // **********************************
+  // Utils.
+  // **********************************
+
+  // Compute the angle formed by two points of a PD.
+  // Builds a vector perpendicular to the segment joining points i and j,
+  // oriented so that its first coordinate is non-negative, and returns the
+  // arcsine of its normalised second coordinate.
+  // The diagram is taken by const reference: the caller invokes this once per
+  // pair of points, and the original by-value parameter copied the whole
+  // diagram each time.
+  double compute_angle(const PD & diag, int i, int j){
+    std::pair<double,double> vect; double x1,y1, x2,y2;
+    x1 = diag[i].first; y1 = diag[i].second;
+    x2 = diag[j].first; y2 = diag[j].second;
+    if (y1 - y2 > 0){
+      vect.first = y1 - y2;
+      vect.second = x2 - x1;}
+    else{
+      if(y1 - y2 < 0){
+        vect.first = y2 - y1;
+        vect.second = x1 - x2;
+      }
+      else{
+        vect.first = 0;
+        // std::abs selects the floating-point overload; an unqualified abs can
+        // resolve to C's int abs(int) and silently truncate the difference.
+        vect.second = std::abs(x1 - x2);}
+    }
+    double norm = std::sqrt(vect.first*vect.first + vect.second*vect.second);
+    return std::asin(vect.second/norm);
+  }
+
+  // Compute the integral of |cos()| between alpha and beta, valid only if alpha is in [-pi,pi] and beta-alpha is in [0,pi]
+  // Each branch picks the closed form according to the sign of cos(alpha) and
+  // to whether beta lies past the next zero of the cosine.
+  double compute_int_cos(const double & alpha, const double & beta){
+    double res = 0;
+    if (alpha >= 0 && alpha <= pi){
+      if (std::cos(alpha) >= 0){
+        if(pi/2 <= beta){res = 2-std::sin(alpha)-std::sin(beta);}
+        else{res = std::sin(beta)-std::sin(alpha);}
+      }
+      else{
+        if(1.5*pi <= beta){res = 2+std::sin(alpha)+std::sin(beta);}
+        else{res = std::sin(alpha)-std::sin(beta);}
+      }
+    }
+    if (alpha >= -pi && alpha <= 0){
+      if (std::cos(alpha) <= 0){
+        if(-pi/2 <= beta){res = 2+std::sin(alpha)+std::sin(beta);}
+        else{res = std::sin(alpha)-std::sin(beta);}
+      }
+      else{
+        if(pi/2 <= beta){res = 2-std::sin(alpha)-std::sin(beta);}
+        else{res = std::sin(beta)-std::sin(alpha);}
+      }
+    }
+    return res;
+  }
+
+  // Returns |PD1[p] - PD2[q]| times the integral of |cos| over an interval of
+  // length theta2 - theta1 whose start is theta1 shifted by the arcsine of the
+  // normalised ordinate difference of the two points.
+  // The two points must be distinct, otherwise norm is zero and the division
+  // below is undefined.
+  double compute_int(const double & theta1, const double & theta2, const int & p, const int & q, const PD & PD1, const PD & PD2){
+    double norm = std::sqrt( (PD1[p].first-PD2[q].first)*(PD1[p].first-PD2[q].first) + (PD1[p].second-PD2[q].second)*(PD1[p].second-PD2[q].second) );
+    double angle1;
+    if (PD1[p].first > PD2[q].first)
+      angle1 = theta1 - std::asin( (PD1[p].second-PD2[q].second)/norm );
+    else
+      angle1 = theta1 - std::asin( (PD2[q].second-PD1[p].second)/norm );
+    double angle2 = angle1 + theta2 - theta1;
+    double integral = compute_int_cos(angle1,angle2);
+    return norm*integral;
+  }
+
+
+
+
+  // **********************************
+  // Scalar product
+ distance.
+  // **********************************
+
+  // Sliced Wasserstein distance between this diagram and `second`.
+  //   approx == -1 : exact computation. The diagrams are perturbed with
+  //                  rand(), reseeded from time(NULL) on every call, so the
+  //                  result is not reproducible bit for bit.
+  //   approx  >  0 : average over `approx` equally spaced directions in
+  //                  [-pi/2, pi/2).
+  // Any other value of approx runs no direction at all and returns 0.
+  double compute_sliced_wasserstein_distance(Sliced_Wasserstein second, int approx) {
+
+    PD diagram1 = this->diagram; PD diagram2 = second.diagram; double sw = 0;
+
+    if(approx == -1){
+
+      // Add projections onto diagonal.
+      int n1, n2; n1 = diagram1.size(); n2 = diagram2.size(); double max_ordinate = std::numeric_limits<double>::lowest();
+      for (int i = 0; i < n2; i++){
+        max_ordinate = std::max(max_ordinate, diagram2[i].second);
+        diagram1.emplace_back( (diagram2[i].first+diagram2[i].second)/2, (diagram2[i].first+diagram2[i].second)/2 );
+      }
+      for (int i = 0; i < n1; i++){
+        max_ordinate = std::max(max_ordinate, diagram1[i].second);
+        diagram2.emplace_back( (diagram1[i].first+diagram1[i].second)/2, (diagram1[i].first+diagram1[i].second)/2 );
+      }
+      int num_pts_dgm = diagram1.size();
+
+      // Slightly perturb the points so that the PDs are in generic positions.
+      // The perturbation scale is 1e-5 times the decimal magnitude of the
+      // largest ordinate.
+      int mag = 0; while(max_ordinate > 10){mag++; max_ordinate/=10;}
+      double thresh = std::pow(10.0,-5+mag);
+      // NOTE(review): reseeding the process-wide C generator here is a side
+      // effect on every caller of rand(); kept as committed.
+      srand(time(NULL));
+      for (int i = 0; i < num_pts_dgm; i++){
+        diagram1[i].first += thresh*(1.0-2.0*rand()/RAND_MAX); diagram1[i].second += thresh*(1.0-2.0*rand()/RAND_MAX);
+        diagram2[i].first += thresh*(1.0-2.0*rand()/RAND_MAX); diagram2[i].second += thresh*(1.0-2.0*rand()/RAND_MAX);
+      }
+
+      // Compute all angles in both PDs.
+      std::vector<std::pair<double, std::pair<int,int> > > angles1, angles2;
+      for (int i = 0; i < num_pts_dgm; i++){
+        for (int j = i+1; j < num_pts_dgm; j++){
+          double theta1 = compute_angle(diagram1,i,j); double theta2 = compute_angle(diagram2,i,j);
+          angles1.emplace_back(theta1, std::pair<int,int>(i,j));
+          angles2.emplace_back(theta2, std::pair<int,int>(i,j));
+        }
+      }
+
+      // Sort angles.
+      // The comparator takes both arguments by const reference (the original
+      // took the first by non-const reference) and captures nothing.
+      auto by_angle = [](const std::pair<double, std::pair<int,int> >& p1, const std::pair<double, std::pair<int,int> >& p2){return (p1.first < p2.first);};
+      std::sort(angles1.begin(), angles1.end(), by_angle);
+      std::sort(angles2.begin(), angles2.end(), by_angle);
+
+      // Initialize orders of the points of both PDs (given by ordinates when theta = -pi/2).
+      // Capture by reference: with [=] each comparator object owned a full
+      // copy of the diagram, duplicated whenever std::sort copied it.
+      std::vector<int> orderp1, orderp2;
+      for (int i = 0; i < num_pts_dgm; i++){ orderp1.push_back(i); orderp2.push_back(i); }
+      std::sort( orderp1.begin(), orderp1.end(), [&](int i, int j){ if(diagram1[i].second != diagram1[j].second) return (diagram1[i].second < diagram1[j].second); else return (diagram1[i].first > diagram1[j].first); } );
+      std::sort( orderp2.begin(), orderp2.end(), [&](int i, int j){ if(diagram2[i].second != diagram2[j].second) return (diagram2[i].second < diagram2[j].second); else return (diagram2[i].first > diagram2[j].first); } );
+
+      // Find the inverses of the orders.
+      // orderp1/orderp2 are permutations, so one linear pass inverts them
+      // (the original scanned for each index, which is quadratic).
+      std::vector<int> order1(num_pts_dgm); std::vector<int> order2(num_pts_dgm);
+      for (int j = 0; j < num_pts_dgm; j++){ order1[orderp1[j]] = j; order2[orderp2[j]] = j; }
+
+      // Record all inversions of points in the orders as theta varies along the positive half-disk.
+      std::vector<std::vector<std::pair<int,double> > > anglePerm1(num_pts_dgm);
+      std::vector<std::vector<std::pair<int,double> > > anglePerm2(num_pts_dgm);
+
+      int m1 = angles1.size();
+      for (int i = 0; i < m1; i++){
+        double theta = angles1[i].first; int p = angles1[i].second.first; int q = angles1[i].second.second;
+        anglePerm1[order1[p]].emplace_back(p,theta);
+        anglePerm1[order1[q]].emplace_back(q,theta);
+        int a = order1[p]; int b = order1[q]; order1[p] = b; order1[q] = a;
+      }
+
+      int m2 = angles2.size();
+      for (int i = 0; i < m2; i++){
+        double theta = angles2[i].first; int p = angles2[i].second.first; int q = angles2[i].second.second;
+        anglePerm2[order2[p]].emplace_back(p,theta);
+        anglePerm2[order2[q]].emplace_back(q,theta);
+        int a = order2[p]; int b = order2[q]; order2[p] = b; order2[q] = a;
+      }
+
+      // Close every list with the end of the angular range.
+      for (int i = 0; i < num_pts_dgm; i++){
+        anglePerm1[order1[i]].emplace_back(i,pi/2);
+        anglePerm2[order2[i]].emplace_back(i,pi/2);
+      }
+
+      // Compute the SW distance with the list of inversions.
+      for (int i = 0; i < num_pts_dgm; i++){
+        const std::vector<std::pair<int,double> > & u = anglePerm1[i];
+        const std::vector<std::pair<int,double> > & v = anglePerm2[i];
+        double theta1, theta2; theta1 = -pi/2;
+        unsigned int ku, kv; ku = 0; kv = 0; theta2 = std::min(u[ku].second,v[kv].second);
+        // Terminates because the last entry of both lists is exactly pi/2.
+        while(theta1 != pi/2){
+          if(diagram1[u[ku].first].first != diagram2[v[kv].first].first || diagram1[u[ku].first].second != diagram2[v[kv].first].second)
+            if(theta1 != theta2)
+              sw += compute_int(theta1, theta2, u[ku].first, v[kv].first, diagram1, diagram2);
+          theta1 = theta2;
+          if ( (theta2 == u[ku].second) && ku < u.size()-1 ) ku++;
+          if ( (theta2 == v[kv].second) && kv < v.size()-1 ) kv++;
+          theta2 = std::min(u[ku].second, v[kv].second);
+        }
+      }
+    }
+
+
+    else{
+      double step = pi/approx;
+
+      // Add projections onto diagonal.
+      int n1, n2; n1 = diagram1.size(); n2 = diagram2.size();
+      for (int i = 0; i < n2; i++)
+        diagram1.emplace_back( (diagram2[i].first + diagram2[i].second)/2, (diagram2[i].first + diagram2[i].second)/2 );
+      for (int i = 0; i < n1; i++)
+        diagram2.emplace_back( (diagram1[i].first + diagram1[i].second)/2, (diagram1[i].first + diagram1[i].second)/2 );
+      int n = diagram1.size();
+
+      // Sort and compare all projections.
+      // Contribution of direction number i: project both diagrams on the line
+      // of angle -pi/2 + i*step, sort the projections, and sum the absolute
+      // differences of equally ranked values.
+      auto direction_cost = [&](int i) -> double {
+        const double c = std::cos(-pi/2+i*step); const double s = std::sin(-pi/2+i*step);
+        std::vector<double> l1(n), l2(n);
+        for (int j = 0; j < n; j++){
+          l1[j] = diagram1[j].first*c + diagram1[j].second*s;
+          l2[j] = diagram2[j].first*c + diagram2[j].second*s;
+        }
+        std::sort(l1.begin(),l1.end());
+        std::sort(l2.begin(),l2.end());
+        double f = 0; for (int j = 0; j < n; j++) f += std::abs(l1[j] - l2[j]);
+        return f*step;
+      };
+
+      // Each direction writes its own slot, so the parallel loop shares no
+      // mutable state. The original accumulated into `sw` from every TBB
+      // worker, which is a data race.
+      std::vector<double> per_direction(approx > 0 ? approx : 0, 0.0);
+      #ifdef GUDHI_USE_TBB
+        tbb::parallel_for(0, approx, [&](int i){ per_direction[i] = direction_cost(i); });
+      #else
+        for (int i = 0; i < approx; i++) per_direction[i] = direction_cost(i);
+      #endif
+      // Summed in index order: same result as the serial loop, independent of
+      // thread scheduling.
+      for (int i = 0; i < approx; i++) sw += per_direction[i];
+    }
+
+    return sw/pi;
+  }
+
+
+  // Kernel value exp(-SW(this, second) / (2 sigma^2)), where SW is computed by
+  // compute_sliced_wasserstein_distance with the given approx.
+  double compute_scalar_product(Sliced_Wasserstein second, double sigma, int approx = 100) {
+    return std::exp(-compute_sliced_wasserstein_distance(second, approx)/(2*sigma*sigma));
+  }
+
+  // Kernel-induced distance raised to `power`:
+  //   (k(this,this) + k(second,second) - 2 k(this,second)) ^ (power / 2).
+  double distance(Sliced_Wasserstein second, double sigma, int approx = 100, double power = 1) {
+    return std::pow(this->compute_scalar_product(*this, sigma, approx) + second.compute_scalar_product(second, sigma, approx)-2*this->compute_scalar_product(second, sigma, approx), power/2.0);
+  }
+
+
+};
+
+} // namespace Persistence_representations
+} // namespace Gudhi
+
+#endif // SLICED_WASSERSTEIN_H_
diff --git a/src/cmake/modules/GUDHI_modules.cmake b/src/cmake/modules/GUDHI_modules.cmake
index f95d0c34..205ee8a1 100644
--- a/src/cmake/modules/GUDHI_modules.cmake
+++ b/src/cmake/modules/GUDHI_modules.cmake
@@ -16,8 +16,8 @@ function(add_gudhi_module file_path)
 endfunction(add_gudhi_module)

-option(WITH_GUDHI_BENCHMARK "Activate/desactivate benchmark compilation" OFF)
-option(WITH_GUDHI_EXAMPLE "Activate/desactivate examples compilation and installation" OFF)
+option(WITH_GUDHI_BENCHMARK "Activate/desactivate benchmark compilation" ON)
+option(WITH_GUDHI_EXAMPLE "Activate/desactivate examples compilation and installation" ON)
 option(WITH_GUDHI_PYTHON "Activate/desactivate python module compilation and installation" ON)
 option(WITH_GUDHI_TEST "Activate/desactivate examples compilation and installation" ON)
 option(WITH_GUDHI_UTILITIES "Activate/desactivate utilities compilation and installation" ON)
|