From 4e80b66cf5d4e6121149a12f3137e372e04d8588 Mon Sep 17 00:00:00 2001 From: mcarrier Date: Thu, 29 Mar 2018 15:27:31 +0000 Subject: added doc + cython git-svn-id: svn+ssh://scm.gforge.inria.fr/svnroot/gudhi/branches/kernels@3319 636b058d-ea47-450e-bf9e-a15bfbe3eedb Former-commit-id: cffc2e28ebf6fae46246c5abaac52b7328adf490 --- .../doc/Persistence_representations_doc.h | 62 +++++++++++++++++++ .../example/persistence_weighted_gaussian.cpp | 16 ++--- .../example/sliced_wasserstein.cpp | 12 ++-- .../include/gudhi/Persistence_weighted_gaussian.h | 72 +++++++++++++++++++--- .../include/gudhi/Sliced_Wasserstein.h | 58 +++++++++++++++-- 5 files changed, 194 insertions(+), 26 deletions(-) (limited to 'src/Persistence_representations') diff --git a/src/Persistence_representations/doc/Persistence_representations_doc.h b/src/Persistence_representations/doc/Persistence_representations_doc.h index 38bd3a21..6d4cc96c 100644 --- a/src/Persistence_representations/doc/Persistence_representations_doc.h +++ b/src/Persistence_representations/doc/Persistence_representations_doc.h @@ -250,6 +250,68 @@ namespace Persistence_representations { absolute value of differences between coordinates. A scalar product is a sum of products of values at the corresponding positions of two vectors. + + + + +\section sec_persistence_kernels Kernels on Persistence Diagrams + Reference manual: \ref Gudhi::Persistence_representations::Sliced_Wasserstein
+ Reference manual: \ref Gudhi::Persistence_representations::Persistence_weighted_gaussian
+ + Kernels for Persistence Diagrams can be regarded as infinite-dimensional vectorizations. More specifically, + they are similarity functions whose evaluations on pairs of persistence diagrams equals the scalar products + between images of these pairs under a map \f$\Phi\f$ taking values in a specific (possibly non Euclidean) Hilbert space \f$k(D_i, D_j) = \langle \Phi(D_i),\Phi(D_j)\rangle\f$. + Reciprocally, classical results of learning theory ensure that such a \f$\Phi\f$ exists for a given similarity function \f$k\f$ if and only if \f$k\f$ is positive semi-definite. + Kernels are designed for algorithms that can be kernelized, i.e., algorithms that only require to know scalar products between instances in order to run. + Examples of such algorithms include Support Vector Machines, Principal Component Analysis and Ridge Regression. + + There have been several attempts at defining kernels, i.e., positive semi-definite functions, between persistence diagrams within the last few years. We provide implementation + for three of them: + + \li the Persistence Scale Space Kernel---see \cite Reininghaus_Huber_ALL_PSSK, which is the classical scalar product between \f$L^2\f$ functions, where persistence diagrams + are turned into functions by centering and summing Gaussian functions over the diagram points and their symmetric counterparts w.r.t. the diagonal: \f$k(D_1,D_2)=\int \Phi(D_1)\Phi(D_2)\f$, + where \f$\Phi(D)=\sum_{p\in D} {\rm exp}\left(-\frac{\|p-\cdot\|_2^2}{2\sigma^2}\right)\f$. + + \li the Persistence Weighted Gaussian Kernel---see \cite Kusano_Fukumizu_Hiraoka_PWGK, which is a slight generalization of the previous kernel, is the scalar product between + weighted Kernel Mean Embeddings of persistence diagrams w.r.t. the Gaussian Kernel \f$k_G\f$ (with corresponding map \f$\Phi_G\f$) in \f$\mathbb{R}^2\f$: + \f$k(D_1,D_2)=\langle\sum_{p\in D_1} w(p)\Phi_G(p), \sum_{q\in D_2} w(q)\Phi_G(q)\rangle\f$ + + \li the Sliced Wasserstein Kernel---see \cite pmlr-v70-carriere17a, which takes the form of a Gaussian kernel with a specific distance between persistence diagrams + called the Sliced Wasserstein Distance: \f$k(D_1,D_2)={\rm exp}\left(-\frac{SW(D_1,D_2)}{2\sigma^2}\right)\f$ + + When launching: + + \code $> ./Sliced_Wasserstein + \endcode + + the program output is: + + \code $> Approx SW distance: 5.33648 + $> Exact SW distance: 5.33798 + $> Approx SW kernel: 0.0693743 + $> Exact SW kernel: 0.0693224 + $> Distance induced by approx SW kernel: 1.36428 + $> Distance induced by exact SW kernel: 1.3643 + \endcode + + + and when launching: + + \code $> ./Persistence_weighted_gaussian + \endcode + + the program output is: + + \code $> Approx PWG kernel: 1.21509 + $> Exact PWG kernel: 1.13628 + $> Distance induced by approx PWG kernel: 3.23354 + $> Distance induced by exact PWG kernel: 3.25697 + $> Approx Gaussian PWG kernel: 0.0194222 + $> Exact Gaussian PWG kernel: 0.0192524 + $> Approx PSS kernel: 0.134413 + $> Exact PSS kernel: 0.133394 + \endcode + */ /** @} */ // end defgroup Persistence_representations diff --git a/src/Persistence_representations/example/persistence_weighted_gaussian.cpp b/src/Persistence_representations/example/persistence_weighted_gaussian.cpp index a0e820ea..d447f165 100644 --- a/src/Persistence_representations/example/persistence_weighted_gaussian.cpp +++ b/src/Persistence_representations/example/persistence_weighted_gaussian.cpp @@ -57,11 +57,11 @@ int main(int argc, char** argv) { // Linear PWG - std::cout << PWG1.compute_scalar_product (PWG2) << std::endl; - std::cout << PWGex1.compute_scalar_product (PWGex2) << std::endl; + std::cout << "Approx PWG kernel: " << PWG1.compute_scalar_product (PWG2) << std::endl; + std::cout << "Exact PWG kernel: " << PWGex1.compute_scalar_product (PWGex2) << std::endl; - std::cout << PWG1.distance (PWG2) << std::endl; - std::cout << PWGex1.distance (PWGex2) << std::endl; + std::cout << "Distance induced by approx PWG kernel: " << PWG1.distance (PWG2) << std::endl; + std::cout << "Distance induced by exact PWG kernel: " << PWGex1.distance (PWGex2) << std::endl; @@ -71,8 +71,8 @@ int main(int argc, char** argv) { // Gaussian PWG - std::cout << std::exp( -PWG1.distance (PWG2, 2) ) / (2*tau*tau) << std::endl; - std::cout << std::exp( -PWGex1.distance (PWGex2, 2) ) / (2*tau*tau) << std::endl; + std::cout << "Approx Gaussian PWG kernel: " << std::exp( -PWG1.distance (PWG2) ) / (2*tau*tau) << std::endl; + std::cout << "Exact Gaussian PWG kernel: " << std::exp( -PWGex1.distance (PWGex2) ) / (2*tau*tau) << std::endl; @@ -91,8 +91,8 @@ int main(int argc, char** argv) { PWG pwgex1(pd1, 2*std::sqrt(sigma), -1, PWG::pss_weight); PWG pwgex2(pd2, 2*std::sqrt(sigma), -1, PWG::pss_weight); - std::cout << pwg1.compute_scalar_product (pwg2) / (16*pi*sigma) << std::endl; - std::cout << pwgex1.compute_scalar_product (pwgex2) / (16*pi*sigma) << std::endl; + std::cout << "Approx PSS kernel: " << pwg1.compute_scalar_product (pwg2) / (16*pi*sigma) << std::endl; + std::cout << "Exact PSS kernel: " << pwgex1.compute_scalar_product (pwgex2) / (16*pi*sigma) << std::endl; diff --git a/src/Persistence_representations/example/sliced_wasserstein.cpp b/src/Persistence_representations/example/sliced_wasserstein.cpp index 2470029b..f1aeea5c 100644 --- a/src/Persistence_representations/example/sliced_wasserstein.cpp +++ b/src/Persistence_representations/example/sliced_wasserstein.cpp @@ -32,8 +32,6 @@ int main(int argc, char** argv) { std::vector > persistence1; std::vector > persistence2; - std::vector > > set1; - std::vector > > set2; persistence1.push_back(std::make_pair(1, 2)); persistence1.push_back(std::make_pair(6, 8)); @@ -52,10 +50,12 @@ int main(int argc, char** argv) { SW swex1(persistence1, 1, -1); SW swex2(persistence2, 1, -1); - std::cout << sw1.compute_sliced_wasserstein_distance(sw2) << std::endl; - std::cout << swex1.compute_sliced_wasserstein_distance(swex2) << std::endl; - std::cout << sw1.compute_scalar_product(sw2) << std::endl; - std::cout << swex1.distance(swex2) << std::endl; + std::cout << "Approx SW distance: " << sw1.compute_sliced_wasserstein_distance(sw2) << std::endl; + std::cout << "Exact SW distance: " << swex1.compute_sliced_wasserstein_distance(swex2) << std::endl; + std::cout << "Approx SW kernel: " << sw1.compute_scalar_product(sw2) << std::endl; + std::cout << "Exact SW kernel: " << swex1.compute_scalar_product(swex2) << std::endl; + std::cout << "Distance induced by approx SW kernel: " << sw1.distance(sw2) << std::endl; + std::cout << "Distance induced by exact SW kernel: " << swex1.distance(swex2) << std::endl; return 0; } diff --git a/src/Persistence_representations/include/gudhi/Persistence_weighted_gaussian.h b/src/Persistence_representations/include/gudhi/Persistence_weighted_gaussian.h index a6efa72d..f824225a 100644 --- a/src/Persistence_representations/include/gudhi/Persistence_weighted_gaussian.h +++ b/src/Persistence_representations/include/gudhi/Persistence_weighted_gaussian.h @@ -45,7 +45,40 @@ using Weight = std::function) >; namespace Gudhi { namespace Persistence_representations { - +/** + * \class Persistence_weighted_gaussian gudhi/Persistence_weighted_gaussian.h + * \brief A class implementing the Persistence Weighted Gaussian Kernel and a specific case of it called the Persistence Scale Space Kernel. + * + * \ingroup Persistence_representations + * + * \details + * The Persistence Weighted Gaussian Kernel is built with Gaussian Kernel Mean Embedding, meaning that each persistence diagram is first + * sent to the Hilbert space of a Gaussian kernel with bandwidth parameter \f$\sigma >0\f$ using a weighted mean embedding \f$\Phi\f$: + * + * \f$ \Phi\,:\,D\,\rightarrow\,\sum_{p\in D}\,w(p)\,{\rm exp}\left(-\frac{\|p-\cdot\|_2^2}{2\sigma^2}\right) \f$, + * + * Usually, the weight function is chosen to be an arctan function of the distance of the point to the diagonal: + * \f$w(p) = {\rm arctan}(C\,|y-x|^\alpha)\f$, for some parameters \f$C,\alpha >0\f$. + * Then, their scalar product in this space is computed: + * + * \f$ k(D_1,D_2)=\langle\Phi(D_1),\Phi(D_2)\rangle + * \,=\,\sum_{p\in D_1}\,\sum_{q\in D_2}\,w(p)\,w(q)\,{\rm exp}\left(-\frac{\|p-q\|_2^2}{2\sigma^2}\right).\f$ + * + * Note that one may apply a second Gaussian kernel to their distance in this space and still get a kernel. + * + * It follows that the computation time is \f$O(n^2)\f$ where \f$n\f$ is the number of points + * in the diagrams. This time can be improved by computing approximations of the kernel + * with \f$m\f$ Fourier features \cite Rahimi07randomfeatures. In that case, the computation time becomes \f$O(mn)\f$. + * + * The Persistence Scale Space Kernel is a Persistence Weighted Gaussian Kernel between modified diagrams: + * the symmetric of each point with respect to the diagonal is first added in each diagram, and then the weight function + * is set to be +1 if the point is above the diagonal and -1 otherwise. + * + * For more details, please consult Persistence Weighted Kernel for Topological Data Analysis\cite Kusano_Fukumizu_Hiraoka_PWGK + * and A Stable Multi-Scale Kernel for Topological Machine Learning\cite Reininghaus_Huber_ALL_PSSK . + * It implements the following concepts: Topological_data_with_distances, Topological_data_with_scalar_product. + * +**/ class Persistence_weighted_gaussian{ protected: @@ -56,8 +89,17 @@ class Persistence_weighted_gaussian{ public: - Persistence_weighted_gaussian(PD _diagram){diagram = _diagram; sigma = 1.0; approx = 1000; weight = arctan_weight;} - Persistence_weighted_gaussian(PD _diagram, double _sigma, int _approx, Weight _weight){diagram = _diagram; sigma = _sigma; approx = _approx; weight = _weight;} + /** \brief Persistence Weighted Gaussian Kernel constructor. + * \ingroup Persistence_weighted_gaussian + * + * @param[in] _diagram persistence diagram. + * @param[in] _sigma bandwidth parameter of the Gaussian Kernel used for the Kernel Mean Embedding of the diagrams. + * @param[in] _approx number of random Fourier features in case of approximate computation, set to -1 for exact computation. + * @param[in] _weight weight function for the points in the diagrams. + * + */ + Persistence_weighted_gaussian(PD _diagram, double _sigma = 1.0, int _approx = 1000, Weight _weight = arctan_weight){diagram = _diagram; sigma = _sigma; approx = _approx; weight = _weight;} + PD get_diagram(){return this->diagram;} double get_sigma(){return this->sigma;} int get_approx(){return this->approx;} @@ -68,7 +110,12 @@ class Persistence_weighted_gaussian{ // Utils. // ********************************** - + /** \brief Specific weight of Persistence Scale Space Kernel. + * \ingroup Persistence_weighted_gaussian + * + * @param[in] p point in 2D. + * + */ static double pss_weight(std::pair p){ if(p.second > p.first) return 1; else return -1; @@ -108,7 +155,12 @@ class Persistence_weighted_gaussian{ // Scalar product + distance. // ********************************** - + /** \brief Evaluation of the kernel on a pair of diagrams. + * \ingroup Persistence_weighted_gaussian + * + * @param[in] second other instance of class Persistence_weighted_gaussian. Warning: sigma, approx and weight parameters need to be the same for both instances!!! + * + */ double compute_scalar_product(Persistence_weighted_gaussian second){ PD diagram1 = this->diagram; PD diagram2 = second.diagram; @@ -131,11 +183,17 @@ class Persistence_weighted_gaussian{ } } - double distance(Persistence_weighted_gaussian second, double power = 1) { + /** \brief Evaluation of the distance between images of diagrams in the Hilbert space of the kernel. + * \ingroup Persistence_weighted_gaussian + * + * @param[in] second other instance of class Persistence_weighted_gaussian. Warning: sigma, approx and weight parameters need to be the same for both instances!!! + * + */ + double distance(Persistence_weighted_gaussian second) { if(this->sigma != second.get_sigma() || this->approx != second.get_approx()){ std::cout << "Error: different representations!" << std::endl; return 0; } - else return std::pow(this->compute_scalar_product(*this) + second.compute_scalar_product(second)-2*this->compute_scalar_product(second), power/2.0); + else return std::pow(this->compute_scalar_product(*this) + second.compute_scalar_product(second)-2*this->compute_scalar_product(second), 0.5); } diff --git a/src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h b/src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h index f2ec56b7..bfb77384 100644 --- a/src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h +++ b/src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h @@ -45,6 +45,30 @@ using PD = std::vector >; namespace Gudhi { namespace Persistence_representations { +/** + * \class Sliced_Wasserstein gudhi/Sliced_Wasserstein.h + * \brief A class implementing the Sliced Wasserstein Kernel. + * + * \ingroup Persistence_representations + * + * \details + * The Sliced Wasserstein Kernel is defined as a Gaussian-like Kernel between persistence diagrams, where the distance used for + * comparison is the Sliced Wasserstein distance \f$SW\f$ between persistence diagrams, defined as the integral of the 1-norm + * between the sorted projections of the diagrams onto all lines passing through the origin: + * + * \f$ SW(D_1,D_2)=\int_{\theta\in\mathbb{S}}\,\|\pi_\theta(D_1\cup\pi_\Delta(D_2))-\pi_\theta(D_2\cup\pi_\Delta(D_1))\|_1{\rm d}\theta\f$, + * + * where \f$\pi_\theta\f$ is the projection onto the line defined with angle \f$\theta\f$ in the unit circle \f$\mathbb{S}\f$, + * and \f$\pi_\Delta\f$ is the projection onto the diagonal. + * The integral can be either computed exactly in \f$O(n^2{\rm log}(n))\f$ time, where \f$n\f$ is the number of points + * in the diagrams, or approximated by sampling \f$N\f$ lines in the circle in \f$O(Nn{\rm log}(n))\f$ time. The Sliced Wasserstein Kernel is then computed as: + * + * \f$ k(D_1,D_2) = {\rm exp}\left(-\frac{SW(D_1,D_2)}{2\sigma^2}\right).\f$ + * + * For more details, please consult Sliced Wasserstein Kernel for Persistence Diagrams\cite pmlr-v70-carriere17a . + * It implements the following concepts: Topological_data_with_distances, Topological_data_with_scalar_product. + * +**/ class Sliced_Wasserstein { protected: @@ -83,8 +107,15 @@ class Sliced_Wasserstein { } - Sliced_Wasserstein(PD _diagram){diagram = _diagram; approx = 100; sigma = 0.001; build_rep();} - Sliced_Wasserstein(PD _diagram, double _sigma, int _approx){diagram = _diagram; approx = _approx; sigma = _sigma; build_rep();} + /** \brief Sliced Wasserstein Kernel constructor. + * \ingroup Sliced_Wasserstein + * + * @param[in] _diagram persistence diagram. + * @param[in] _sigma bandwidth parameter. + * @param[in] _approx number of directions used to approximate the integral in the Sliced Wasserstein distance, set to -1 for exact computation. + * + */ + Sliced_Wasserstein(PD _diagram, double _sigma = 1.0, int _approx = 100){diagram = _diagram; approx = _approx; sigma = _sigma; build_rep();} PD get_diagram(){return this->diagram;} int get_approx(){return this->approx;} @@ -163,6 +194,12 @@ class Sliced_Wasserstein { // Scalar product + distance. // ********************************** + /** \brief Evaluation of the Sliced Wasserstein Distance between a pair of diagrams. + * \ingroup Sliced_Wasserstein + * + * @param[in] second other instance of class Sliced_Wasserstein. Warning: approx parameter needs to be the same for both instances!!! + * + */ double compute_sliced_wasserstein_distance(Sliced_Wasserstein second) { PD diagram1 = this->diagram; PD diagram2 = second.diagram; double sw = 0; @@ -277,14 +314,25 @@ class Sliced_Wasserstein { return sw/pi; } - + /** \brief Evaluation of the kernel on a pair of diagrams. + * \ingroup Sliced_Wasserstein + * + * @param[in] second other instance of class Sliced_Wasserstein. Warning: sigma and approx parameters need to be the same for both instances!!! + * + */ double compute_scalar_product(Sliced_Wasserstein second){ return std::exp(-compute_sliced_wasserstein_distance(second)/(2*this->sigma*this->sigma)); } - double distance(Sliced_Wasserstein second, double power = 1) { + /** \brief Evaluation of the distance between images of diagrams in the Hilbert space of the kernel. + * \ingroup Sliced_Wasserstein + * + * @param[in] second other instance of class Sliced_Wasserstein. Warning: sigma and approx parameters need to be the same for both instances!!! + * + */ + double distance(Sliced_Wasserstein second) { if(this->sigma != second.sigma || this->approx != second.approx){std::cout << "Error: different representations!" << std::endl; return 0;} - else return std::pow(this->compute_scalar_product(*this) + second.compute_scalar_product(second)-2*this->compute_scalar_product(second), power/2.0); + else return std::pow(this->compute_scalar_product(*this) + second.compute_scalar_product(second)-2*this->compute_scalar_product(second), 0.5); } -- cgit v1.2.3