From 4e80b66cf5d4e6121149a12f3137e372e04d8588 Mon Sep 17 00:00:00 2001
From: mcarrier <mcarrier@636b058d-ea47-450e-bf9e-a15bfbe3eedb>
Date: Thu, 29 Mar 2018 15:27:31 +0000
Subject: added doc + cython

git-svn-id: svn+ssh://scm.gforge.inria.fr/svnroot/gudhi/branches/kernels@3319 636b058d-ea47-450e-bf9e-a15bfbe3eedb

Former-commit-id: cffc2e28ebf6fae46246c5abaac52b7328adf490
---
 .../doc/Persistence_representations_doc.h          | 62 +++++++++++++++++++
 .../example/persistence_weighted_gaussian.cpp      | 16 ++---
 .../example/sliced_wasserstein.cpp                 | 12 ++--
 .../include/gudhi/Persistence_weighted_gaussian.h  | 72 +++++++++++++++++++---
 .../include/gudhi/Sliced_Wasserstein.h             | 58 +++++++++++++++--
 5 files changed, 194 insertions(+), 26 deletions(-)

(limited to 'src/Persistence_representations')
diff --git a/src/Persistence_representations/doc/Persistence_representations_doc.h b/src/Persistence_representations/doc/Persistence_representations_doc.h
index 38bd3a21..6d4cc96c 100644
--- a/src/Persistence_representations/doc/Persistence_representations_doc.h
+++ b/src/Persistence_representations/doc/Persistence_representations_doc.h
@@ -250,6 +250,68 @@ namespace Persistence_representations {
  absolute value of differences between coordinates. A scalar product is a sum of products of
  values at the corresponding positions of two vectors.
 
+
+
+
+
+\section sec_persistence_kernels Kernels on Persistence Diagrams
+ <b>Reference manual:</b> \ref Gudhi::Persistence_representations::Sliced_Wasserstein <br>
+ <b>Reference manual:</b> \ref Gudhi::Persistence_representations::Persistence_weighted_gaussian <br>
+
+ Kernels for Persistence Diagrams can be regarded as infinite-dimensional vectorizations. More specifically,
+ they are similarity functions whose evaluations on pairs of persistence diagrams equals the scalar products
+ between images of these pairs under a map \f$\Phi\f$ taking values in a specific (possibly non Euclidean) Hilbert space \f$k(D_i, D_j) = \langle \Phi(D_i),\Phi(D_j)\rangle\f$.
+ Reciprocally, classical results of learning theory ensure that such a \f$\Phi\f$ exists for a given similarity function \f$k\f$ if and only if \f$k\f$ is <i>positive semi-definite</i>.
+ Kernels are designed for algorithms that can be <i>kernelized</i>, i.e., algorithms that only require to know scalar products between instances in order to run.
+ Examples of such algorithms include Support Vector Machines, Principal Component Analysis and Ridge Regression. 
+
+ There have been several attempts at defining kernels, i.e., positive semi-definite functions, between persistence diagrams within the last few years. We provide implementation
+ for three of them:
+
+ \li the <i>Persistence Scale Space Kernel</i>---see \cite Reininghaus_Huber_ALL_PSSK, which is the classical scalar product between \f$L^2\f$ functions, where persistence diagrams
+ are turned into functions by centering and summing Gaussian functions over the diagram points and their symmetric counterparts w.r.t. the diagonal: \f$k(D_1,D_2)=\int \Phi(D_1)\Phi(D_2)\f$,
+ where \f$\Phi(D)=\sum_{p\in D} {\rm exp}\left(-\frac{\|p-\cdot\|_2^2}{2\sigma^2}\right)\f$. 
+ 
+ \li the <i>Persistence Weighted Gaussian Kernel</i>---see \cite Kusano_Fukumizu_Hiraoka_PWGK, which is a slight generalization of the previous kernel, is the scalar product between
+ weighted Kernel Mean Embeddings of persistence diagrams w.r.t. the Gaussian Kernel \f$k_G\f$ (with corresponding map \f$\Phi_G\f$) in \f$\mathbb{R}^2\f$: 
+ \f$k(D_1,D_2)=\langle\sum_{p\in D_1} w(p)\Phi_G(p), \sum_{q\in D_2} w(q)\Phi_G(q)\rangle\f$
+ 
+ \li the  <i>Sliced Wasserstein Kernel</i>---see \cite pmlr-v70-carriere17a, which takes the form of a Gaussian kernel with a specific distance between persistence diagrams
+     called the <i>Sliced Wasserstein Distance</i>: \f$k(D_1,D_2)={\rm exp}\left(-\frac{SW(D_1,D_2)}{2\sigma^2}\right)\f$
+
+ When launching:
+
+ \code $> ./Sliced_Wasserstein
+ \endcode
+ 
+ the program output is:
+ 
+ \code $> Approx SW distance: 5.33648
+ $> Exact SW distance: 5.33798
+ $> Approx SW kernel: 0.0693743
+ $> Exact  SW kernel: 0.0693224
+ $> Distance induced by approx SW kernel: 1.36428
+ $> Distance induced by exact  SW kernel: 1.3643 
+ \endcode
+
+
+ and when launching:
+
+ \code $> ./Persistence_weighted_gaussian
+ \endcode
+ 
+ the program output is:
+ 
+ \code $> Approx PWG kernel: 1.21509
+ $> Exact  PWG kernel: 1.13628
+ $> Distance induced by approx PWG kernel: 3.23354
+ $> Distance induced by exact  PWG kernel: 3.25697
+ $> Approx Gaussian PWG kernel: 0.0194222
+ $> Exact  Gaussian PWG kernel: 0.0192524
+ $> Approx PSS kernel: 0.134413
+ $> Exact  PSS kernel: 0.133394
+ \endcode
+
  */
 /** @} */  // end defgroup Persistence_representations
 
diff --git a/src/Persistence_representations/example/persistence_weighted_gaussian.cpp b/src/Persistence_representations/example/persistence_weighted_gaussian.cpp
index a0e820ea..d447f165 100644
--- a/src/Persistence_representations/example/persistence_weighted_gaussian.cpp
+++ b/src/Persistence_representations/example/persistence_weighted_gaussian.cpp
@@ -57,11 +57,11 @@ int main(int argc, char** argv) {
 
   // Linear PWG
 
-  std::cout << PWG1.compute_scalar_product  (PWG2) << std::endl;
-  std::cout << PWGex1.compute_scalar_product  (PWGex2) << std::endl;
+  std::cout << "Approx PWG kernel: " << PWG1.compute_scalar_product  (PWG2) << std::endl;
+  std::cout << "Exact  PWG kernel: " << PWGex1.compute_scalar_product  (PWGex2) << std::endl;
 
-  std::cout << PWG1.distance  (PWG2) << std::endl;
-  std::cout << PWGex1.distance  (PWGex2) << std::endl;
+  std::cout << "Distance induced by approx PWG kernel: " << PWG1.distance  (PWG2) << std::endl;
+  std::cout << "Distance induced by exact  PWG kernel: " << PWGex1.distance  (PWGex2) << std::endl;
 
 
 
@@ -71,8 +71,8 @@ int main(int argc, char** argv) {
 
   // Gaussian PWG
 
-  std::cout << std::exp( -PWG1.distance (PWG2, 2) ) / (2*tau*tau) << std::endl;
-  std::cout << std::exp( -PWGex1.distance (PWGex2, 2) ) / (2*tau*tau) << std::endl;
+  std::cout << "Approx Gaussian PWG kernel: " << std::exp( -PWG1.distance (PWG2) ) / (2*tau*tau) << std::endl;
+  std::cout << "Exact  Gaussian PWG kernel: " << std::exp( -PWGex1.distance (PWGex2) ) / (2*tau*tau) << std::endl;
 
 
 
@@ -91,8 +91,8 @@ int main(int argc, char** argv) {
   PWG pwgex1(pd1, 2*std::sqrt(sigma), -1, PWG::pss_weight);
   PWG pwgex2(pd2, 2*std::sqrt(sigma), -1, PWG::pss_weight);
 
-  std::cout << pwg1.compute_scalar_product  (pwg2) / (16*pi*sigma) << std::endl;
-  std::cout << pwgex1.compute_scalar_product  (pwgex2) / (16*pi*sigma) << std::endl;
+  std::cout << "Approx PSS kernel: " << pwg1.compute_scalar_product  (pwg2) / (16*pi*sigma) << std::endl;
+  std::cout << "Exact  PSS kernel: " << pwgex1.compute_scalar_product  (pwgex2) / (16*pi*sigma) << std::endl;
 
 
 
diff --git a/src/Persistence_representations/example/sliced_wasserstein.cpp b/src/Persistence_representations/example/sliced_wasserstein.cpp
index 2470029b..f1aeea5c 100644
--- a/src/Persistence_representations/example/sliced_wasserstein.cpp
+++ b/src/Persistence_representations/example/sliced_wasserstein.cpp
@@ -32,8 +32,6 @@ int main(int argc, char** argv) {
 
   std::vector<std::pair<double, double> > persistence1;
   std::vector<std::pair<double, double> > persistence2;
-  std::vector<std::vector<std::pair<double, double> > > set1;
-  std::vector<std::vector<std::pair<double, double> > > set2;
 
   persistence1.push_back(std::make_pair(1, 2));
   persistence1.push_back(std::make_pair(6, 8));
@@ -52,10 +50,12 @@ int main(int argc, char** argv) {
   SW swex1(persistence1, 1, -1);
   SW swex2(persistence2, 1, -1);
 
-  std::cout << sw1.compute_sliced_wasserstein_distance(sw2) << std::endl;
-  std::cout << swex1.compute_sliced_wasserstein_distance(swex2) << std::endl;
-  std::cout << sw1.compute_scalar_product(sw2) << std::endl;
-  std::cout << swex1.distance(swex2) << std::endl;
+  std::cout << "Approx SW distance: " << sw1.compute_sliced_wasserstein_distance(sw2) << std::endl;
+  std::cout << "Exact SW distance: " << swex1.compute_sliced_wasserstein_distance(swex2) << std::endl;
+  std::cout << "Approx SW kernel: " << sw1.compute_scalar_product(sw2) << std::endl;
+  std::cout << "Exact  SW kernel: " << swex1.compute_scalar_product(swex2) << std::endl;
+  std::cout << "Distance induced by approx SW kernel: " << sw1.distance(sw2) << std::endl;
+  std::cout << "Distance induced by exact  SW kernel: " << swex1.distance(swex2) << std::endl;
 
   return 0;
 }
diff --git a/src/Persistence_representations/include/gudhi/Persistence_weighted_gaussian.h b/src/Persistence_representations/include/gudhi/Persistence_weighted_gaussian.h
index a6efa72d..f824225a 100644
--- a/src/Persistence_representations/include/gudhi/Persistence_weighted_gaussian.h
+++ b/src/Persistence_representations/include/gudhi/Persistence_weighted_gaussian.h
@@ -45,7 +45,40 @@ using Weight = std::function<double (std::pair<double,double>) >;
 
 namespace Gudhi {
 namespace Persistence_representations {
-
+/**
+ * \class Persistence_weighted_gaussian gudhi/Persistence_weighted_gaussian.h
+ * \brief A class implementing the Persistence Weighted Gaussian Kernel and a specific case of it called the Persistence Scale Space Kernel.
+ *
+ * \ingroup Persistence_representations
+ *
+ * \details
+ * The Persistence Weighted Gaussian Kernel is built with Gaussian Kernel Mean Embedding, meaning that each persistence diagram is first
+ * sent to the Hilbert space of a Gaussian kernel with bandwidth parameter \f$\sigma >0\f$ using a weighted mean embedding \f$\Phi\f$:
+ *
+ * \f$ \Phi\,:\,D\,\rightarrow\,\sum_{p\in D}\,w(p)\,{\rm exp}\left(-\frac{\|p-\cdot\|_2^2}{2\sigma^2}\right) \f$,
+ *
+ * Usually, the weight function is chosen to be an arctan function of the distance of the point to the diagonal:
+ * \f$w(p) = {\rm arctan}(C\,|y-x|^\alpha)\f$, for some parameters \f$C,\alpha >0\f$.
+ * Then, their scalar product in this space is computed:
+ *
+ * \f$ k(D_1,D_2)=\langle\Phi(D_1),\Phi(D_2)\rangle
+ * \,=\,\sum_{p\in D_1}\,\sum_{q\in D_2}\,w(p)\,w(q)\,{\rm exp}\left(-\frac{\|p-q\|_2^2}{2\sigma^2}\right).\f$
+ *
+ * Note that one may apply a second Gaussian kernel to their distance in this space and still get a kernel.
+ *
+ * It follows that the computation time is \f$O(n^2)\f$ where \f$n\f$ is the number of points
+ * in the diagrams. This time can be improved by computing approximations of the kernel
+ * with \f$m\f$ Fourier features \cite Rahimi07randomfeatures. In that case, the computation time becomes \f$O(mn)\f$.
+ *
+ * The Persistence Scale Space Kernel is a Persistence Weighted Gaussian Kernel between modified diagrams:
+ * the symmetric of each point with respect to the diagonal is first added in each diagram, and then the weight function
+ * is set to be +1 if the point is above the diagonal and -1 otherwise.
+ * 
+ * For more details, please consult <i>Persistence Weighted Kernel for Topological Data Analysis</i>\cite Kusano_Fukumizu_Hiraoka_PWGK 
+ * and <i>A Stable Multi-Scale Kernel for Topological Machine Learning</i>\cite Reininghaus_Huber_ALL_PSSK . 
+ * It implements the following concepts: Topological_data_with_distances, Topological_data_with_scalar_product.
+ *
+**/
 class Persistence_weighted_gaussian{
 
  protected:
@@ -56,8 +89,17 @@ class Persistence_weighted_gaussian{
 
  public:
 
-  Persistence_weighted_gaussian(PD _diagram){diagram = _diagram; sigma = 1.0; approx = 1000; weight = arctan_weight;}
-  Persistence_weighted_gaussian(PD _diagram, double _sigma, int _approx, Weight _weight){diagram = _diagram; sigma = _sigma; approx = _approx; weight = _weight;}
+  /** \brief Persistence Weighted Gaussian Kernel constructor.
+   * \ingroup Persistence_weighted_gaussian
+   *
+   * @param[in] _diagram       persistence diagram.
+   * @param[in] _sigma         bandwidth parameter of the Gaussian Kernel used for the Kernel Mean Embedding of the diagrams.
+   * @param[in] _approx        number of random Fourier features in case of approximate computation, set to -1 for exact computation.
+   * @param[in] _weight        weight function for the points in the diagrams.
+   *
+   */
+  Persistence_weighted_gaussian(PD _diagram, double _sigma = 1.0, int _approx = 1000, Weight _weight = arctan_weight){diagram = _diagram; sigma = _sigma; approx = _approx; weight = _weight;}
+  
   PD get_diagram(){return this->diagram;}
   double get_sigma(){return this->sigma;}
   int get_approx(){return this->approx;}
@@ -68,7 +110,12 @@ class Persistence_weighted_gaussian{
   // Utils.
   // **********************************
 
-
+  /** \brief Specific weight of Persistence Scale Space Kernel.
+   * \ingroup Persistence_weighted_gaussian
+   *
+   * @param[in] p point in 2D.
+   *
+   */
   static double pss_weight(std::pair<double,double> p){
     if(p.second > p.first)  return 1;
     else return -1;
@@ -108,7 +155,12 @@ class Persistence_weighted_gaussian{
   // Scalar product + distance.
   // **********************************
 
-
+  /** \brief Evaluation of the kernel on a pair of diagrams.
+   * \ingroup Persistence_weighted_gaussian
+   *
+   * @param[in] second other instance of class Persistence_weighted_gaussian. Warning: sigma, approx and weight parameters need to be the same for both instances!!! 
+   *
+   */
   double compute_scalar_product(Persistence_weighted_gaussian second){
 
     PD diagram1 = this->diagram; PD diagram2 = second.diagram;
@@ -131,11 +183,17 @@ class Persistence_weighted_gaussian{
     }
   }
 
-  double distance(Persistence_weighted_gaussian second, double power = 1) {
+  /** \brief Evaluation of the distance between images of diagrams in the Hilbert space of the kernel.
+   * \ingroup Persistence_weighted_gaussian
+   *
+   * @param[in] second other instance of class Persistence_weighted_gaussian. Warning: sigma, approx and weight parameters need to be the same for both instances!!! 
+   *
+   */
+  double distance(Persistence_weighted_gaussian second) {
     if(this->sigma != second.get_sigma() || this->approx != second.get_approx()){
       std::cout << "Error: different representations!" << std::endl; return 0;
     }
-    else return std::pow(this->compute_scalar_product(*this) + second.compute_scalar_product(second)-2*this->compute_scalar_product(second),  power/2.0);
+    else return std::pow(this->compute_scalar_product(*this) + second.compute_scalar_product(second)-2*this->compute_scalar_product(second), 0.5);
   }
 
 
diff --git a/src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h b/src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h
index f2ec56b7..bfb77384 100644
--- a/src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h
+++ b/src/Persistence_representations/include/gudhi/Sliced_Wasserstein.h
@@ -45,6 +45,30 @@ using PD = std::vector<std::pair<double,double> >;
 namespace Gudhi {
 namespace Persistence_representations {
 
+/**
+ * \class Sliced_Wasserstein gudhi/Sliced_Wasserstein.h
+ * \brief A class implementing the Sliced Wasserstein Kernel.
+ *
+ * \ingroup Persistence_representations
+ *
+ * \details
+ * The Sliced Wasserstein Kernel is defined as a Gaussian-like Kernel between persistence diagrams, where the distance used for
+ * comparison is the Sliced Wasserstein distance \f$SW\f$ between persistence diagrams, defined as the integral of the 1-norm
+ * between the sorted projections of the diagrams onto all lines passing through the origin:
+ *
+ * \f$ SW(D_1,D_2)=\int_{\theta\in\mathbb{S}}\,\|\pi_\theta(D_1\cup\pi_\Delta(D_2))-\pi_\theta(D_2\cup\pi_\Delta(D_1))\|_1{\rm d}\theta\f$,
+ *
+ * where \f$\pi_\theta\f$ is the projection onto the line defined with angle \f$\theta\f$ in the unit circle \f$\mathbb{S}\f$,
+ * and \f$\pi_\Delta\f$ is the projection onto the diagonal.
+ * The integral can be either computed exactly in \f$O(n^2{\rm log}(n))\f$ time, where \f$n\f$ is the number of points
+ * in the diagrams, or approximated by sampling \f$N\f$ lines in the circle in \f$O(Nn{\rm log}(n))\f$ time. The Sliced Wasserstein Kernel is then computed as:
+ *
+ * \f$ k(D_1,D_2) = {\rm exp}\left(-\frac{SW(D_1,D_2)}{2\sigma^2}\right).\f$
+ * 
+ * For more details, please consult <i>Sliced Wasserstein Kernel for Persistence Diagrams</i>\cite pmlr-v70-carriere17a .
+ * It implements the following concepts: Topological_data_with_distances, Topological_data_with_scalar_product.
+ *
+**/
 class Sliced_Wasserstein {
 
  protected:
@@ -83,8 +107,15 @@ class Sliced_Wasserstein {
 
   }
 
-  Sliced_Wasserstein(PD _diagram){diagram = _diagram; approx = 100; sigma = 0.001; build_rep();}
-  Sliced_Wasserstein(PD _diagram, double _sigma, int _approx){diagram = _diagram; approx = _approx; sigma = _sigma; build_rep();}
+  /** \brief Sliced Wasserstein Kernel constructor.
+   * \ingroup Sliced_Wasserstein
+   *
+   * @param[in] _diagram  persistence diagram.
+   * @param[in] _sigma  bandwidth parameter.
+   * @param[in] _approx number of directions used to approximate the integral in the Sliced Wasserstein distance, set to -1 for exact computation.
+   *
+   */
+  Sliced_Wasserstein(PD _diagram, double _sigma = 1.0, int _approx = 100){diagram = _diagram; approx = _approx; sigma = _sigma; build_rep();}
   
   PD get_diagram(){return this->diagram;}
   int get_approx(){return this->approx;}
@@ -163,6 +194,12 @@ class Sliced_Wasserstein {
   // Scalar product + distance.
   // **********************************
 
+  /** \brief Evaluation of the Sliced Wasserstein Distance between a pair of diagrams.
+   * \ingroup Sliced_Wasserstein
+   *
+   * @param[in] second other instance of class Sliced_Wasserstein. Warning: approx parameter needs to be the same for both instances!!! 
+   *
+   */
   double compute_sliced_wasserstein_distance(Sliced_Wasserstein second) {
 
       PD diagram1 = this->diagram; PD diagram2 = second.diagram; double sw = 0;
@@ -277,14 +314,25 @@ class Sliced_Wasserstein {
       return sw/pi;
   }
 
-
+  /** \brief Evaluation of the kernel on a pair of diagrams.
+   * \ingroup Sliced_Wasserstein
+   *
+   * @param[in] second other instance of class Sliced_Wasserstein. Warning: sigma and approx parameters need to be the same for both instances!!! 
+   *
+   */
   double compute_scalar_product(Sliced_Wasserstein second){
     return std::exp(-compute_sliced_wasserstein_distance(second)/(2*this->sigma*this->sigma));
   }
 
-  double distance(Sliced_Wasserstein second, double power = 1) {
+  /** \brief Evaluation of the distance between images of diagrams in the Hilbert space of the kernel.
+   * \ingroup Sliced_Wasserstein
+   *
+   * @param[in] second  other instance of class Sliced_Wasserstein. Warning: sigma and approx parameters need to be the same for both instances!!! 
+   *
+   */
+  double distance(Sliced_Wasserstein second) {
     if(this->sigma != second.sigma || this->approx != second.approx){std::cout << "Error: different representations!" << std::endl; return 0;}
-    else return std::pow(this->compute_scalar_product(*this) + second.compute_scalar_product(second)-2*this->compute_scalar_product(second),  power/2.0);
+    else return std::pow(this->compute_scalar_product(*this) + second.compute_scalar_product(second)-2*this->compute_scalar_product(second),  0.5);
   }
 
 
-- 
cgit v1.2.3