git-svn-id: svn+ssh://scm.gforge.inria.fr/svnroot/gudhi/branches/kernels@3106 636b058d-ea47-450e-bf9e-a15bfbe3eedb

Former-commit-id: d4456c36f3ef8cc9e7f516d943472b963b5e9b93
author: mcarrier <mcarrier@636b058d-ea47-450e-bf9e-a15bfbe3eedb> 2017-12-29 23:13:11 +0000
committer: mcarrier <mcarrier@636b058d-ea47-450e-bf9e-a15bfbe3eedb> 2017-12-29 23:13:11 +0000
commit: a065a3b86e33c24250a981f95db1ff46d9075ef5 (patch)
tree: 467ac7e14aaada4e18b3d5bd6b11f95b2cdd62fe /src
parent: 21e6cb713dd3db78e68b4140ab2d69508dad01af (diff)
6 files changed, 245 insertions, 416 deletions
diff --git a/src/Kernels/example/CMakeLists.txt b/src/Kernels/example/CMakeLists.txt
new file mode 100644
index 00000000..57e13004
--- /dev/null
+++ b/src/Kernels/example/CMakeLists.txt
@@ -0,0 +1,11 @@
+cmake_minimum_required(VERSION 2.6)
+project(Kernels_examples)
+
+add_executable ( BasicEx kernel_basic_example.cpp )
+
+if (TBB_FOUND)
+  target_link_libraries(BasicEx ${TBB_LIBRARIES})
+endif()
+
+add_test(NAME Kernels_example_basicex COMMAND $<TARGET_FILE:BasicEx>
+    "")
diff --git a/src/Kernels/example/kernel_basic_example.cpp b/src/Kernels/example/kernel_basic_example.cpp
index 0a8d83b3..8e9925c5 100644
--- a/src/Kernels/example/kernel_basic_example.cpp
+++ b/src/Kernels/example/kernel_basic_example.cpp
@@ -2,9 +2,9 @@
  *    (Geometric Understanding in Higher Dimensions) is a generic C++
  *    library for computational topology.
  *
- *    Authors:       Francois Godi, small modifications by Pawel Dlotko
+ *    Authors:       Mathieu Carrière
  *
- *    Copyright (C) 2015  INRIA
+ *    Copyright (C) 2017  INRIA
  *
  *    This program is free software: you can redistribute it and/or modify
  *    it under the terms of the GNU General Public License as published by
@@ -20,18 +20,15 @@
  *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <gudhi/SW.h>
-//#include <gudhi/PSS.h>
-//#include <gudhi/PWG.h>
-
-#include <iostream>
-#include <vector>
-#include <utility>  // for pair
-#include <limits>  // for numeric_limits
+#define NUMPI 3.14159265359
+#include <gudhi/kernel.h>
 
 int main() {
+
   std::vector< std::pair<double, double> > v1, v2;
 
+  double sigma = 2; double tau = 5;
+
   v1.emplace_back(2.7, 3.7);
   v1.emplace_back(9.6, 14.);
   v1.emplace_back(34.2, 34.974);
@@ -39,11 +36,13 @@ int main() {
   v2.emplace_back(2.8, 4.45);
   v2.emplace_back(9.5, 14.1);
 
-
-  double b1 = Gudhi::sliced_wasserstein::compute_approximate_SW   (v1, v2);
-  double b2 = Gudhi::sliced_wasserstein::compute_exact_SW         (v1, v2);
-
-  std::cout << "Approximate Sliced Wasserstein distance = " << b1 << std::endl;
-  std::cout << "Exact Sliced Wasserstein distance =       " << b2 << std::endl;
+  std::cout << "SW exact = "    << Gudhi::kernel::sw             (v1, v2)          << std::endl;
+  std::cout << "SW approx = "   << Gudhi::kernel::approx_sw      (v1, v2)          << std::endl;
+  std::cout << "PSS exact = "   << Gudhi::kernel::pss            (v1,v2,sigma)     << std::endl;
+  std::cout << "PSS approx = "  << Gudhi::kernel::approx_pss     (v1,v2,sigma)     << std::endl;
+  std::cout << "PWG exact = "   << Gudhi::kernel::lpwg           (v1,v2,sigma)     << std::endl;
+  std::cout << "PWG approx = "  << Gudhi::kernel::approx_lpwg    (v1,v2,sigma)     << std::endl;
+  std::cout << "GPWG exact = "  << Gudhi::kernel::gpwg           (v1,v2,sigma,tau) << std::endl;
+  std::cout << "GPWG approx = " << Gudhi::kernel::approx_gpwg    (v1,v2,sigma,tau) << std::endl;
 
 }
diff --git a/src/Kernels/include/gudhi/PSS.h b/src/Kernels/include/gudhi/PSS.h
deleted file mode 100644
index 5111a09f..00000000
--- a/src/Kernels/include/gudhi/PSS.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*    This file is part of the Gudhi Library. The Gudhi library
- *    (Geometric Understanding in Higher Dimensions) is a generic C++
- *    library for computational topology.
- *
- *    Author(s):       Mathieu Carrière
- *
- *    Copyright (C) 2017  INRIA (France)
- *
- *    This program is free software: you can redistribute it and/or modify
- *    it under the terms of the GNU General Public License as published by
- *    the Free Software Foundation, either version 3 of the License, or
- *    (at your option) any later version.
- *
- *    This program is distributed in the hope that it will be useful,
- *    but WITHOUT ANY WARRANTY; without even the implied warranty of
- *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *    GNU General Public License for more details.
- *
- *    You should have received a copy of the GNU General Public License
- *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef PSS_H_
-#define PSS_H_
-
-#define NUMPI 3.14159265359
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <cstdlib>
-#include <cstdio>
-#include <iomanip>
-
-#include <string>
-#include <iostream>
-#include <sstream>
-#include <fstream>
-
-#include <set>
-#include <map>
-#include <vector>
-#include <algorithm>
-#include <limits>
-#include <assert.h>
-
-#include <cmath>
-#include <math.h>
-
-#include <memory>
-#include <stdexcept>
-#include <omp.h>
-
-#include <gmp.h>
-#include <gmpxx.h>
-#include <random>
-#include <chrono>
-#include <ctime>
-
-#include "figtree.h"
-#include "ANN.h"
-
-using PD = std::vector<std::pair<double,double> >;
-
-namespace Gudhi {
-namespace persistence_scale_space {
-
-double compute_exact_pss(PD PD1, PD PD2, double sigma = 1){
-  double k = 0;
-  for(int i = 0; i < PD1.size(); i++){
-    for(int j = 0; j < PD2.size(); j++){
-      k += exp(  -(    pow(PD1[i].first - PD2[j].first,  2)  +  pow(PD1[i].second - PD2[j].second, 2)     )/(8*sigma))   -\
-           exp(  -(    pow(PD1[i].first - PD2[j].second, 2)  +  pow(PD1[i].second - PD2[j].first,  2)     )/(8*sigma));
-    }
-  }
-  return k/(8*NUMPI*sigma);
-}
-
-double compute_approximate_pss(PD PD1, PD PD2, double sigma = 1, double error = 1e-2){
-
-  double k = 0;
-
-  int d = 2; int N = PD1.size(); int M = PD2.size(); double h = std::sqrt(8*sigma);
-  double* x = new double[2*N]; double* y = new double[2*M]; double* q = new double[N];
-  for(int i = 0; i < N; i++){
-    q[i] = 1.0/(8*NUMPI*sigma);
-    x[2*i] = PD1[i].first; x[2*i+1] = PD1[i].second;
-  }
-  for(int i = 0; i < M; i++){  y[2*i] = PD2[i].first; y[2*i+1] = PD2[i].second;  }
-  double* g_auto = new double[M];
-  memset(g_auto, 0, sizeof(double)*M);
-
-  figtree(d, N, M, 1, x, h, q, y, error, g_auto);
-  for(int i = 0; i < M; i++)  k += g_auto[i];
-
-  for(int i = 0; i < M; i++){  y[2*i] = PD2[i].second; y[2*i+1] = PD2[i].first;  }
-
-  figtree(d, N, M, 1, x, h, q, y, error, g_auto);
-  for(int i = 0; i < M; i++)  k -= g_auto[i];
-
-  delete[] x; delete[] y; delete[] q; delete[] g_auto;
-  return k;
-}
-
-} // namespace persistence_scale_space
-
-} // namespace Gudhi
-
-#endif // PSS_H_
diff --git a/src/Kernels/include/gudhi/PWG.h b/src/Kernels/include/gudhi/PWG.h
deleted file mode 100644
index bc491ae7..00000000
--- a/src/Kernels/include/gudhi/PWG.h
+++ /dev/null
@@ -1,204 +0,0 @@
-/*    This file is part of the Gudhi Library. The Gudhi library
- *    (Geometric Understanding in Higher Dimensions) is a generic C++
- *    library for computational topology.
- *
- *    Author(s):       Mathieu Carrière
- *
- *    Copyright (C) 2017  INRIA (France)
- *
- *    This program is free software: you can redistribute it and/or modify
- *    it under the terms of the GNU General Public License as published by
- *    the Free Software Foundation, either version 3 of the License, or
- *    (at your option) any later version.
- *
- *    This program is distributed in the hope that it will be useful,
- *    but WITHOUT ANY WARRANTY; without even the implied warranty of
- *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *    GNU General Public License for more details.
- *
- *    You should have received a copy of the GNU General Public License
- *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef PWG_H_
-#define PWG_H_
-
-#define NUMPI 3.14159265359
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <cstdlib>
-#include <cstdio>
-#include <iomanip>
-
-#include <string>
-#include <iostream>
-#include <sstream>
-#include <fstream>
-
-#include <set>
-#include <map>
-#include <vector>
-#include <algorithm>
-#include <limits>
-#include <assert.h>
-
-#include <cmath>
-#include <math.h>
-
-#include <memory>
-#include <stdexcept>
-#include <omp.h>
-
-#include <gmp.h>
-#include <gmpxx.h>
-#include <random>
-#include <chrono>
-#include <ctime>
-
-using PD = std::vector<std::pair<double,double> >;
-
-namespace Gudhi {
-namespace persistence_weighted_gaussian {
-
-double compute_exact_linear_pwg(PD PD1, PD PD2, double sigma, double C, int p){
-
-  int num_pts1 = PD1.size();
-  int num_pts2 = PD2.size();
-
-  double k = 0;
-  for(int i = 0; i < num_pts1; i++){
-    for(int j = 0; j < num_pts2; j++){
-      k += atan(C*pow(PD1[i].second-PD1[i].first,p))*atan(C*pow(PD2[j].second-PD2[j].first,p))*\
-            exp(  -(    pow(PD1[i].first-PD2[j].first,2) + pow(PD1[i].second-PD2[j].second,2)     )/(2*pow(sigma,2))  );
-    }
-  }
-
-  return k;
-
-}
-
-double compute_exact_gaussian_pwg(PD PD1, PD PD2, double sigma, double C, int p, double tau){
-
-  int num_pts1 = PD1.size();
-  int num_pts2 = PD2.size();
-
-  double k1 = 0;
-  for(int i = 0; i < num_pts1; i++){
-    for(int j = 0; j < num_pts1; j++){
-      k1 += atan(C*pow(PD1[i].second-PD1[i].first,p))*atan(C*pow(PD1[j].second-PD1[j].first,p))*\
-            exp(  -(    pow(PD1[i].first-PD1[j].first,2) + pow(PD1[i].second-PD1[j].second,2)     )/(2*pow(sigma,2))  );
-    }
-  }
-
-  double k2 = 0;
-  for(int i = 0; i < num_pts2; i++){
-    for(int j = 0; j < num_pts2; j++){
-      k2 += atan(C*pow(PD2[i].second-PD2[i].first,p))*atan(C*pow(PD2[j].second-PD2[j].first,p))*\
-            exp(  -(    pow(PD2[i].first-PD2[j].first,2) + pow(PD2[i].second-PD2[j].second,2)     )/(2*pow(sigma,2))  );
-    }
-  }
-
-  double k3 = compute_exact_linear_pwg(PD1,PD2,sigma,C,p);
-  return exp( - (k1+k2-2*k3) / (2*pow(tau,2))  );
-
-}
-
-double compute_exact_gaussian_RKHSdist(PD PD1, PD PD2, double sigma, double C, int p){
-
-  int num_pts1 = PD1.size();
-  int num_pts2 = PD2.size();
-
-  double k1 = 0;
-  for(int i = 0; i < num_pts1; i++){
-    for(int j = 0; j < num_pts1; j++){
-      k1 += atan(C*pow(PD1[i].second-PD1[i].first,p))*atan(C*pow(PD1[j].second-PD1[j].first,p))*\
-            exp(  -(    pow(PD1[i].first-PD1[j].first,2) + pow(PD1[i].second-PD1[j].second,2)     )/(2*pow(sigma,2))  );
-    }
-  }
-
-  double k2 = 0;
-  for(int i = 0; i < num_pts2; i++){
-    for(int j = 0; j < num_pts2; j++){
-      k2 += atan(C*pow(PD2[i].second-PD2[i].first,p))*atan(C*pow(PD2[j].second-PD2[j].first,p))*\
-            exp(  -(    pow(PD2[i].first-PD2[j].first,2) + pow(PD2[i].second-PD2[j].second,2)     )/(2*pow(sigma,2))  );
-    }
-  }
-
-  double k3 = compute_exact_linear_pwg(PD1,PD2,sigma,C,p);
-  return std::sqrt(k1+k2-2*k3);
-
-}
-
-double compute_approximate_linear_pwg_from_Fourier_features(const std::vector<std::pair<double,double> >& B1, \
-                                                            const std::vector<std::pair<double,double> >& B2){
-  double d = 0; int M  = B1.size();
-  for(int i = 0; i < M; i++) d += B1[i].first*B2[i].first + B1[i].second*B2[i].second;
-  return (1.0/M)*d;
-}
-
-double compute_approximate_gaussian_pwg_from_Fourier_features(const std::vector<std::pair<double,double> >& B1, \
-                                                              const std::vector<std::pair<double,double> >& B2, double tau){
-  int M  = B1.size();
-  double d3 = compute_approximate_linear_pwg_from_Fourier_features(B1, B2);
-  double d1 = 0; double d2 = 0;
-  for(int i = 0; i < M; i++){d1 += pow(B1[i].first,2) + pow(B1[i].second,2); d2 += pow(B2[i].first,2) + pow(B2[i].second,2);}
-  return exp( -((1.0/M)*(d1+d2)-2*d3) / (2*pow(tau,2))  );
-}
-
-double compute_approximate_gaussian_RKHSdist_from_Fourier_features(const std::vector<std::pair<double,double> >& B1, \
-                                                              const std::vector<std::pair<double,double> >& B2){
-  int M  = B1.size();
-  double d3 = compute_approximate_linear_pwg_from_Fourier_features(B1, B2);
-  double d1 = 0; double d2 = 0;
-  for(int i = 0; i < M; i++){d1 += pow(B1[i].first,2) + pow(B1[i].second,2); d2 += pow(B2[i].first,2) + pow(B2[i].second,2);}
-  return std::sqrt((1.0/M)*(d1+d2)-2*d3);
-}
-
-std::vector<std::pair<double,double> > compute_Fourier_features(double C, int p, PD D, std::vector<std::pair<double,double> > Z){
-  int m = D.size(); std::vector<std::pair<double,double> > B; int M = Z.size();
-  for(int i = 0; i < M; i++){
-    double d1 = 0; double d2 = 0; double zx = Z[i].first; double zy = Z[i].second;
-    for(int j = 0; j < m; j++){
-      double x = D[j].first; double y = D[j].second;
-      d1 += atan(C*pow(y-x,p))*cos(x*zx + y*zy);
-      d2 += atan(C*pow(y-x,p))*sin(x*zx + y*zy);
-    }
-    B.push_back(std::pair<double,double>(d1,d2));
-  }
-  return B;
-}
-
-std::vector<std::pair<double,double> > random_Fourier(double sigma, int M = 1000){
-  std::normal_distribution<double> distrib(0,1); std::vector<std::pair<double,double> > Z;
-  std::random_device rd;
-  for(int i = 0; i < M; i++){
-    //unsigned seedx = 2*i; unsigned seedy = 2*i+1;
-    //std::default_random_engine generatorx(seedx); std::default_random_engine generatory(seedy);
-    std::mt19937 e1(rd()); std::mt19937 e2(rd());
-    double zx = distrib(e1/*generatorx*/); double zy = distrib(e2/*generatory*/);
-    Z.push_back(std::pair<double,double>((1/sigma)*zx,(1/sigma)*zy));
-  }
-  return Z;
-}
-
-double compute_approximate_linear_pwg(PD PD1, PD PD2, double sigma, double C, int p, int M = 1000){
-  std::vector<std::pair<double,double> > Z = random_Fourier(sigma, M);
-  std::vector<std::pair<double,double> > B1 = compute_Fourier_features(C,p,PD1,Z);
-  std::vector<std::pair<double,double> > B2 = compute_Fourier_features(C,p,PD2,Z);
-  return compute_approximate_linear_pwg_from_Fourier_features(B1,B2);
-}
-
-double compute_approximate_gaussian_pwg(PD PD1, PD PD2, double sigma, double C, int p, double tau, int M = 1000){
-  std::vector<std::pair<double,double> > Z = random_Fourier(sigma, M);
-  std::vector<std::pair<double,double> > B1 = compute_Fourier_features(C,p,PD1,Z);
-  std::vector<std::pair<double,double> > B2 = compute_Fourier_features(C,p,PD2,Z);
-  return compute_approximate_gaussian_pwg_from_Fourier_features(B1,B2,tau);
-}
-
-
-} // namespace persistence_weighted_gaussian
-
-} // namespace Gudhi
-
-#endif //PWG_H_
diff --git a/src/Kernels/include/gudhi/figtree-0.9.3.zip b/src/Kernels/include/gudhi/figtree-0.9.3.zip
deleted file mode 100644
index a9468274..00000000
--- a/src/Kernels/include/gudhi/figtree-0.9.3.zip
+++ /dev/null
diff --git a/src/Kernels/include/gudhi/SW.h b/src/Kernels/include/gudhi/kernel.h
index 0b041252..c4120d7a 100644
--- a/src/Kernels/include/gudhi/SW.h
+++ b/src/Kernels/include/gudhi/kernel.h
@@ -20,108 +20,116 @@
  *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#ifndef SW_H_
-#define SW_H_
+#ifndef KERNEL_H_
+#define KERNEL_H_
 
 #define NUMPI 3.14159265359
 
 #include <stdlib.h>
+#include <stdio.h>
 #include <cstdlib>
+#include <cstdio>
+#include <iomanip>
 #include <string>
 #include <iostream>
 #include <sstream>
 #include <fstream>
-#include <vector>
-#include <algorithm>
 #include <set>
 #include <map>
+#include <vector>
+#include <algorithm>
 #include <limits>
+#include <assert.h>
 #include <cmath>
 #include <math.h>
-#include <string.h>
-#include <stdio.h>
-#include <cstdio>
 #include <memory>
 #include <stdexcept>
 #include <omp.h>
-#include <assert.h>
-#include <iomanip>
 #include <gmp.h>
 #include <gmpxx.h>
 #include <random>
 #include <chrono>
 #include <ctime>
-#include <math.h>
 
 using PD = std::vector<std::pair<double,double> >;
-
 bool sortAngle(const std::pair<double, std::pair<int,int> >& p1, const std::pair<double, std::pair<int,int> >& p2){return (p1.first < p2.first);}
 bool myComp(const std::pair<int,double> & P1, const std::pair<int,double> & P2){return P1.second < P2.second;}
 
 namespace Gudhi {
-namespace sliced_wasserstein {
-
-
-// ********************************************************************
-// Approximate computation.
-// ********************************************************************
-
-/** \brief Computes an approximation of the Sliced Wasserstein distance between two persistence diagrams
- *
- * @param[in] N number of points sampled on the circle.
- *
- */
-
-double compute_approximate_SW(PD PD1, PD PD2, int N = 100){
+namespace kernel {
 
-  double step = NUMPI/N; double sw = 0;
 
-  // Add projections onto diagonal.
-  int n1, n2; n1 = PD1.size(); n2 = PD2.size();
-  for (int i = 0; i < n2; i++)
-    PD1.push_back(std::pair<double,double>(   (PD2[i].first + PD2[i].second)/2,  (PD2[i].first + PD2[i].second)/2)   );
-  for (int i = 0; i < n1; i++)
-    PD2.push_back(std::pair<double,double>(   (PD1[i].first + PD1[i].second)/2,  (PD1[i].first + PD1[i].second)/2)   );
-  int n = PD1.size();
-
-  // Sort and compare all projections.
-  //#pragma omp parallel for
-  for (int i = 0; i < N; i++){
-    std::vector<std::pair<int,double> > L1, L2;
-    for (int j = 0; j < n; j++){
-      L1.push_back(   std::pair<int,double>(j, PD1[j].first*cos(-NUMPI/2+i*step) + PD1[j].second*sin(-NUMPI/2+i*step))   );
-      L2.push_back(   std::pair<int,double>(j, PD2[j].first*cos(-NUMPI/2+i*step) + PD2[j].second*sin(-NUMPI/2+i*step))   );
-    }
-    std::sort(L1.begin(),L1.end(), myComp); std::sort(L2.begin(),L2.end(), myComp);
-    double f = 0; for (int j = 0; j < n; j++)  f += std::abs(L1[j].second - L2[j].second);
-    sw += f*step;
-  }
-  return sw/NUMPI;
+double pss_weight(std::pair<double,double> P){
+  if(P.second > P.first)  return 1;
+  else return -1;
 }
 
 
 
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 // ********************************************************************
 // Exact computation.
 // ********************************************************************
 
+/** \brief Computes the Linear Persistence Weighted Gaussian Kernel between two persistence diagrams.
+ *
+ * @param[in] PD1      first persistence diagram.
+ * @param[in] PD2      second persistence diagram.
+ * @param[in] sigma    bandwidth parameter of the Gaussian Kernel used for the Kernel Mean Embedding of the diagrams.
+ * @param[in] weight   weight function for the points in the diagrams.
+ *
+ */
+double lpwg(PD PD1, PD PD2, double sigma, double (*weight)(std::pair<double,double>) = [](std::pair<double,double> P){return atan(P.second - P.first);}){
+  int num_pts1 = PD1.size(); int num_pts2 = PD2.size(); double k = 0;
+  for(int i = 0; i < num_pts1; i++)
+    for(int j = 0; j < num_pts2; j++)
+      k += (*weight)(PD1[i])*(*weight)(PD2[j])*exp(-(pow(PD1[i].first-PD2[j].first,2) + pow(PD1[i].second-PD2[j].second,2))/(2*pow(sigma,2)));
+  return k;
+}
+
+/** \brief Computes the Persistence Scale Space Kernel between two persistence diagrams.
+ *
+ * @param[in] PD1      first persistence diagram.
+ * @param[in] PD2      second persistence diagram.
+ * @param[in] sigma    bandwidth parameter of the Gaussian Kernel used for the Kernel Mean Embedding of the diagrams.
+ *
+ */
+double pss(PD PD1, PD PD2, double sigma){
+  PD pd1 = PD1; int numpts = PD1.size();    for(int i = 0; i < numpts; i++)  pd1.push_back(std::pair<double,double>(PD1[i].second,PD1[i].first));
+  PD pd2 = PD2;     numpts = PD2.size();    for(int i = 0; i < numpts; i++)  pd2.push_back(std::pair<double,double>(PD2[i].second,PD2[i].first));
+  return lpwg(pd1, pd2, 2*sqrt(sigma), &pss_weight) / (2*8*NUMPI*sigma);
+}
 
+/** \brief Computes the Gaussian Persistence Weighted Gaussian Kernel between two persistence diagrams.
+ *
+ * @param[in] PD1      first persistence diagram.
+ * @param[in] PD2      second persistence diagram.
+ * @param[in] sigma    bandwidth parameter of the Gaussian Kernel used for the Kernel Mean Embedding of the diagrams.
+ * @param[in] tau      bandwidth parameter of the Gaussian Kernel used between the embeddings.
+ * @param[in] weight   weight function for the points in the diagrams.
+ *
+ */
+double gpwg(PD PD1, PD PD2, double sigma, double tau, double (*weight)(std::pair<double,double>) = [](std::pair<double,double> P){return atan(P.second - P.first);}){
+  double k1 = lpwg(PD1,PD1,sigma,weight);
+  double k2 = lpwg(PD2,PD2,sigma,weight);
+  double k3 = lpwg(PD1,PD2,sigma,weight);
+  return exp( - (k1+k2-2*k3) / (2*pow(tau,2))  );
+}
+
+/** \brief Computes the RKHS distance induced by the Gaussian Kernel Embedding between two persistence diagrams.
+ *
+ * @param[in] PD1      first persistence diagram.
+ * @param[in] PD2      second persistence diagram.
+ * @param[in] sigma    bandwidth parameter of the Gaussian Kernel used for the Kernel Mean Embedding of the diagrams.
+ * @param[in] weight   weight function for the points in the diagrams.
+ *
+ */
+double dpwg(PD PD1, PD PD2, double sigma, double (*weight)(std::pair<double,double>) = [](std::pair<double,double> P){return atan(P.second - P.first);}){
+  double k1 = lpwg(PD1,PD1,sigma,weight);
+  double k2 = lpwg(PD2,PD2,sigma,weight);
+  double k3 = lpwg(PD1,PD2,sigma,weight);
+  return std::sqrt(k1+k2-2*k3);
+}
 
 // Compute the angle formed by two points of a PD
 double compute_angle(const PD & PersDiag, const int & i, const int & j){
@@ -195,7 +203,7 @@ double compute_sw(const std::vector<std::vector<std::pair<int,double> > > & V1,
     while(theta1 != NUMPI/2){
       if(PD1[U[ku].first].first != PD2[V[kv].first].first || PD1[U[ku].first].second != PD2[V[kv].first].second)
         if(theta1 != theta2)
-          sw += Gudhi::sliced_wasserstein::compute_int(theta1, theta2, U[ku].first, V[kv].first, PD1, PD2);
+          sw += compute_int(theta1, theta2, U[ku].first, V[kv].first, PD1, PD2);
       theta1 = theta2;
       if (  (theta2 == U[ku].second)  &&  ku < U.size()-1  )  ku++;
       if (  (theta2 == V[kv].second)  &&  kv < V.size()-1  )  kv++;
@@ -205,29 +213,13 @@ double compute_sw(const std::vector<std::vector<std::pair<int,double> > > & V1,
   return sw/NUMPI;
 }
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-/** \brief Computes the Sliced Wasserstein distance between two persistence diagrams
+/** \brief Computes the Sliced Wasserstein distance between two persistence diagrams.
+ *
+ * @param[in] PD1 first persistence diagram.
+ * @param[in] PD2 second persistence diagram.
  *
  */
-
-double compute_exact_SW(PD PD1, PD PD2){
+ double sw(PD PD1, PD PD2){
 
   // Add projections onto diagonal.
   int n1, n2; n1 = PD1.size(); n2 = PD2.size(); double max_ordinate = std::numeric_limits<double>::lowest();
@@ -254,7 +246,7 @@ double compute_exact_SW(PD PD1, PD PD2){
   std::vector<std::pair<double, std::pair<int,int> > > angles1, angles2;
   for (int i = 0; i < N; i++){
     for (int j = i+1; j < N; j++){
-      double theta1 = Gudhi::sliced_wasserstein::compute_angle(PD1,i,j); double theta2 = Gudhi::sliced_wasserstein::compute_angle(PD2,i,j);
+      double theta1 = compute_angle(PD1,i,j); double theta2 = compute_angle(PD2,i,j);
       angles1.push_back(std::pair<double, std::pair<int,int> >(theta1, std::pair<int,int>(i,j)));
       angles2.push_back(std::pair<double, std::pair<int,int> >(theta2, std::pair<int,int>(i,j)));
     }
@@ -300,17 +292,156 @@ double compute_exact_SW(PD PD1, PD PD2){
   }
 
   // Compute the SW distance with the list of inversions.
-  return Gudhi::sliced_wasserstein::compute_sw(anglePerm1, anglePerm2, PD1, PD2);
+  return compute_sw(anglePerm1, anglePerm2, PD1, PD2);
+
+}
+
+
+
+
 
+
+
+
+
+// ********************************************************************
+// Approximate computation.
+// ********************************************************************
+
+double approx_lpwg_Fourier(const std::vector<std::pair<double,double> >& B1, const std::vector<std::pair<double,double> >& B2){
+  double d = 0; int M  = B1.size();
+  for(int i = 0; i < M; i++) d += B1[i].first*B2[i].first + B1[i].second*B2[i].second;
+  return (1.0/M)*d;
 }
 
+double approx_gpwg_Fourier(const std::vector<std::pair<double,double> >& B1, const std::vector<std::pair<double,double> >& B2, double tau){
+  int M  = B1.size();
+  double d3 = approx_lpwg_Fourier(B1, B2);
+  double d1 = 0; double d2 = 0;
+  for(int i = 0; i < M; i++){d1 += pow(B1[i].first,2) + pow(B1[i].second,2); d2 += pow(B2[i].first,2) + pow(B2[i].second,2);}
+  return exp( -((1.0/M)*(d1+d2)-2*d3) / (2*pow(tau,2))  );
+}
 
+double approx_dpwg_Fourier(const std::vector<std::pair<double,double> >& B1, const std::vector<std::pair<double,double> >& B2){
+  int M  = B1.size();
+  double d3 = approx_lpwg_Fourier(B1, B2);
+  double d1 = 0; double d2 = 0;
+  for(int i = 0; i < M; i++){d1 += pow(B1[i].first,2) + pow(B1[i].second,2); d2 += pow(B2[i].first,2) + pow(B2[i].second,2);}
+  return std::sqrt((1.0/M)*(d1+d2)-2*d3);
+}
 
+std::vector<std::pair<double,double> > Fourier_feat(PD D, std::vector<std::pair<double,double> > Z, double (*weight)(std::pair<double,double>) = [](std::pair<double,double> P){return atan(P.second - P.first);}){
+  int m = D.size(); std::vector<std::pair<double,double> > B; int M = Z.size();
+  for(int i = 0; i < M; i++){
+    double d1 = 0; double d2 = 0; double zx = Z[i].first; double zy = Z[i].second;
+    for(int j = 0; j < m; j++){
+      double x = D[j].first; double y = D[j].second;
+      d1 += (*weight)(D[j])*cos(x*zx + y*zy);
+      d2 += (*weight)(D[j])*sin(x*zx + y*zy);
+    }
+    B.push_back(std::pair<double,double>(d1,d2));
+  }
+  return B;
+}
+
+std::vector<std::pair<double,double> > random_Fourier(double sigma, int M = 1000){
+  std::normal_distribution<double> distrib(0,1); std::vector<std::pair<double,double> > Z; std::random_device rd;
+  for(int i = 0; i < M; i++){
+    std::mt19937 e1(rd()); std::mt19937 e2(rd());
+    double zx = distrib(e1); double zy = distrib(e2);
+    Z.push_back(std::pair<double,double>((1.0/sigma)*zx,(1.0/sigma)*zy));
+  }
+  return Z;
+}
 
 
+/** \brief Computes an approximation of the Linear Persistence Weighted Gaussian Kernel between two persistence diagrams with random Fourier features.
+ *
+ * @param[in] PD1       first persistence diagram.
+ * @param[in] PD2       second persistence diagram.
+ * @param[in] sigma     bandwidth parameter of the Gaussian Kernel used for the Kernel Mean Embedding of the diagrams.
+ * @param[in] weight    weight function for the points in the diagrams.
+ * @param[in] M         number of Fourier features.
+ *
+ */
+double approx_lpwg(PD PD1, PD PD2, double sigma, double (*weight)(std::pair<double,double>) = [](std::pair<double,double> P){return atan(P.second - P.first);}, int M = 1000){
+  std::vector<std::pair<double,double> > Z =  random_Fourier(sigma, M);
+  std::vector<std::pair<double,double> > B1 = Fourier_feat(PD1,Z,weight);
+  std::vector<std::pair<double,double> > B2 = Fourier_feat(PD2,Z,weight);
+  return approx_lpwg_Fourier(B1,B2);
+}
+
+/** \brief Computes an approximation of the Persistence Scale Space Kernel between two persistence diagrams with random Fourier features.
+ *
+ * @param[in] PD1       first persistence diagram.
+ * @param[in] PD2       second persistence diagram.
+ * @param[in] sigma     bandwidth parameter of the Gaussian Kernel used for the Kernel Mean Embedding of the diagrams.
+ * @param[in] M         number of Fourier features.
+ *
+ */
+double approx_pss(PD PD1, PD PD2, double sigma, int M = 1000){
+  PD pd1 = PD1; int numpts = PD1.size();    for(int i = 0; i < numpts; i++)  pd1.push_back(std::pair<double,double>(PD1[i].second,PD1[i].first));
+  PD pd2 = PD2;     numpts = PD2.size();    for(int i = 0; i < numpts; i++)  pd2.push_back(std::pair<double,double>(PD2[i].second,PD2[i].first));
+  return approx_lpwg(pd1, pd2, 2*sqrt(sigma), &pss_weight, M) / (2*8*NUMPI*sigma);
+}
+
+
+/** \brief Computes an approximation of the Gaussian Persistence Weighted Gaussian Kernel between two persistence diagrams with random Fourier features.
+ *
+ * @param[in] PD1       first persistence diagram.
+ * @param[in] PD2       second persistence diagram.
+ * @param[in] sigma     bandwidth parameter of the Gaussian Kernel used for the Kernel Mean Embedding of the diagrams.
+ * @param[in] tau       bandwidth parameter of the Gaussian Kernel used between the embeddings.
+ * @param[in] weight    weight function for the points in the diagrams.
+ * @param[in] M         number of Fourier features.
+ *
+ */
+double approx_gpwg(PD PD1, PD PD2, double sigma, double tau, double (*weight)(std::pair<double,double>) = [](std::pair<double,double> P){return atan(P.second - P.first);}, int M = 1000){
+  std::vector<std::pair<double,double> > Z =  random_Fourier(sigma, M);
+  std::vector<std::pair<double,double> > B1 = Fourier_feat(PD1,Z,weight);
+  std::vector<std::pair<double,double> > B2 = Fourier_feat(PD2,Z,weight);
+  return approx_gpwg_Fourier(B1,B2,tau);
+}
+
+
+/** \brief Computes an approximation of the Sliced Wasserstein distance between two persistence diagrams.
+ *
+ * @param[in] PD1       first persistence diagram.
+ * @param[in] PD2       second persistence diagram.
+ * @param[in] N         number of points sampled on the circle.
+ *
+ */
+double approx_sw(PD PD1, PD PD2, int N = 100){
+
+  double step = NUMPI/N; double sw = 0;
+
+  // Add projections onto diagonal.
+  int n1, n2; n1 = PD1.size(); n2 = PD2.size();
+  for (int i = 0; i < n2; i++)
+    PD1.push_back(std::pair<double,double>(   (PD2[i].first + PD2[i].second)/2,  (PD2[i].first + PD2[i].second)/2)   );
+  for (int i = 0; i < n1; i++)
+    PD2.push_back(std::pair<double,double>(   (PD1[i].first + PD1[i].second)/2,  (PD1[i].first + PD1[i].second)/2)   );
+  int n = PD1.size();
+
+  // Sort and compare all projections.
+  //#pragma omp parallel for
+  for (int i = 0; i < N; i++){
+    std::vector<std::pair<int,double> > L1, L2;
+    for (int j = 0; j < n; j++){
+      L1.push_back(   std::pair<int,double>(j, PD1[j].first*cos(-NUMPI/2+i*step) + PD1[j].second*sin(-NUMPI/2+i*step))   );
+      L2.push_back(   std::pair<int,double>(j, PD2[j].first*cos(-NUMPI/2+i*step) + PD2[j].second*sin(-NUMPI/2+i*step))   );
+    }
+    std::sort(L1.begin(),L1.end(), myComp); std::sort(L2.begin(),L2.end(), myComp);
+    double f = 0; for (int j = 0; j < n; j++)  f += std::abs(L1[j].second - L2[j].second);
+    sw += f*step;
+  }
+  return sw/NUMPI;
+}
+
 
-}}
 
-#endif
+} // namespace kernel
 
+} // namespace Gudhi
 
+#endif //KERNEL_H_
author	mcarrier <mcarrier@636b058d-ea47-450e-bf9e-a15bfbe3eedb>	2017-12-29 23:13:11 +0000
committer	mcarrier <mcarrier@636b058d-ea47-450e-bf9e-a15bfbe3eedb>	2017-12-29 23:13:11 +0000
commit	a065a3b86e33c24250a981f95db1ff46d9075ef5 (patch)
tree	467ac7e14aaada4e18b3d5bd6b11f95b2cdd62fe /src
parent	21e6cb713dd3db78e68b4140ab2d69508dad01af (diff)