diff options
author | mcarrier <mcarrier@636b058d-ea47-450e-bf9e-a15bfbe3eedb> | 2017-12-29 23:13:11 +0000 |
---|---|---|
committer | mcarrier <mcarrier@636b058d-ea47-450e-bf9e-a15bfbe3eedb> | 2017-12-29 23:13:11 +0000 |
commit | a065a3b86e33c24250a981f95db1ff46d9075ef5 (patch) | |
tree | 467ac7e14aaada4e18b3d5bd6b11f95b2cdd62fe /src | |
parent | 21e6cb713dd3db78e68b4140ab2d69508dad01af (diff) |
git-svn-id: svn+ssh://scm.gforge.inria.fr/svnroot/gudhi/branches/kernels@3106 636b058d-ea47-450e-bf9e-a15bfbe3eedb
Former-commit-id: d4456c36f3ef8cc9e7f516d943472b963b5e9b93
Diffstat (limited to 'src')
-rw-r--r-- | src/Kernels/example/CMakeLists.txt | 11 | ||||
-rw-r--r-- | src/Kernels/example/kernel_basic_example.cpp | 31 | ||||
-rw-r--r-- | src/Kernels/include/gudhi/PSS.h | 108 | ||||
-rw-r--r-- | src/Kernels/include/gudhi/PWG.h | 204 | ||||
-rw-r--r-- | src/Kernels/include/gudhi/figtree-0.9.3.zip | bin | 1229617 -> 0 bytes | |||
-rw-r--r-- | src/Kernels/include/gudhi/kernel.h (renamed from src/Kernels/include/gudhi/SW.h) | 307 |
6 files changed, 245 insertions, 416 deletions
diff --git a/src/Kernels/example/CMakeLists.txt b/src/Kernels/example/CMakeLists.txt new file mode 100644 index 00000000..57e13004 --- /dev/null +++ b/src/Kernels/example/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 2.6) +project(Kernels_examples) + +add_executable ( BasicEx kernel_basic_example.cpp ) + +if (TBB_FOUND) + target_link_libraries(BasicEx ${TBB_LIBRARIES}) +endif() + +add_test(NAME Kernels_example_basicex COMMAND $<TARGET_FILE:BasicEx> + "") diff --git a/src/Kernels/example/kernel_basic_example.cpp b/src/Kernels/example/kernel_basic_example.cpp index 0a8d83b3..8e9925c5 100644 --- a/src/Kernels/example/kernel_basic_example.cpp +++ b/src/Kernels/example/kernel_basic_example.cpp @@ -2,9 +2,9 @@ * (Geometric Understanding in Higher Dimensions) is a generic C++ * library for computational topology. * - * Authors: Francois Godi, small modifications by Pawel Dlotko + * Authors: Mathieu Carrière * - * Copyright (C) 2015 INRIA + * Copyright (C) 2017 INRIA * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -20,18 +20,15 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <gudhi/SW.h> -//#include <gudhi/PSS.h> -//#include <gudhi/PWG.h> - -#include <iostream> -#include <vector> -#include <utility> // for pair -#include <limits> // for numeric_limits +#define NUMPI 3.14159265359 +#include <gudhi/kernel.h> int main() { + std::vector< std::pair<double, double> > v1, v2; + double sigma = 2; double tau = 5; + v1.emplace_back(2.7, 3.7); v1.emplace_back(9.6, 14.); v1.emplace_back(34.2, 34.974); @@ -39,11 +36,13 @@ int main() { v2.emplace_back(2.8, 4.45); v2.emplace_back(9.5, 14.1); - - double b1 = Gudhi::sliced_wasserstein::compute_approximate_SW (v1, v2); - double b2 = Gudhi::sliced_wasserstein::compute_exact_SW (v1, v2); - - std::cout << "Approximate Sliced Wasserstein distance = " << b1 << std::endl; - std::cout << "Exact Sliced Wasserstein distance = " << b2 << std::endl; + std::cout << "SW exact = " << Gudhi::kernel::sw (v1, v2) << std::endl; + std::cout << "SW approx = " << Gudhi::kernel::approx_sw (v1, v2) << std::endl; + std::cout << "PSS exact = " << Gudhi::kernel::pss (v1,v2,sigma) << std::endl; + std::cout << "PSS approx = " << Gudhi::kernel::approx_pss (v1,v2,sigma) << std::endl; + std::cout << "PWG exact = " << Gudhi::kernel::lpwg (v1,v2,sigma) << std::endl; + std::cout << "PWG approx = " << Gudhi::kernel::approx_lpwg (v1,v2,sigma) << std::endl; + std::cout << "GPWG exact = " << Gudhi::kernel::gpwg (v1,v2,sigma,tau) << std::endl; + std::cout << "GPWG approx = " << Gudhi::kernel::approx_gpwg (v1,v2,sigma,tau) << std::endl; } diff --git a/src/Kernels/include/gudhi/PSS.h b/src/Kernels/include/gudhi/PSS.h deleted file mode 100644 index 5111a09f..00000000 --- a/src/Kernels/include/gudhi/PSS.h +++ /dev/null @@ -1,108 +0,0 @@ -/* This file is part of the Gudhi Library. The Gudhi library - * (Geometric Understanding in Higher Dimensions) is a generic C++ - * library for computational topology. - * - * Author(s): Mathieu Carrière - * - * Copyright (C) 2017 INRIA (France) - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef PSS_H_ -#define PSS_H_ - -#define NUMPI 3.14159265359 - -#include <stdlib.h> -#include <stdio.h> -#include <cstdlib> -#include <cstdio> -#include <iomanip> - -#include <string> -#include <iostream> -#include <sstream> -#include <fstream> - -#include <set> -#include <map> -#include <vector> -#include <algorithm> -#include <limits> -#include <assert.h> - -#include <cmath> -#include <math.h> - -#include <memory> -#include <stdexcept> -#include <omp.h> - -#include <gmp.h> -#include <gmpxx.h> -#include <random> -#include <chrono> -#include <ctime> - -#include "figtree.h" -#include "ANN.h" - -using PD = std::vector<std::pair<double,double> >; - -namespace Gudhi { -namespace persistence_scale_space { - -double compute_exact_pss(PD PD1, PD PD2, double sigma = 1){ - double k = 0; - for(int i = 0; i < PD1.size(); i++){ - for(int j = 0; j < PD2.size(); j++){ - k += exp( -( pow(PD1[i].first - PD2[j].first, 2) + pow(PD1[i].second - PD2[j].second, 2) )/(8*sigma)) -\ - exp( -( pow(PD1[i].first - PD2[j].second, 2) + pow(PD1[i].second - PD2[j].first, 2) )/(8*sigma)); - } - } - return k/(8*NUMPI*sigma); -} - -double compute_approximate_pss(PD PD1, PD PD2, double sigma = 1, double error = 1e-2){ - - double k = 0; - - int d = 2; int N = PD1.size(); int M = PD2.size(); double h = std::sqrt(8*sigma); - double* x = new double[2*N]; double* y = new double[2*M]; double* q = new double[N]; - for(int i = 0; i < N; i++){ - q[i] = 1.0/(8*NUMPI*sigma); - x[2*i] = PD1[i].first; x[2*i+1] = PD1[i].second; - } - for(int i = 0; i < M; i++){ y[2*i] = PD2[i].first; y[2*i+1] = PD2[i].second; } - double* g_auto = new double[M]; - memset(g_auto, 0, sizeof(double)*M); - - figtree(d, N, M, 1, x, h, q, y, error, g_auto); - for(int i = 0; i < M; i++) k += g_auto[i]; - - for(int i = 0; i < M; i++){ y[2*i] = PD2[i].second; y[2*i+1] = PD2[i].first; } - - figtree(d, N, M, 1, x, h, q, y, error, g_auto); - for(int i = 0; i < M; i++) k -= g_auto[i]; - - delete[] x; delete[] y; delete[] q; delete[] g_auto; - return k; -} - -} // namespace persistence_scale_space - -} // namespace Gudhi - -#endif // PSS_H_ diff --git a/src/Kernels/include/gudhi/PWG.h b/src/Kernels/include/gudhi/PWG.h deleted file mode 100644 index bc491ae7..00000000 --- a/src/Kernels/include/gudhi/PWG.h +++ /dev/null @@ -1,204 +0,0 @@ -/* This file is part of the Gudhi Library. The Gudhi library - * (Geometric Understanding in Higher Dimensions) is a generic C++ - * library for computational topology. - * - * Author(s): Mathieu Carrière - * - * Copyright (C) 2017 INRIA (France) - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef PWG_H_ -#define PWG_H_ - -#define NUMPI 3.14159265359 - -#include <stdlib.h> -#include <stdio.h> -#include <cstdlib> -#include <cstdio> -#include <iomanip> - -#include <string> -#include <iostream> -#include <sstream> -#include <fstream> - -#include <set> -#include <map> -#include <vector> -#include <algorithm> -#include <limits> -#include <assert.h> - -#include <cmath> -#include <math.h> - -#include <memory> -#include <stdexcept> -#include <omp.h> - -#include <gmp.h> -#include <gmpxx.h> -#include <random> -#include <chrono> -#include <ctime> - -using PD = std::vector<std::pair<double,double> >; - -namespace Gudhi { -namespace persistence_weighted_gaussian { - -double compute_exact_linear_pwg(PD PD1, PD PD2, double sigma, double C, int p){ - - int num_pts1 = PD1.size(); - int num_pts2 = PD2.size(); - - double k = 0; - for(int i = 0; i < num_pts1; i++){ - for(int j = 0; j < num_pts2; j++){ - k += atan(C*pow(PD1[i].second-PD1[i].first,p))*atan(C*pow(PD2[j].second-PD2[j].first,p))*\ - exp( -( pow(PD1[i].first-PD2[j].first,2) + pow(PD1[i].second-PD2[j].second,2) )/(2*pow(sigma,2)) ); - } - } - - return k; - -} - -double compute_exact_gaussian_pwg(PD PD1, PD PD2, double sigma, double C, int p, double tau){ - - int num_pts1 = PD1.size(); - int num_pts2 = PD2.size(); - - double k1 = 0; - for(int i = 0; i < num_pts1; i++){ - for(int j = 0; j < num_pts1; j++){ - k1 += atan(C*pow(PD1[i].second-PD1[i].first,p))*atan(C*pow(PD1[j].second-PD1[j].first,p))*\ - exp( -( pow(PD1[i].first-PD1[j].first,2) + pow(PD1[i].second-PD1[j].second,2) )/(2*pow(sigma,2)) ); - } - } - - double k2 = 0; - for(int i = 0; i < num_pts2; i++){ - for(int j = 0; j < num_pts2; j++){ - k2 += atan(C*pow(PD2[i].second-PD2[i].first,p))*atan(C*pow(PD2[j].second-PD2[j].first,p))*\ - exp( -( pow(PD2[i].first-PD2[j].first,2) + pow(PD2[i].second-PD2[j].second,2) )/(2*pow(sigma,2)) ); - } - } - - double k3 = compute_exact_linear_pwg(PD1,PD2,sigma,C,p); - return exp( - (k1+k2-2*k3) / (2*pow(tau,2)) ); - -} - -double compute_exact_gaussian_RKHSdist(PD PD1, PD PD2, double sigma, double C, int p){ - - int num_pts1 = PD1.size(); - int num_pts2 = PD2.size(); - - double k1 = 0; - for(int i = 0; i < num_pts1; i++){ - for(int j = 0; j < num_pts1; j++){ - k1 += atan(C*pow(PD1[i].second-PD1[i].first,p))*atan(C*pow(PD1[j].second-PD1[j].first,p))*\ - exp( -( pow(PD1[i].first-PD1[j].first,2) + pow(PD1[i].second-PD1[j].second,2) )/(2*pow(sigma,2)) ); - } - } - - double k2 = 0; - for(int i = 0; i < num_pts2; i++){ - for(int j = 0; j < num_pts2; j++){ - k2 += atan(C*pow(PD2[i].second-PD2[i].first,p))*atan(C*pow(PD2[j].second-PD2[j].first,p))*\ - exp( -( pow(PD2[i].first-PD2[j].first,2) + pow(PD2[i].second-PD2[j].second,2) )/(2*pow(sigma,2)) ); - } - } - - double k3 = compute_exact_linear_pwg(PD1,PD2,sigma,C,p); - return std::sqrt(k1+k2-2*k3); - -} - -double compute_approximate_linear_pwg_from_Fourier_features(const std::vector<std::pair<double,double> >& B1, \ - const std::vector<std::pair<double,double> >& B2){ - double d = 0; int M = B1.size(); - for(int i = 0; i < M; i++) d += B1[i].first*B2[i].first + B1[i].second*B2[i].second; - return (1.0/M)*d; -} - -double compute_approximate_gaussian_pwg_from_Fourier_features(const std::vector<std::pair<double,double> >& B1, \ - const std::vector<std::pair<double,double> >& B2, double tau){ - int M = B1.size(); - double d3 = compute_approximate_linear_pwg_from_Fourier_features(B1, B2); - double d1 = 0; double d2 = 0; - for(int i = 0; i < M; i++){d1 += pow(B1[i].first,2) + pow(B1[i].second,2); d2 += pow(B2[i].first,2) + pow(B2[i].second,2);} - return exp( -((1.0/M)*(d1+d2)-2*d3) / (2*pow(tau,2)) ); -} - -double compute_approximate_gaussian_RKHSdist_from_Fourier_features(const std::vector<std::pair<double,double> >& B1, \ - const std::vector<std::pair<double,double> >& B2){ - int M = B1.size(); - double d3 = compute_approximate_linear_pwg_from_Fourier_features(B1, B2); - double d1 = 0; double d2 = 0; - for(int i = 0; i < M; i++){d1 += pow(B1[i].first,2) + pow(B1[i].second,2); d2 += pow(B2[i].first,2) + pow(B2[i].second,2);} - return std::sqrt((1.0/M)*(d1+d2)-2*d3); -} - -std::vector<std::pair<double,double> > compute_Fourier_features(double C, int p, PD D, std::vector<std::pair<double,double> > Z){ - int m = D.size(); std::vector<std::pair<double,double> > B; int M = Z.size(); - for(int i = 0; i < M; i++){ - double d1 = 0; double d2 = 0; double zx = Z[i].first; double zy = Z[i].second; - for(int j = 0; j < m; j++){ - double x = D[j].first; double y = D[j].second; - d1 += atan(C*pow(y-x,p))*cos(x*zx + y*zy); - d2 += atan(C*pow(y-x,p))*sin(x*zx + y*zy); - } - B.push_back(std::pair<double,double>(d1,d2)); - } - return B; -} - -std::vector<std::pair<double,double> > random_Fourier(double sigma, int M = 1000){ - std::normal_distribution<double> distrib(0,1); std::vector<std::pair<double,double> > Z; - std::random_device rd; - for(int i = 0; i < M; i++){ - //unsigned seedx = 2*i; unsigned seedy = 2*i+1; - //std::default_random_engine generatorx(seedx); std::default_random_engine generatory(seedy); - std::mt19937 e1(rd()); std::mt19937 e2(rd()); - double zx = distrib(e1/*generatorx*/); double zy = distrib(e2/*generatory*/); - Z.push_back(std::pair<double,double>((1/sigma)*zx,(1/sigma)*zy)); - } - return Z; -} - -double compute_approximate_linear_pwg(PD PD1, PD PD2, double sigma, double C, int p, int M = 1000){ - std::vector<std::pair<double,double> > Z = random_Fourier(sigma, M); - std::vector<std::pair<double,double> > B1 = compute_Fourier_features(C,p,PD1,Z); - std::vector<std::pair<double,double> > B2 = compute_Fourier_features(C,p,PD2,Z); - return compute_approximate_linear_pwg_from_Fourier_features(B1,B2); -} - -double compute_approximate_gaussian_pwg(PD PD1, PD PD2, double sigma, double C, int p, double tau, int M = 1000){ - std::vector<std::pair<double,double> > Z = random_Fourier(sigma, M); - std::vector<std::pair<double,double> > B1 = compute_Fourier_features(C,p,PD1,Z); - std::vector<std::pair<double,double> > B2 = compute_Fourier_features(C,p,PD2,Z); - return compute_approximate_gaussian_pwg_from_Fourier_features(B1,B2,tau); -} - - -} // namespace persistence_weighted_gaussian - -} // namespace Gudhi - -#endif //PWG_H_ diff --git a/src/Kernels/include/gudhi/figtree-0.9.3.zip b/src/Kernels/include/gudhi/figtree-0.9.3.zip Binary files differdeleted file mode 100644 index a9468274..00000000 --- a/src/Kernels/include/gudhi/figtree-0.9.3.zip +++ /dev/null diff --git a/src/Kernels/include/gudhi/SW.h b/src/Kernels/include/gudhi/kernel.h index 0b041252..c4120d7a 100644 --- a/src/Kernels/include/gudhi/SW.h +++ b/src/Kernels/include/gudhi/kernel.h @@ -20,108 +20,116 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef SW_H_ -#define SW_H_ +#ifndef KERNEL_H_ +#define KERNEL_H_ #define NUMPI 3.14159265359 #include <stdlib.h> +#include <stdio.h> #include <cstdlib> +#include <cstdio> +#include <iomanip> #include <string> #include <iostream> #include <sstream> #include <fstream> -#include <vector> -#include <algorithm> #include <set> #include <map> +#include <vector> +#include <algorithm> #include <limits> +#include <assert.h> #include <cmath> #include <math.h> -#include <string.h> -#include <stdio.h> -#include <cstdio> #include <memory> #include <stdexcept> #include <omp.h> -#include <assert.h> -#include <iomanip> #include <gmp.h> #include <gmpxx.h> #include <random> #include <chrono> #include <ctime> -#include <math.h> using PD = std::vector<std::pair<double,double> >; - bool sortAngle(const std::pair<double, std::pair<int,int> >& p1, const std::pair<double, std::pair<int,int> >& p2){return (p1.first < p2.first);} bool myComp(const std::pair<int,double> & P1, const std::pair<int,double> & P2){return P1.second < P2.second;} namespace Gudhi { -namespace sliced_wasserstein { - - -// ******************************************************************** -// Approximate computation. -// ******************************************************************** - -/** \brief Computes an approximation of the Sliced Wasserstein distance between two persistence diagrams - * - * @param[in] N number of points sampled on the circle. - * - */ - -double compute_approximate_SW(PD PD1, PD PD2, int N = 100){ +namespace kernel { - double step = NUMPI/N; double sw = 0; - // Add projections onto diagonal. - int n1, n2; n1 = PD1.size(); n2 = PD2.size(); - for (int i = 0; i < n2; i++) - PD1.push_back(std::pair<double,double>( (PD2[i].first + PD2[i].second)/2, (PD2[i].first + PD2[i].second)/2) ); - for (int i = 0; i < n1; i++) - PD2.push_back(std::pair<double,double>( (PD1[i].first + PD1[i].second)/2, (PD1[i].first + PD1[i].second)/2) ); - int n = PD1.size(); - - // Sort and compare all projections. - //#pragma omp parallel for - for (int i = 0; i < N; i++){ - std::vector<std::pair<int,double> > L1, L2; - for (int j = 0; j < n; j++){ - L1.push_back( std::pair<int,double>(j, PD1[j].first*cos(-NUMPI/2+i*step) + PD1[j].second*sin(-NUMPI/2+i*step)) ); - L2.push_back( std::pair<int,double>(j, PD2[j].first*cos(-NUMPI/2+i*step) + PD2[j].second*sin(-NUMPI/2+i*step)) ); - } - std::sort(L1.begin(),L1.end(), myComp); std::sort(L2.begin(),L2.end(), myComp); - double f = 0; for (int j = 0; j < n; j++) f += std::abs(L1[j].second - L2[j].second); - sw += f*step; - } - return sw/NUMPI; +double pss_weight(std::pair<double,double> P){ + if(P.second > P.first) return 1; + else return -1; } - - - - - - - - - - - - - - - // ******************************************************************** // Exact computation. // ******************************************************************** +/** \brief Computes the Linear Persistence Weighted Gaussian Kernel between two persistence diagrams. + * + * @param[in] PD1 first persistence diagram. + * @param[in] PD2 second persistence diagram. + * @param[in] sigma bandwidth parameter of the Gaussian Kernel used for the Kernel Mean Embedding of the diagrams. + * @param[in] weight weight function for the points in the diagrams. + * + */ +double lpwg(PD PD1, PD PD2, double sigma, double (*weight)(std::pair<double,double>) = [](std::pair<double,double> P){return atan(P.second - P.first);}){ + int num_pts1 = PD1.size(); int num_pts2 = PD2.size(); double k = 0; + for(int i = 0; i < num_pts1; i++) + for(int j = 0; j < num_pts2; j++) + k += (*weight)(PD1[i])*(*weight)(PD2[j])*exp(-(pow(PD1[i].first-PD2[j].first,2) + pow(PD1[i].second-PD2[j].second,2))/(2*pow(sigma,2))); + return k; +} + +/** \brief Computes the Persistence Scale Space Kernel between two persistence diagrams. + * + * @param[in] PD1 first persistence diagram. + * @param[in] PD2 second persistence diagram. + * @param[in] sigma bandwidth parameter of the Gaussian Kernel used for the Kernel Mean Embedding of the diagrams. + * + */ +double pss(PD PD1, PD PD2, double sigma){ + PD pd1 = PD1; int numpts = PD1.size(); for(int i = 0; i < numpts; i++) pd1.push_back(std::pair<double,double>(PD1[i].second,PD1[i].first)); + PD pd2 = PD2; numpts = PD2.size(); for(int i = 0; i < numpts; i++) pd2.push_back(std::pair<double,double>(PD2[i].second,PD2[i].first)); + return lpwg(pd1, pd2, 2*sqrt(sigma), &pss_weight) / (2*8*NUMPI*sigma); +} +/** \brief Computes the Gaussian Persistence Weighted Gaussian Kernel between two persistence diagrams. + * + * @param[in] PD1 first persistence diagram. + * @param[in] PD2 second persistence diagram. + * @param[in] sigma bandwidth parameter of the Gaussian Kernel used for the Kernel Mean Embedding of the diagrams. + * @param[in] tau bandwidth parameter of the Gaussian Kernel used between the embeddings. + * @param[in] weight weight function for the points in the diagrams. + * + */ +double gpwg(PD PD1, PD PD2, double sigma, double tau, double (*weight)(std::pair<double,double>) = [](std::pair<double,double> P){return atan(P.second - P.first);}){ + double k1 = lpwg(PD1,PD1,sigma,weight); + double k2 = lpwg(PD2,PD2,sigma,weight); + double k3 = lpwg(PD1,PD2,sigma,weight); + return exp( - (k1+k2-2*k3) / (2*pow(tau,2)) ); +} + +/** \brief Computes the RKHS distance induced by the Gaussian Kernel Embedding between two persistence diagrams. + * + * @param[in] PD1 first persistence diagram. + * @param[in] PD2 second persistence diagram. + * @param[in] sigma bandwidth parameter of the Gaussian Kernel used for the Kernel Mean Embedding of the diagrams. + * @param[in] weight weight function for the points in the diagrams. + * + */ +double dpwg(PD PD1, PD PD2, double sigma, double (*weight)(std::pair<double,double>) = [](std::pair<double,double> P){return atan(P.second - P.first);}){ + double k1 = lpwg(PD1,PD1,sigma,weight); + double k2 = lpwg(PD2,PD2,sigma,weight); + double k3 = lpwg(PD1,PD2,sigma,weight); + return std::sqrt(k1+k2-2*k3); +} // Compute the angle formed by two points of a PD double compute_angle(const PD & PersDiag, const int & i, const int & j){ @@ -195,7 +203,7 @@ double compute_sw(const std::vector<std::vector<std::pair<int,double> > > & V1, while(theta1 != NUMPI/2){ if(PD1[U[ku].first].first != PD2[V[kv].first].first || PD1[U[ku].first].second != PD2[V[kv].first].second) if(theta1 != theta2) - sw += Gudhi::sliced_wasserstein::compute_int(theta1, theta2, U[ku].first, V[kv].first, PD1, PD2); + sw += compute_int(theta1, theta2, U[ku].first, V[kv].first, PD1, PD2); theta1 = theta2; if ( (theta2 == U[ku].second) && ku < U.size()-1 ) ku++; if ( (theta2 == V[kv].second) && kv < V.size()-1 ) kv++; @@ -205,29 +213,13 @@ double compute_sw(const std::vector<std::vector<std::pair<int,double> > > & V1, return sw/NUMPI; } - - - - - - - - - - - - - - - - - - -/** \brief Computes the Sliced Wasserstein distance between two persistence diagrams +/** \brief Computes the Sliced Wasserstein distance between two persistence diagrams. + * + * @param[in] PD1 first persistence diagram. + * @param[in] PD2 second persistence diagram. * */ - -double compute_exact_SW(PD PD1, PD PD2){ + double sw(PD PD1, PD PD2){ // Add projections onto diagonal. int n1, n2; n1 = PD1.size(); n2 = PD2.size(); double max_ordinate = std::numeric_limits<double>::lowest(); @@ -254,7 +246,7 @@ double compute_exact_SW(PD PD1, PD PD2){ std::vector<std::pair<double, std::pair<int,int> > > angles1, angles2; for (int i = 0; i < N; i++){ for (int j = i+1; j < N; j++){ - double theta1 = Gudhi::sliced_wasserstein::compute_angle(PD1,i,j); double theta2 = Gudhi::sliced_wasserstein::compute_angle(PD2,i,j); + double theta1 = compute_angle(PD1,i,j); double theta2 = compute_angle(PD2,i,j); angles1.push_back(std::pair<double, std::pair<int,int> >(theta1, std::pair<int,int>(i,j))); angles2.push_back(std::pair<double, std::pair<int,int> >(theta2, std::pair<int,int>(i,j))); } @@ -300,17 +292,156 @@ double compute_exact_SW(PD PD1, PD PD2){ } // Compute the SW distance with the list of inversions. - return Gudhi::sliced_wasserstein::compute_sw(anglePerm1, anglePerm2, PD1, PD2); + return compute_sw(anglePerm1, anglePerm2, PD1, PD2); + +} + + + + + + + + +// ******************************************************************** +// Approximate computation. +// ******************************************************************** + +double approx_lpwg_Fourier(const std::vector<std::pair<double,double> >& B1, const std::vector<std::pair<double,double> >& B2){ + double d = 0; int M = B1.size(); + for(int i = 0; i < M; i++) d += B1[i].first*B2[i].first + B1[i].second*B2[i].second; + return (1.0/M)*d; } +double approx_gpwg_Fourier(const std::vector<std::pair<double,double> >& B1, const std::vector<std::pair<double,double> >& B2, double tau){ + int M = B1.size(); + double d3 = approx_lpwg_Fourier(B1, B2); + double d1 = 0; double d2 = 0; + for(int i = 0; i < M; i++){d1 += pow(B1[i].first,2) + pow(B1[i].second,2); d2 += pow(B2[i].first,2) + pow(B2[i].second,2);} + return exp( -((1.0/M)*(d1+d2)-2*d3) / (2*pow(tau,2)) ); +} +double approx_dpwg_Fourier(const std::vector<std::pair<double,double> >& B1, const std::vector<std::pair<double,double> >& B2){ + int M = B1.size(); + double d3 = approx_lpwg_Fourier(B1, B2); + double d1 = 0; double d2 = 0; + for(int i = 0; i < M; i++){d1 += pow(B1[i].first,2) + pow(B1[i].second,2); d2 += pow(B2[i].first,2) + pow(B2[i].second,2);} + return std::sqrt((1.0/M)*(d1+d2)-2*d3); +} +std::vector<std::pair<double,double> > Fourier_feat(PD D, std::vector<std::pair<double,double> > Z, double (*weight)(std::pair<double,double>) = [](std::pair<double,double> P){return atan(P.second - P.first);}){ + int m = D.size(); std::vector<std::pair<double,double> > B; int M = Z.size(); + for(int i = 0; i < M; i++){ + double d1 = 0; double d2 = 0; double zx = Z[i].first; double zy = Z[i].second; + for(int j = 0; j < m; j++){ + double x = D[j].first; double y = D[j].second; + d1 += (*weight)(D[j])*cos(x*zx + y*zy); + d2 += (*weight)(D[j])*sin(x*zx + y*zy); + } + B.push_back(std::pair<double,double>(d1,d2)); + } + return B; +} + +std::vector<std::pair<double,double> > random_Fourier(double sigma, int M = 1000){ + std::normal_distribution<double> distrib(0,1); std::vector<std::pair<double,double> > Z; std::random_device rd; + for(int i = 0; i < M; i++){ + std::mt19937 e1(rd()); std::mt19937 e2(rd()); + double zx = distrib(e1); double zy = distrib(e2); + Z.push_back(std::pair<double,double>((1.0/sigma)*zx,(1.0/sigma)*zy)); + } + return Z; +} +/** \brief Computes an approximation of the Linear Persistence Weighted Gaussian Kernel between two persistence diagrams with random Fourier features. + * + * @param[in] PD1 first persistence diagram. + * @param[in] PD2 second persistence diagram. + * @param[in] sigma bandwidth parameter of the Gaussian Kernel used for the Kernel Mean Embedding of the diagrams. + * @param[in] weight weight function for the points in the diagrams. + * @param[in] M number of Fourier features. + * + */ +double approx_lpwg(PD PD1, PD PD2, double sigma, double (*weight)(std::pair<double,double>) = [](std::pair<double,double> P){return atan(P.second - P.first);}, int M = 1000){ + std::vector<std::pair<double,double> > Z = random_Fourier(sigma, M); + std::vector<std::pair<double,double> > B1 = Fourier_feat(PD1,Z,weight); + std::vector<std::pair<double,double> > B2 = Fourier_feat(PD2,Z,weight); + return approx_lpwg_Fourier(B1,B2); +} + +/** \brief Computes an approximation of the Persistence Scale Space Kernel between two persistence diagrams with random Fourier features. + * + * @param[in] PD1 first persistence diagram. + * @param[in] PD2 second persistence diagram. + * @param[in] sigma bandwidth parameter of the Gaussian Kernel used for the Kernel Mean Embedding of the diagrams. + * @param[in] M number of Fourier features. + * + */ +double approx_pss(PD PD1, PD PD2, double sigma, int M = 1000){ + PD pd1 = PD1; int numpts = PD1.size(); for(int i = 0; i < numpts; i++) pd1.push_back(std::pair<double,double>(PD1[i].second,PD1[i].first)); + PD pd2 = PD2; numpts = PD2.size(); for(int i = 0; i < numpts; i++) pd2.push_back(std::pair<double,double>(PD2[i].second,PD2[i].first)); + return approx_lpwg(pd1, pd2, 2*sqrt(sigma), &pss_weight, M) / (2*8*NUMPI*sigma); +} + + +/** \brief Computes an approximation of the Gaussian Persistence Weighted Gaussian Kernel between two persistence diagrams with random Fourier features. + * + * @param[in] PD1 first persistence diagram. + * @param[in] PD2 second persistence diagram. + * @param[in] sigma bandwidth parameter of the Gaussian Kernel used for the Kernel Mean Embedding of the diagrams. + * @param[in] tau bandwidth parameter of the Gaussian Kernel used between the embeddings. + * @param[in] weight weight function for the points in the diagrams. + * @param[in] M number of Fourier features. + * + */ +double approx_gpwg(PD PD1, PD PD2, double sigma, double tau, double (*weight)(std::pair<double,double>) = [](std::pair<double,double> P){return atan(P.second - P.first);}, int M = 1000){ + std::vector<std::pair<double,double> > Z = random_Fourier(sigma, M); + std::vector<std::pair<double,double> > B1 = Fourier_feat(PD1,Z,weight); + std::vector<std::pair<double,double> > B2 = Fourier_feat(PD2,Z,weight); + return approx_gpwg_Fourier(B1,B2,tau); +} + + +/** \brief Computes an approximation of the Sliced Wasserstein distance between two persistence diagrams. + * + * @param[in] PD1 first persistence diagram. + * @param[in] PD2 second persistence diagram. + * @param[in] N number of points sampled on the circle. + * + */ +double approx_sw(PD PD1, PD PD2, int N = 100){ + + double step = NUMPI/N; double sw = 0; + + // Add projections onto diagonal. + int n1, n2; n1 = PD1.size(); n2 = PD2.size(); + for (int i = 0; i < n2; i++) + PD1.push_back(std::pair<double,double>( (PD2[i].first + PD2[i].second)/2, (PD2[i].first + PD2[i].second)/2) ); + for (int i = 0; i < n1; i++) + PD2.push_back(std::pair<double,double>( (PD1[i].first + PD1[i].second)/2, (PD1[i].first + PD1[i].second)/2) ); + int n = PD1.size(); + + // Sort and compare all projections. + //#pragma omp parallel for + for (int i = 0; i < N; i++){ + std::vector<std::pair<int,double> > L1, L2; + for (int j = 0; j < n; j++){ + L1.push_back( std::pair<int,double>(j, PD1[j].first*cos(-NUMPI/2+i*step) + PD1[j].second*sin(-NUMPI/2+i*step)) ); + L2.push_back( std::pair<int,double>(j, PD2[j].first*cos(-NUMPI/2+i*step) + PD2[j].second*sin(-NUMPI/2+i*step)) ); + } + std::sort(L1.begin(),L1.end(), myComp); std::sort(L2.begin(),L2.end(), myComp); + double f = 0; for (int j = 0; j < n; j++) f += std::abs(L1[j].second - L2[j].second); + sw += f*step; + } + return sw/NUMPI; +} + -}} -#endif +} // namespace kernel +} // namespace Gudhi +#endif //KERNEL_H_ |