From cc42bcdf3323f2eb6edeaca105d29b32b394ca66 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 2 Jun 2020 07:33:31 +0200 Subject: Parallelism in pairwise_distances --- src/python/gudhi/representations/kernel_methods.py | 14 +++++------ src/python/gudhi/representations/metrics.py | 27 +++++++++++++++++----- 2 files changed, 28 insertions(+), 13 deletions(-) (limited to 'src/python/gudhi/representations') diff --git a/src/python/gudhi/representations/kernel_methods.py b/src/python/gudhi/representations/kernel_methods.py index 596f4f07..c9bd9d01 100644 --- a/src/python/gudhi/representations/kernel_methods.py +++ b/src/python/gudhi/representations/kernel_methods.py @@ -10,7 +10,7 @@ import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from sklearn.metrics import pairwise_distances, pairwise_kernels -from .metrics import SlicedWassersteinDistance, PersistenceFisherDistance, _sklearn_wrapper, pairwise_persistence_diagram_distances, _sliced_wasserstein_distance, _persistence_fisher_distance +from .metrics import SlicedWassersteinDistance, PersistenceFisherDistance, _sklearn_wrapper, _pairwise, pairwise_persistence_diagram_distances, _sliced_wasserstein_distance, _persistence_fisher_distance from .preprocessing import Padding ############################################# @@ -60,7 +60,7 @@ def _persistence_scale_space_kernel(D1, D2, kernel_approx=None, bandwidth=1.): weight_pss = lambda x: 1 if x[1] >= x[0] else -1 return 0.5 * _persistence_weighted_gaussian_kernel(DD1, DD2, weight=weight_pss, kernel_approx=kernel_approx, bandwidth=bandwidth) -def pairwise_persistence_diagram_kernels(X, Y=None, kernel="sliced_wasserstein", **kwargs): +def pairwise_persistence_diagram_kernels(X, Y=None, kernel="sliced_wasserstein", n_jobs=None, **kwargs): """ This function computes the kernel matrix between two lists of persistence diagrams given as numpy arrays of shape (nx2). @@ -76,15 +76,15 @@ def pairwise_persistence_diagram_kernels(X, Y=None, kernel="sliced_wasserstein", XX = np.reshape(np.arange(len(X)), [-1,1]) YY = None if Y is None else np.reshape(np.arange(len(Y)), [-1,1]) if kernel == "sliced_wasserstein": - return np.exp(-pairwise_persistence_diagram_distances(X, Y, metric="sliced_wasserstein", num_directions=kwargs["num_directions"]) / kwargs["bandwidth"]) + return np.exp(-pairwise_persistence_diagram_distances(X, Y, metric="sliced_wasserstein", num_directions=kwargs["num_directions"], n_jobs=n_jobs) / kwargs["bandwidth"]) elif kernel == "persistence_fisher": - return np.exp(-pairwise_persistence_diagram_distances(X, Y, metric="persistence_fisher", kernel_approx=kwargs["kernel_approx"], bandwidth=kwargs["bandwidth"]) / kwargs["bandwidth_fisher"]) + return np.exp(-pairwise_persistence_diagram_distances(X, Y, metric="persistence_fisher", kernel_approx=kwargs["kernel_approx"], bandwidth=kwargs["bandwidth"], n_jobs=n_jobs) / kwargs["bandwidth_fisher"]) elif kernel == "persistence_scale_space": - return pairwise_kernels(XX, YY, metric=_sklearn_wrapper(_persistence_scale_space_kernel, X, Y, **kwargs)) + return _pairwise(pairwise_kernels, False, XX, YY, metric=_sklearn_wrapper(_persistence_scale_space_kernel, X, Y, **kwargs), n_jobs=n_jobs) elif kernel == "persistence_weighted_gaussian": - return pairwise_kernels(XX, YY, metric=_sklearn_wrapper(_persistence_weighted_gaussian_kernel, X, Y, **kwargs)) + return _pairwise(pairwise_kernels, False, XX, YY, metric=_sklearn_wrapper(_persistence_weighted_gaussian_kernel, X, Y, **kwargs), n_jobs=n_jobs) else: - return pairwise_kernels(XX, YY, metric=_sklearn_wrapper(metric, **kwargs)) + return _pairwise(pairwise_kernels, False, XX, YY, metric=_sklearn_wrapper(metric, **kwargs), n_jobs=n_jobs) class SlicedWassersteinKernel(BaseEstimator, TransformerMixin): """ diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py index 8a32f7e9..23bccd68 100644 --- a/src/python/gudhi/representations/metrics.py +++ b/src/python/gudhi/representations/metrics.py @@ -12,6 +12,7 @@ from sklearn.base import BaseEstimator, TransformerMixin from sklearn.metrics import pairwise_distances from gudhi.hera import wasserstein_distance as hera_wasserstein_distance from .preprocessing import Padding +from joblib import Parallel, delayed, effective_n_jobs ############################################# # Metrics ################################### @@ -116,6 +117,20 @@ def _persistence_fisher_distance(D1, D2, kernel_approx=None, bandwidth=1.): vectorj = vectorj/vectorj_sum return np.arccos( min(np.dot(np.sqrt(vectori), np.sqrt(vectorj)), 1.) ) +def _pairwise(fallback, skipdiag, X, Y, metric, n_jobs): + if Y is not None: + return fallback(X, Y, metric=metric, n_jobs=n_jobs) + triu = np.triu_indices(len(X), k=skipdiag) + tril = (triu[1], triu[0]) + par = Parallel(n_jobs=n_jobs, prefer="threads") + d = par(delayed(metric)([triu[0][i]], [triu[1][i]]) for i in range(len(triu[0]))) + m = np.empty((len(X), len(X))) + m[triu] = d + m[tril] = d + if skipdiag: + np.fill_diagonal(m, 0) + return m + def _sklearn_wrapper(metric, X, Y, **kwargs): """ This function is a wrapper for any metric between two persistence diagrams that takes two numpy arrays of shapes (nx2) and (mx2) as arguments. @@ -134,7 +149,7 @@ PAIRWISE_DISTANCE_FUNCTIONS = { "persistence_fisher": _persistence_fisher_distance, } -def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", **kwargs): +def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", n_jobs=None, **kwargs): """ This function computes the distance matrix between two lists of persistence diagrams given as numpy arrays of shape (nx2). @@ -152,25 +167,25 @@ def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", **kwa if metric == "bottleneck": try: from .. import bottleneck_distance - return pairwise_distances(XX, YY, metric=_sklearn_wrapper(bottleneck_distance, X, Y, **kwargs)) + return _pairwise(pairwise_distances, True, XX, YY, metric=_sklearn_wrapper(bottleneck_distance, X, Y, **kwargs), n_jobs=n_jobs) except ImportError: print("Gudhi built without CGAL") raise elif metric == "pot_wasserstein": try: from gudhi.wasserstein import wasserstein_distance as pot_wasserstein_distance - return pairwise_distances(XX, YY, metric=_sklearn_wrapper(pot_wasserstein_distance, X, Y, **kwargs)) + return _pairwise(pairwise_distances, True, XX, YY, metric=_sklearn_wrapper(pot_wasserstein_distance, X, Y, **kwargs), n_jobs=n_jobs) except ImportError: print("POT (Python Optimal Transport) is not installed. Please install POT or use metric='wasserstein' or metric='hera_wasserstein'") raise elif metric == "sliced_wasserstein": Xproj = _compute_persistence_diagram_projections(X, **kwargs) Yproj = None if Y is None else _compute_persistence_diagram_projections(Y, **kwargs) - return pairwise_distances(XX, YY, metric=_sklearn_wrapper(_sliced_wasserstein_distance_on_projections, Xproj, Yproj)) + return _pairwise(pairwise_distances, True, XX, YY, metric=_sklearn_wrapper(_sliced_wasserstein_distance_on_projections, Xproj, Yproj), n_jobs=n_jobs) elif type(metric) == str: - return pairwise_distances(XX, YY, metric=_sklearn_wrapper(PAIRWISE_DISTANCE_FUNCTIONS[metric], X, Y, **kwargs)) + return _pairwise(pairwise_distances, True, XX, YY, metric=_sklearn_wrapper(PAIRWISE_DISTANCE_FUNCTIONS[metric], X, Y, **kwargs), n_jobs=n_jobs) else: - return pairwise_distances(XX, YY, metric=_sklearn_wrapper(metric, X, Y, **kwargs)) + return _pairwise(pairwise_distances, True, XX, YY, metric=_sklearn_wrapper(metric, X, Y, **kwargs), n_jobs=n_jobs) class SlicedWassersteinDistance(BaseEstimator, TransformerMixin): """ -- cgit v1.2.3 From aebfa07217f58943753967cfd3d7daa9f28d5650 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 2 Jun 2020 17:56:18 +0200 Subject: More n_jobs in metrics --- src/python/gudhi/representations/metrics.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'src/python/gudhi/representations') diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py index 23bccd68..84907160 100644 --- a/src/python/gudhi/representations/metrics.py +++ b/src/python/gudhi/representations/metrics.py @@ -12,7 +12,7 @@ from sklearn.base import BaseEstimator, TransformerMixin from sklearn.metrics import pairwise_distances from gudhi.hera import wasserstein_distance as hera_wasserstein_distance from .preprocessing import Padding -from joblib import Parallel, delayed, effective_n_jobs +from joblib import Parallel, delayed ############################################# # Metrics ################################### @@ -157,6 +157,7 @@ def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", n_job X (list of n numpy arrays of shape (numx2)): first list of persistence diagrams. Y (list of m numpy arrays of shape (numx2)): second list of persistence diagrams (optional). If None, pairwise distances are computed from the first list only. metric: distance to use. It can be either a string ("sliced_wasserstein", "wasserstein", "hera_wasserstein" (Wasserstein distance computed with Hera---note that Hera is also used for the default option "wasserstein"), "pot_wasserstein" (Wasserstein distance computed with POT), "bottleneck", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. If it is a function, make sure that it is symmetric and that it outputs 0 if called on the same two arrays. + n_jobs (int): number of jobs to use for the computation. This uses joblib.Parallel(prefer="threads"), so metrics that do not release the GIL may not scale unless run inside a `joblib.parallel_backend `_ block. **kwargs: optional keyword parameters. Any further parameters are passed directly to the distance function. See the docs of the various distance classes in this module. Returns: @@ -191,14 +192,16 @@ class SlicedWassersteinDistance(BaseEstimator, TransformerMixin): """ This is a class for computing the sliced Wasserstein distance matrix from a list of persistence diagrams. The Sliced Wasserstein distance is computed by projecting the persistence diagrams onto lines, comparing the projections with the 1-norm, and finally integrating over all possible lines. See http://proceedings.mlr.press/v70/carriere17a.html for more details. """ - def __init__(self, num_directions=10): + def __init__(self, num_directions=10, n_jobs=None): """ Constructor for the SlicedWassersteinDistance class. Parameters: num_directions (int): number of lines evenly sampled from [-pi/2,pi/2] in order to approximate and speed up the distance computation (default 10). + n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_distances` for details. """ self.num_directions = num_directions + self.n_jobs = n_jobs def fit(self, X, y=None): """ @@ -221,7 +224,7 @@ class SlicedWassersteinDistance(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise sliced Wasserstein distances. """ - return pairwise_persistence_diagram_distances(X, self.diagrams_, metric="sliced_wasserstein", num_directions=self.num_directions) + return pairwise_persistence_diagram_distances(X, self.diagrams_, metric="sliced_wasserstein", num_directions=self.num_directions, n_jobs=self.n_jobs) def __call__(self, diag1, diag2): """ @@ -242,14 +245,16 @@ class BottleneckDistance(BaseEstimator, TransformerMixin): :Requires: `CGAL `_ :math:`\geq` 4.11.0 """ - def __init__(self, epsilon=None): + def __init__(self, epsilon=None, n_jobs=None): """ Constructor for the BottleneckDistance class. Parameters: epsilon (double): absolute (additive) error tolerated on the distance (default is the smallest positive float), see :func:`gudhi.bottleneck_distance`. + n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_distances` for details. """ self.epsilon = epsilon + self.n_jobs = n_jobs def fit(self, X, y=None): """ @@ -272,7 +277,7 @@ class BottleneckDistance(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise bottleneck distances. """ - Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric="bottleneck", e=self.epsilon) + Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric="bottleneck", e=self.epsilon, n_jobs=self.n_jobs) return Xfit def __call__(self, diag1, diag2): @@ -297,15 +302,17 @@ class PersistenceFisherDistance(BaseEstimator, TransformerMixin): """ This is a class for computing the persistence Fisher distance matrix from a list of persistence diagrams. The persistence Fisher distance is obtained by computing the original Fisher distance between the probability distributions associated to the persistence diagrams given by convolving them with a Gaussian kernel. See http://papers.nips.cc/paper/8205-persistence-fisher-kernel-a-riemannian-manifold-kernel-for-persistence-diagrams for more details. """ - def __init__(self, bandwidth=1., kernel_approx=None): + def __init__(self, bandwidth=1., kernel_approx=None, n_jobs=None): """ Constructor for the PersistenceFisherDistance class. Parameters: bandwidth (double): bandwidth of the Gaussian kernel used to turn persistence diagrams into probability distributions (default 1.). kernel_approx (class): kernel approximation class used to speed up computation (default None). Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). + n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_distances` for details. """ self.bandwidth, self.kernel_approx = bandwidth, kernel_approx + self.n_jobs = n_jobs def fit(self, X, y=None): """ @@ -328,7 +335,7 @@ class PersistenceFisherDistance(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise persistence Fisher distances. """ - return pairwise_persistence_diagram_distances(X, self.diagrams_, metric="persistence_fisher", bandwidth=self.bandwidth, kernel_approx=self.kernel_approx) + return pairwise_persistence_diagram_distances(X, self.diagrams_, metric="persistence_fisher", bandwidth=self.bandwidth, kernel_approx=self.kernel_approx, n_jobs=self.n_jobs) def __call__(self, diag1, diag2): """ @@ -347,7 +354,7 @@ class WassersteinDistance(BaseEstimator, TransformerMixin): """ This is a class for computing the Wasserstein distance matrix from a list of persistence diagrams. """ - def __init__(self, order=2, internal_p=2, mode="pot", delta=0.01): + def __init__(self, order=2, internal_p=2, mode="pot", delta=0.01, n_jobs=None): """ Constructor for the WassersteinDistance class. @@ -356,10 +363,12 @@ class WassersteinDistance(BaseEstimator, TransformerMixin): internal_p (int): ground metric on the (upper-half) plane (i.e. norm l_p in R^2), default value is 2 (euclidean norm), see :func:`gudhi.wasserstein.wasserstein_distance`. mode (str): method for computing Wasserstein distance. Either "pot" or "hera". delta (float): relative error 1+delta. Used only if mode == "hera". + n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_distances` for details. """ self.order, self.internal_p, self.mode = order, internal_p, mode self.metric = "pot_wasserstein" if mode == "pot" else "hera_wasserstein" self.delta = delta + self.n_jobs = n_jobs def fit(self, X, y=None): """ @@ -383,9 +392,9 @@ class WassersteinDistance(BaseEstimator, TransformerMixin): numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise Wasserstein distances. """ if self.metric == "hera_wasserstein": - Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric=self.metric, order=self.order, internal_p=self.internal_p, delta=self.delta) + Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric=self.metric, order=self.order, internal_p=self.internal_p, delta=self.delta, n_jobs=self.n_jobs) else: - Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric=self.metric, order=self.order, internal_p=self.internal_p, matching=False) + Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric=self.metric, order=self.order, internal_p=self.internal_p, matching=False, n_jobs=self.n_jobs) return Xfit def __call__(self, diag1, diag2): -- cgit v1.2.3 From 7706056bb9c0396188201570f9399e636df63df7 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 2 Jun 2020 18:08:52 +0200 Subject: n_jobs for kernels --- src/python/gudhi/representations/kernel_methods.py | 25 +++++++++++++++------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'src/python/gudhi/representations') diff --git a/src/python/gudhi/representations/kernel_methods.py b/src/python/gudhi/representations/kernel_methods.py index c9bd9d01..6e4f0619 100644 --- a/src/python/gudhi/representations/kernel_methods.py +++ b/src/python/gudhi/representations/kernel_methods.py @@ -68,6 +68,7 @@ def pairwise_persistence_diagram_kernels(X, Y=None, kernel="sliced_wasserstein", X (list of n numpy arrays of shape (numx2)): first list of persistence diagrams. Y (list of m numpy arrays of shape (numx2)): second list of persistence diagrams (optional). If None, pairwise kernel values are computed from the first list only. kernel: kernel to use. It can be either a string ("sliced_wasserstein", "persistence_scale_space", "persistence_weighted_gaussian", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. If it is a function, make sure that it is symmetric. + n_jobs (int): number of jobs to use for the computation. This uses joblib.Parallel(prefer="threads"), so kernels that do not release the GIL may not scale unless run inside a `joblib.parallel_backend `_ block. **kwargs: optional keyword parameters. Any further parameters are passed directly to the kernel function. See the docs of the various kernel classes in this module. Returns: @@ -90,16 +91,18 @@ class SlicedWassersteinKernel(BaseEstimator, TransformerMixin): """ This is a class for computing the sliced Wasserstein kernel matrix from a list of persistence diagrams. The sliced Wasserstein kernel is computed by exponentiating the corresponding sliced Wasserstein distance with a Gaussian kernel. See http://proceedings.mlr.press/v70/carriere17a.html for more details. """ - def __init__(self, num_directions=10, bandwidth=1.0): + def __init__(self, num_directions=10, bandwidth=1.0, n_jobs=None): """ Constructor for the SlicedWassersteinKernel class. Parameters: bandwidth (double): bandwidth of the Gaussian kernel applied to the sliced Wasserstein distance (default 1.). num_directions (int): number of lines evenly sampled from [-pi/2,pi/2] in order to approximate and speed up the kernel computation (default 10). + n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_kernels` for details. """ self.bandwidth = bandwidth self.num_directions = num_directions + self.n_jobs = n_jobs def fit(self, X, y=None): """ @@ -122,7 +125,7 @@ class SlicedWassersteinKernel(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise sliced Wasserstein kernel values. """ - return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="sliced_wasserstein", bandwidth=self.bandwidth, num_directions=self.num_directions) + return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="sliced_wasserstein", bandwidth=self.bandwidth, num_directions=self.num_directions, n_jobs=self.n_jobs) def __call__(self, diag1, diag2): """ @@ -141,7 +144,7 @@ class PersistenceWeightedGaussianKernel(BaseEstimator, TransformerMixin): """ This is a class for computing the persistence weighted Gaussian kernel matrix from a list of persistence diagrams. The persistence weighted Gaussian kernel is computed by convolving the persistence diagram points with weighted Gaussian kernels. See http://proceedings.mlr.press/v48/kusano16.html for more details. """ - def __init__(self, bandwidth=1., weight=lambda x: 1, kernel_approx=None): + def __init__(self, bandwidth=1., weight=lambda x: 1, kernel_approx=None, n_jobs=None): """ Constructor for the PersistenceWeightedGaussianKernel class. @@ -149,9 +152,11 @@ class PersistenceWeightedGaussianKernel(BaseEstimator, TransformerMixin): bandwidth (double): bandwidth of the Gaussian kernel with which persistence diagrams will be convolved (default 1.) weight (function): weight function for the persistence diagram points (default constant function, ie lambda x: 1). This function must be defined on 2D points, ie lists or numpy arrays of the form [p_x,p_y]. kernel_approx (class): kernel approximation class used to speed up computation (default None). Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). + n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_kernels` for details. """ self.bandwidth, self.weight = bandwidth, weight self.kernel_approx = kernel_approx + self.n_jobs = n_jobs def fit(self, X, y=None): """ @@ -174,7 +179,7 @@ class PersistenceWeightedGaussianKernel(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise persistence weighted Gaussian kernel values. """ - return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="persistence_weighted_gaussian", bandwidth=self.bandwidth, weight=self.weight, kernel_approx=self.kernel_approx) + return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="persistence_weighted_gaussian", bandwidth=self.bandwidth, weight=self.weight, kernel_approx=self.kernel_approx, n_jobs=self.n_jobs) def __call__(self, diag1, diag2): """ @@ -193,15 +198,17 @@ class PersistenceScaleSpaceKernel(BaseEstimator, TransformerMixin): """ This is a class for computing the persistence scale space kernel matrix from a list of persistence diagrams. The persistence scale space kernel is computed by adding the symmetric to the diagonal of each point in each persistence diagram, with negative weight, and then convolving the points with a Gaussian kernel. See https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Reininghaus_A_Stable_Multi-Scale_2015_CVPR_paper.pdf for more details. """ - def __init__(self, bandwidth=1., kernel_approx=None): + def __init__(self, bandwidth=1., kernel_approx=None, n_jobs=None): """ Constructor for the PersistenceScaleSpaceKernel class. Parameters: bandwidth (double): bandwidth of the Gaussian kernel with which persistence diagrams will be convolved (default 1.) kernel_approx (class): kernel approximation class used to speed up computation (default None). Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). + n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_kernels` for details. """ self.bandwidth, self.kernel_approx = bandwidth, kernel_approx + self.n_jobs = n_jobs def fit(self, X, y=None): """ @@ -224,7 +231,7 @@ class PersistenceScaleSpaceKernel(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise persistence scale space kernel values. """ - return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="persistence_scale_space", bandwidth=self.bandwidth, kernel_approx=self.kernel_approx) + return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="persistence_scale_space", bandwidth=self.bandwidth, kernel_approx=self.kernel_approx, n_jobs=self.n_jobs) def __call__(self, diag1, diag2): """ @@ -243,7 +250,7 @@ class PersistenceFisherKernel(BaseEstimator, TransformerMixin): """ This is a class for computing the persistence Fisher kernel matrix from a list of persistence diagrams. The persistence Fisher kernel is computed by exponentiating the corresponding persistence Fisher distance with a Gaussian kernel. See papers.nips.cc/paper/8205-persistence-fisher-kernel-a-riemannian-manifold-kernel-for-persistence-diagrams for more details. """ - def __init__(self, bandwidth_fisher=1., bandwidth=1., kernel_approx=None): + def __init__(self, bandwidth_fisher=1., bandwidth=1., kernel_approx=None, n_jobs=None): """ Constructor for the PersistenceFisherKernel class. @@ -251,9 +258,11 @@ class PersistenceFisherKernel(BaseEstimator, TransformerMixin): bandwidth (double): bandwidth of the Gaussian kernel applied to the persistence Fisher distance (default 1.). bandwidth_fisher (double): bandwidth of the Gaussian kernel used to turn persistence diagrams into probability distributions by PersistenceFisherDistance class (default 1.). kernel_approx (class): kernel approximation class used to speed up computation (default None). Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). + n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_kernels` for details. """ self.bandwidth = bandwidth self.bandwidth_fisher, self.kernel_approx = bandwidth_fisher, kernel_approx + self.n_jobs = n_jobs def fit(self, X, y=None): """ @@ -276,7 +285,7 @@ class PersistenceFisherKernel(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise persistence Fisher kernel values. """ - return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="persistence_fisher", bandwidth=self.bandwidth, bandwidth_fisher=self.bandwidth_fisher, kernel_approx=self.kernel_approx) + return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="persistence_fisher", bandwidth=self.bandwidth, bandwidth_fisher=self.bandwidth_fisher, kernel_approx=self.kernel_approx, n_jobs=self.n_jobs) def __call__(self, diag1, diag2): """ -- cgit v1.2.3 From 69852030a6d1b68f3283b5727c6b944a9c7f5e73 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 2 Jun 2020 21:07:29 +0200 Subject: Some test --- src/python/gudhi/representations/kernel_methods.py | 2 +- src/python/gudhi/representations/metrics.py | 2 +- src/python/test/test_representations.py | 33 +++++++++++++++++++++- 3 files changed, 34 insertions(+), 3 deletions(-) (limited to 'src/python/gudhi/representations') diff --git a/src/python/gudhi/representations/kernel_methods.py b/src/python/gudhi/representations/kernel_methods.py index 6e4f0619..23fd23c7 100644 --- a/src/python/gudhi/representations/kernel_methods.py +++ b/src/python/gudhi/representations/kernel_methods.py @@ -75,7 +75,7 @@ def pairwise_persistence_diagram_kernels(X, Y=None, kernel="sliced_wasserstein", numpy array of shape (nxm): kernel matrix. """ XX = np.reshape(np.arange(len(X)), [-1,1]) - YY = None if Y is None else np.reshape(np.arange(len(Y)), [-1,1]) + YY = None if Y is None or Y is X else np.reshape(np.arange(len(Y)), [-1,1]) if kernel == "sliced_wasserstein": return np.exp(-pairwise_persistence_diagram_distances(X, Y, metric="sliced_wasserstein", num_directions=kwargs["num_directions"], n_jobs=n_jobs) / kwargs["bandwidth"]) elif kernel == "persistence_fisher": diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py index 84907160..cf2e0879 100644 --- a/src/python/gudhi/representations/metrics.py +++ b/src/python/gudhi/representations/metrics.py @@ -164,7 +164,7 @@ def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", n_job numpy array of shape (nxm): distance matrix """ XX = np.reshape(np.arange(len(X)), [-1,1]) - YY = None if Y is None else np.reshape(np.arange(len(Y)), [-1,1]) + YY = None if Y is None or Y is X else np.reshape(np.arange(len(Y)), [-1,1]) if metric == "bottleneck": try: from .. import bottleneck_distance diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index dba7f952..589cee00 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -1,12 +1,43 @@ import os import sys import matplotlib.pyplot as plt +import numpy as np +import pytest + def test_representations_examples(): # Disable graphics for testing purposes - plt.show = lambda:None + plt.show = lambda: None here = os.path.dirname(os.path.realpath(__file__)) sys.path.append(here + "/../example") import diagram_vectorizations_distances_kernels return None + + +from gudhi.representations.metrics import * +from gudhi.representations.kernel_methods import * + + +def _n_diags(n): + l = [] + for _ in range(n): + a = np.random.rand(50, 2) + a[:, 1] += a[:, 0] # So that y >= x + l.append(a) + return l + + +def test_multiple(): + l1 = _n_diags(9) + l2 = _n_diags(11) + l1b = l1.copy() + d1 = pairwise_persistence_diagram_distances(l1, e=0.00001, n_jobs=4) + d2 = BottleneckDistance(epsilon=0.00001).fit_transform(l1) + d3 = pairwise_persistence_diagram_distances(l1, l1b, e=0.00001, n_jobs=4) + assert d1 == pytest.approx(d2) + assert d3 == pytest.approx(d2, abs=1e-5) # Because of 0 entries (on the diagonal) + d1 = pairwise_persistence_diagram_distances(l1, l2, metric="wasserstein", order=2, internal_p=2) + d2 = WassersteinDistance(order=2, internal_p=2, n_jobs=4).fit(l2).transform(l1) + print(d1.shape, d2.shape) + assert d1 == pytest.approx(d2, rel=.02) -- cgit v1.2.3