From 9c45abcdf165519c58d59556dea74fd9f27c8396 Mon Sep 17 00:00:00 2001 From: martinroyer Date: Mon, 8 Jun 2020 15:56:34 +0200 Subject: ATOL introduction as finite vectorisation method --- src/python/gudhi/representations/vector_methods.py | 128 ++++++++++++++++++++- 1 file changed, 125 insertions(+), 3 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 46fee086..df66ffc3 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -1,16 +1,17 @@ # This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. # See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. -# Author(s): Mathieu Carrière +# Author(s): Mathieu Carrière, Martin Royer # -# Copyright (C) 2018-2019 Inria +# Copyright (C) 2018-2020 Inria # # Modification(s): -# - YYYY/MM Author: Description of the modification +# - 2020/06 Martin: ATOL integration import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler from sklearn.neighbors import DistanceMetric +from sklearn.metrics import pairwise from .preprocessing import DiagramScaler, BirthPersistenceTransform @@ -574,3 +575,124 @@ class ComplexPolynomial(BaseEstimator, TransformerMixin): numpy array with shape (**threshold**): output complex vector of coefficients. """ return self.fit_transform([diag])[0,:] + +def _lapl_contrast(measure, centers, inertias, eps=1e-8): + """contrast function for vectorising `measure` in ATOL""" + return np.exp(-np.sqrt(pairwise.pairwise_distances(measure, Y=centers) / (inertias + eps))) + +def _gaus_contrast(measure, centers, inertias, eps=1e-8): + """contrast function for vectorising `measure` in ATOL""" + return np.exp(-pairwise.pairwise_distances(measure, Y=centers) / (inertias + eps)) + +def _indicator_contrast(diags, centers, inertias, eps=1e-8): + """contrast function for vectorising `measure` in ATOL""" + pair_dist = pairwise.pairwise_distances(diags, Y=centers) + flat_circ = (pair_dist < (inertias+eps)).astype(int) + robe_curve = np.positive((2-pair_dist/(inertias+eps))*((inertias+eps) < pair_dist).astype(int)) + return flat_circ + robe_curve + +def _cloud_weighting(measure): + """automatic uniform weighting with mass 1 for `measure` in ATOL""" + return np.ones(shape=measure.shape[0]) + +def _iidproba_weighting(measure): + """automatic uniform weighting with mass 1/N for `measure` in ATOL""" + return np.ones(shape=measure.shape[0]) / measure.shape[0] + +class Atol(BaseEstimator, TransformerMixin): + """ + This class allows to vectorise measures (e.g. point clouds, persistence diagrams, etc) after a quantisation step. + + ATOL paper: https://arxiv.org/abs/1909.13472 + """ + def __init__(self, quantiser, weighting_method="cloud", contrast="gaus"): + """ + Constructor for the Atol measure vectorisation class. + + Parameters: + quantiser (Object): Object with `fit` (sklearn API consistent) and `cluster_centers` and `n_clusters` + attributes (default: MiniBatchKMeans()). This object will be fitted by the function `fit`. + weighting_method (function): constant generic function for weighting the measure points + choose from {"cloud", "iidproba"} + (default: constant function, i.e. the measure is seen as a point cloud by default). + This will have no impact if weights are provided along with measures all the way: `fit` and `transform`. + contrast (string): constant function for evaluating proximity of a measure with respect to centers + choose from {"gaus", "lapl", "indi"} + (default: laplacian contrast function, see page 3 in the ATOL paper). + """ + self.quantiser = quantiser + self.contrast = { + "gaus": _gaus_contrast, + "lapl": _lapl_contrast, + "indi": _indicator_contrast, + }.get(contrast, _gaus_contrast) + self.centers = np.ones(shape=(self.quantiser.n_clusters, 2))*np.inf + self.inertias = np.full(self.quantiser.n_clusters, np.nan) + self.weighting_method = { + "cloud" : _cloud_weighting, + "iidproba": _iidproba_weighting, + }.get(weighting_method, _cloud_weighting) + + def fit(self, X, y=None, sample_weight=None): + """ + Calibration step: fit centers to the sample measures and derive inertias between centers. + + Parameters: + X (list N x d numpy arrays): input measures in R^d from which to learn center locations and inertias + (measures can have different N). + y: Ignored, present for API consistency by convention. + sample_weight (list of numpy arrays): weights for each measure point in X, optional. + If None, the object's weighting_method will be used. + + Returns: + self + """ + if not hasattr(self.quantiser, 'fit'): + raise TypeError("quantiser %s has no `fit` attribute." % (self.quantiser)) + if len(X) < self.quantiser.n_clusters: + # in case there are not enough observations for fitting the quantiser, we add random points in [0, 1]^2 + # @Martin: perhaps this behaviour is to be externalised and a warning should be raised instead + random_points = np.random.rand(self.quantiser.n_clusters-len(X), X[0].shape[1]) + X.append(random_points) + if sample_weight is None: + sample_weight = np.concatenate([self.weighting_method(measure) for measure in X]) + + measures_concat = np.concatenate(X) + self.quantiser.fit(X=measures_concat, sample_weight=sample_weight) + self.centers = self.quantiser.cluster_centers_ + labels = np.argmin(pairwise.pairwise_distances(measures_concat, Y=self.centers), axis=1) + dist_centers = pairwise.pairwise_distances(self.centers) + np.fill_diagonal(dist_centers, np.inf) + self.inertias = np.min(dist_centers, axis=0)/2 + return self + + def __call__(self, measure, sample_weight=None): + """ + Apply measure vectorisation on a single measure. + + Parameters: + measure (n x d numpy array): input measure in R^d. + + Returns: + numpy array in R^self.quantiser.n_clusters. + """ + if sample_weight is None: + sample_weight = self.weighting_method(measure) + return np.sum(sample_weight * self.contrast(measure, self.centers, self.inertias.T).T, axis=1) + + def transform(self, X, sample_weight=None): + """ + Apply measure vectorisation on a list of measures. + + Parameters: + X (list N x d numpy arrays): input measures in R^d from which to learn center locations and inertias + (measures can have different N). + sample_weight (list of numpy arrays): weights for each measure point in X, optional. + If None, the object's weighting_method will be used. + + Returns: + numpy array with shape (number of measures) x (self.quantiser.n_clusters). + """ + if sample_weight is None: + sample_weight = [self.weighting_method(measure) for measure in X] + return np.stack([self(measure, sample_weight=weight) for measure, weight in zip(X, sample_weight)]) -- cgit v1.2.3 From 9b4de0e29a01552b4bb3f47fe0d3f01f5601c000 Mon Sep 17 00:00:00 2001 From: martinroyer <16647869+martinroyer@users.noreply.github.com> Date: Tue, 9 Jun 2020 08:42:30 +0200 Subject: Apply suggestions from code review --- src/python/gudhi/representations/vector_methods.py | 45 ++++++++++++++++------ 1 file changed, 33 insertions(+), 12 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index df66ffc3..a09b9356 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -577,26 +577,26 @@ class ComplexPolynomial(BaseEstimator, TransformerMixin): return self.fit_transform([diag])[0,:] def _lapl_contrast(measure, centers, inertias, eps=1e-8): - """contrast function for vectorising `measure` in ATOL""" + """contrast function for vectorising `measure` in ATOL""" return np.exp(-np.sqrt(pairwise.pairwise_distances(measure, Y=centers) / (inertias + eps))) def _gaus_contrast(measure, centers, inertias, eps=1e-8): - """contrast function for vectorising `measure` in ATOL""" + """contrast function for vectorising `measure` in ATOL""" return np.exp(-pairwise.pairwise_distances(measure, Y=centers) / (inertias + eps)) def _indicator_contrast(diags, centers, inertias, eps=1e-8): - """contrast function for vectorising `measure` in ATOL""" + """contrast function for vectorising `measure` in ATOL""" pair_dist = pairwise.pairwise_distances(diags, Y=centers) flat_circ = (pair_dist < (inertias+eps)).astype(int) robe_curve = np.positive((2-pair_dist/(inertias+eps))*((inertias+eps) < pair_dist).astype(int)) return flat_circ + robe_curve def _cloud_weighting(measure): - """automatic uniform weighting with mass 1 for `measure` in ATOL""" + """automatic uniform weighting with mass 1 for `measure` in ATOL""" return np.ones(shape=measure.shape[0]) def _iidproba_weighting(measure): - """automatic uniform weighting with mass 1/N for `measure` in ATOL""" + """automatic uniform weighting with mass 1/N for `measure` in ATOL""" return np.ones(shape=measure.shape[0]) / measure.shape[0] class Atol(BaseEstimator, TransformerMixin): @@ -611,20 +611,41 @@ class Atol(BaseEstimator, TransformerMixin): Parameters: quantiser (Object): Object with `fit` (sklearn API consistent) and `cluster_centers` and `n_clusters` - attributes (default: MiniBatchKMeans()). This object will be fitted by the function `fit`. - weighting_method (function): constant generic function for weighting the measure points + attributes. This object will be fitted by the function `fit`. + weighting_method (string): constant generic function for weighting the measure points choose from {"cloud", "iidproba"} (default: constant function, i.e. the measure is seen as a point cloud by default). This will have no impact if weights are provided along with measures all the way: `fit` and `transform`. contrast (string): constant function for evaluating proximity of a measure with respect to centers - choose from {"gaus", "lapl", "indi"} + choose from {"gaussian", "laplacian", "indicator"} (default: laplacian contrast function, see page 3 in the ATOL paper). - """ + + Example + -------- + >>> from sklearn.cluster import KMeans + >>> import numpy as np + >>> a = np.array([[1, 2, 4], [1, 4, 0], [1, 0, 4]]) + >>> b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]]) + >>> c = np.array([[3, 2, -1], [1, 2, -1]]) + >>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2)) + >>> atol_vectoriser.fit(X=[a, b, c]) + >>> atol_vectoriser.centers + array([[ 2.6 , 2.8 , -0.4 ], + [ 2. , 0.66666667, 3.33333333]]) + >>> atol_vectoriser(a) + array([0.58394704, 1.0769395 ]) + >>> atol_vectoriser(c) + array([1.02816136, 0.23559623]) + >>> atol_vectoriser.transform(X=[a, b, c]) + array([[0.58394704, 1.0769395 ], + [1.04696684, 0.56203292], + [1.02816136, 0.23559623]]) + """ self.quantiser = quantiser self.contrast = { - "gaus": _gaus_contrast, - "lapl": _lapl_contrast, - "indi": _indicator_contrast, + "gaussian": _gaus_contrast, + "laplacian": _lapl_contrast, + "indicator": _indicator_contrast, }.get(contrast, _gaus_contrast) self.centers = np.ones(shape=(self.quantiser.n_clusters, 2))*np.inf self.inertias = np.full(self.quantiser.n_clusters, np.nan) -- cgit v1.2.3 From 285919ad4a19c6bf9ec11355cd32bc4b39014365 Mon Sep 17 00:00:00 2001 From: martinroyer <16647869+martinroyer@users.noreply.github.com> Date: Tue, 9 Jun 2020 09:12:18 +0200 Subject: fix minimal example --- src/python/gudhi/representations/vector_methods.py | 31 +++++++++++----------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index a09b9356..f77338b7 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -604,25 +604,11 @@ class Atol(BaseEstimator, TransformerMixin): This class allows to vectorise measures (e.g. point clouds, persistence diagrams, etc) after a quantisation step. ATOL paper: https://arxiv.org/abs/1909.13472 - """ - def __init__(self, quantiser, weighting_method="cloud", contrast="gaus"): - """ - Constructor for the Atol measure vectorisation class. - - Parameters: - quantiser (Object): Object with `fit` (sklearn API consistent) and `cluster_centers` and `n_clusters` - attributes. This object will be fitted by the function `fit`. - weighting_method (string): constant generic function for weighting the measure points - choose from {"cloud", "iidproba"} - (default: constant function, i.e. the measure is seen as a point cloud by default). - This will have no impact if weights are provided along with measures all the way: `fit` and `transform`. - contrast (string): constant function for evaluating proximity of a measure with respect to centers - choose from {"gaussian", "laplacian", "indicator"} - (default: laplacian contrast function, see page 3 in the ATOL paper). Example -------- >>> from sklearn.cluster import KMeans + >>> from gudhi.representations.vector_methods import Atol >>> import numpy as np >>> a = np.array([[1, 2, 4], [1, 4, 0], [1, 0, 4]]) >>> b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]]) @@ -641,6 +627,21 @@ class Atol(BaseEstimator, TransformerMixin): [1.04696684, 0.56203292], [1.02816136, 0.23559623]]) """ + def __init__(self, quantiser, weighting_method="cloud", contrast="gaus"): + """ + Constructor for the Atol measure vectorisation class. + + Parameters: + quantiser (Object): Object with `fit` (sklearn API consistent) and `cluster_centers` and `n_clusters` + attributes. This object will be fitted by the function `fit`. + weighting_method (string): constant generic function for weighting the measure points + choose from {"cloud", "iidproba"} + (default: constant function, i.e. the measure is seen as a point cloud by default). + This will have no impact if weights are provided along with measures all the way: `fit` and `transform`. + contrast (string): constant function for evaluating proximity of a measure with respect to centers + choose from {"gaussian", "laplacian", "indicator"} + (default: laplacian contrast function, see page 3 in the ATOL paper). + """ self.quantiser = quantiser self.contrast = { "gaussian": _gaus_contrast, -- cgit v1.2.3 From 5d5a2b1a3b6a2d3d2dc061e6e9c940677e782c80 Mon Sep 17 00:00:00 2001 From: martinroyer <16647869+martinroyer@users.noreply.github.com> Date: Tue, 9 Jun 2020 09:13:28 +0200 Subject: Update src/python/gudhi/representations/vector_methods.py --- src/python/gudhi/representations/vector_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index f77338b7..16e91812 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -641,7 +641,7 @@ class Atol(BaseEstimator, TransformerMixin): contrast (string): constant function for evaluating proximity of a measure with respect to centers choose from {"gaussian", "laplacian", "indicator"} (default: laplacian contrast function, see page 3 in the ATOL paper). - """ + """ self.quantiser = quantiser self.contrast = { "gaussian": _gaus_contrast, -- cgit v1.2.3 From 588e7127d1616e40bf7e3de7e7797b54aee137da Mon Sep 17 00:00:00 2001 From: martinroyer <16647869+martinroyer@users.noreply.github.com> Date: Tue, 9 Jun 2020 09:42:40 +0200 Subject: tweak test results from doc --- src/python/gudhi/representations/vector_methods.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 16e91812..d3b85636 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -614,18 +614,17 @@ class Atol(BaseEstimator, TransformerMixin): >>> b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]]) >>> c = np.array([[3, 2, -1], [1, 2, -1]]) >>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2)) - >>> atol_vectoriser.fit(X=[a, b, c]) - >>> atol_vectoriser.centers + >>> atol_vectoriser.fit(X=[a, b, c]).centers array([[ 2.6 , 2.8 , -0.4 ], - [ 2. , 0.66666667, 3.33333333]]) + [ 2. , 0.66666667, 3.33333333]]) >>> atol_vectoriser(a) array([0.58394704, 1.0769395 ]) >>> atol_vectoriser(c) array([1.02816136, 0.23559623]) >>> atol_vectoriser.transform(X=[a, b, c]) array([[0.58394704, 1.0769395 ], - [1.04696684, 0.56203292], - [1.02816136, 0.23559623]]) + [1.04696684, 0.56203292], + [1.02816136, 0.23559623]]) """ def __init__(self, quantiser, weighting_method="cloud", contrast="gaus"): """ -- cgit v1.2.3 From 7eed21c364e244df7fceae11ce9d1c319db8bec9 Mon Sep 17 00:00:00 2001 From: martinroyer <16647869+martinroyer@users.noreply.github.com> Date: Tue, 9 Jun 2020 11:21:29 +0200 Subject: Update src/python/gudhi/representations/vector_methods.py --- src/python/gudhi/representations/vector_methods.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index d3b85636..0a26a8e5 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -626,7 +626,7 @@ class Atol(BaseEstimator, TransformerMixin): [1.04696684, 0.56203292], [1.02816136, 0.23559623]]) """ - def __init__(self, quantiser, weighting_method="cloud", contrast="gaus"): + def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"): """ Constructor for the Atol measure vectorisation class. @@ -639,7 +639,7 @@ class Atol(BaseEstimator, TransformerMixin): This will have no impact if weights are provided along with measures all the way: `fit` and `transform`. contrast (string): constant function for evaluating proximity of a measure with respect to centers choose from {"gaussian", "laplacian", "indicator"} - (default: laplacian contrast function, see page 3 in the ATOL paper). + (default: gaussian contrast function, see page 3 in the ATOL paper). """ self.quantiser = quantiser self.contrast = { -- cgit v1.2.3 From a22b48e00ca858c0e9c300cee87f265d70aeecc7 Mon Sep 17 00:00:00 2001 From: martinroyer <16647869+martinroyer@users.noreply.github.com> Date: Tue, 9 Jun 2020 12:23:20 +0200 Subject: remove randomness in example --- src/python/gudhi/representations/vector_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 0a26a8e5..98cd6153 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -613,7 +613,7 @@ class Atol(BaseEstimator, TransformerMixin): >>> a = np.array([[1, 2, 4], [1, 4, 0], [1, 0, 4]]) >>> b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]]) >>> c = np.array([[3, 2, -1], [1, 2, -1]]) - >>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2)) + >>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006)) >>> atol_vectoriser.fit(X=[a, b, c]).centers array([[ 2.6 , 2.8 , -0.4 ], [ 2. , 0.66666667, 3.33333333]]) -- cgit v1.2.3 From 3d126356fd3fcaeb2bde8824b8c5894450fccdd9 Mon Sep 17 00:00:00 2001 From: martinroyer <16647869+martinroyer@users.noreply.github.com> Date: Tue, 9 Jun 2020 12:43:28 +0200 Subject: awful test --- src/python/gudhi/representations/vector_methods.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 98cd6153..77b2836f 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -615,16 +615,16 @@ class Atol(BaseEstimator, TransformerMixin): >>> c = np.array([[3, 2, -1], [1, 2, -1]]) >>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006)) >>> atol_vectoriser.fit(X=[a, b, c]).centers - array([[ 2.6 , 2.8 , -0.4 ], - [ 2. , 0.66666667, 3.33333333]]) + array([[ 2. , 0.66666667, 3.33333333], + [ 2.6 , 2.8 , -0.4 ]]) >>> atol_vectoriser(a) - array([0.58394704, 1.0769395 ]) + array([1.0769395 , 0.58394704]) >>> atol_vectoriser(c) - array([1.02816136, 0.23559623]) + array([0.23559623, 1.02816136]) >>> atol_vectoriser.transform(X=[a, b, c]) - array([[0.58394704, 1.0769395 ], - [1.04696684, 0.56203292], - [1.02816136, 0.23559623]]) + array([[1.0769395 , 0.58394704], + [0.56203292, 1.04696684], + [0.23559623, 1.02816136]]) """ def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"): """ -- cgit v1.2.3 From cdba6045ddf1dd41e8addb7351d1c87a5506ba0f Mon Sep 17 00:00:00 2001 From: martinroyer <16647869+martinroyer@users.noreply.github.com> Date: Wed, 10 Jun 2020 10:20:13 +0200 Subject: Apply suggestions from code review --- src/python/gudhi/representations/vector_methods.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 77b2836f..667f963b 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -578,17 +578,17 @@ class ComplexPolynomial(BaseEstimator, TransformerMixin): def _lapl_contrast(measure, centers, inertias, eps=1e-8): """contrast function for vectorising `measure` in ATOL""" - return np.exp(-np.sqrt(pairwise.pairwise_distances(measure, Y=centers) / (inertias + eps))) + return np.exp(-pairwise.pairwise_distances(measure, Y=centers) / (inertias + eps)) def _gaus_contrast(measure, centers, inertias, eps=1e-8): """contrast function for vectorising `measure` in ATOL""" - return np.exp(-pairwise.pairwise_distances(measure, Y=centers) / (inertias + eps)) + return np.exp(-pairwise.pairwise_distances(measure, Y=centers)**2 / (inertias**2 + eps)) def _indicator_contrast(diags, centers, inertias, eps=1e-8): """contrast function for vectorising `measure` in ATOL""" pair_dist = pairwise.pairwise_distances(diags, Y=centers) flat_circ = (pair_dist < (inertias+eps)).astype(int) - robe_curve = np.positive((2-pair_dist/(inertias+eps))*((inertias+eps) < pair_dist).astype(int)) + robe_curve = np.clip(2-pair_dist/(inertias+eps), 0, 1) return flat_circ + robe_curve def _cloud_weighting(measure): @@ -638,7 +638,7 @@ class Atol(BaseEstimator, TransformerMixin): (default: constant function, i.e. the measure is seen as a point cloud by default). This will have no impact if weights are provided along with measures all the way: `fit` and `transform`. contrast (string): constant function for evaluating proximity of a measure with respect to centers - choose from {"gaussian", "laplacian", "indicator"} + choose from {"gaussian", "laplacian", "indicator"} (default: gaussian contrast function, see page 3 in the ATOL paper). """ self.quantiser = quantiser @@ -670,7 +670,7 @@ class Atol(BaseEstimator, TransformerMixin): """ if not hasattr(self.quantiser, 'fit'): raise TypeError("quantiser %s has no `fit` attribute." % (self.quantiser)) - if len(X) < self.quantiser.n_clusters: + if np.sum([measure.shape[0] for measure in X]) < self.quantiser.n_clusters: # in case there are not enough observations for fitting the quantiser, we add random points in [0, 1]^2 # @Martin: perhaps this behaviour is to be externalised and a warning should be raised instead random_points = np.random.rand(self.quantiser.n_clusters-len(X), X[0].shape[1]) @@ -681,7 +681,6 @@ class Atol(BaseEstimator, TransformerMixin): measures_concat = np.concatenate(X) self.quantiser.fit(X=measures_concat, sample_weight=sample_weight) self.centers = self.quantiser.cluster_centers_ - labels = np.argmin(pairwise.pairwise_distances(measures_concat, Y=self.centers), axis=1) dist_centers = pairwise.pairwise_distances(self.centers) np.fill_diagonal(dist_centers, np.inf) self.inertias = np.min(dist_centers, axis=0)/2 -- cgit v1.2.3 From bef50e15e499e40d4dd4f5d991ec87eab4236108 Mon Sep 17 00:00:00 2001 From: martinroyer <16647869+martinroyer@users.noreply.github.com> Date: Wed, 10 Jun 2020 10:32:48 +0200 Subject: remove epsilons --- src/python/gudhi/representations/vector_methods.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 667f963b..ede1087f 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -576,19 +576,19 @@ class ComplexPolynomial(BaseEstimator, TransformerMixin): """ return self.fit_transform([diag])[0,:] -def _lapl_contrast(measure, centers, inertias, eps=1e-8): +def _lapl_contrast(measure, centers, inertias): """contrast function for vectorising `measure` in ATOL""" - return np.exp(-pairwise.pairwise_distances(measure, Y=centers) / (inertias + eps)) + return np.exp(-pairwise.pairwise_distances(measure, Y=centers) / inertias) -def _gaus_contrast(measure, centers, inertias, eps=1e-8): +def _gaus_contrast(measure, centers, inertias): """contrast function for vectorising `measure` in ATOL""" - return np.exp(-pairwise.pairwise_distances(measure, Y=centers)**2 / (inertias**2 + eps)) + return np.exp(-pairwise.pairwise_distances(measure, Y=centers)**2 / inertias**2) -def _indicator_contrast(diags, centers, inertias, eps=1e-8): +def _indicator_contrast(diags, centers, inertias): """contrast function for vectorising `measure` in ATOL""" pair_dist = pairwise.pairwise_distances(diags, Y=centers) - flat_circ = (pair_dist < (inertias+eps)).astype(int) - robe_curve = np.clip(2-pair_dist/(inertias+eps), 0, 1) + flat_circ = (pair_dist < inertias).astype(int) + robe_curve = np.clip(2-pair_dist/inertias, 0, 1) return flat_circ + robe_curve def _cloud_weighting(measure): -- cgit v1.2.3 From 76529cae58f8a2736a1730fd81a9e12c3f4c7e19 Mon Sep 17 00:00:00 2001 From: martinroyer <16647869+martinroyer@users.noreply.github.com> Date: Thu, 11 Jun 2020 16:47:16 +0200 Subject: Apply suggestions from code review #456 (thank you Marc!) --- src/python/gudhi/representations/vector_methods.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index ede1087f..49c05c51 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -586,10 +586,8 @@ def _gaus_contrast(measure, centers, inertias): def _indicator_contrast(diags, centers, inertias): """contrast function for vectorising `measure` in ATOL""" - pair_dist = pairwise.pairwise_distances(diags, Y=centers) - flat_circ = (pair_dist < inertias).astype(int) - robe_curve = np.clip(2-pair_dist/inertias, 0, 1) - return flat_circ + robe_curve + robe_curve = np.clip(2-pairwise.pairwise_distances(diags, Y=centers)/inertias, 0, 1) + return robe_curve def _cloud_weighting(measure): """automatic uniform weighting with mass 1 for `measure` in ATOL""" @@ -603,7 +601,7 @@ class Atol(BaseEstimator, TransformerMixin): """ This class allows to vectorise measures (e.g. point clouds, persistence diagrams, etc) after a quantisation step. - ATOL paper: https://arxiv.org/abs/1909.13472 + ATOL paper: :cite:`royer2019atol` Example -------- @@ -632,9 +630,9 @@ class Atol(BaseEstimator, TransformerMixin): Parameters: quantiser (Object): Object with `fit` (sklearn API consistent) and `cluster_centers` and `n_clusters` - attributes. This object will be fitted by the function `fit`. + attributes, e.g. sklearn.cluster.KMeans. It will be fitted when the Atol object function `fit` is called. weighting_method (string): constant generic function for weighting the measure points - choose from {"cloud", "iidproba"} + choose from {"cloud", "iidproba"} (default: constant function, i.e. the measure is seen as a point cloud by default). This will have no impact if weights are provided along with measures all the way: `fit` and `transform`. contrast (string): constant function for evaluating proximity of a measure with respect to centers @@ -647,8 +645,6 @@ class Atol(BaseEstimator, TransformerMixin): "laplacian": _lapl_contrast, "indicator": _indicator_contrast, }.get(contrast, _gaus_contrast) - self.centers = np.ones(shape=(self.quantiser.n_clusters, 2))*np.inf - self.inertias = np.full(self.quantiser.n_clusters, np.nan) self.weighting_method = { "cloud" : _cloud_weighting, "iidproba": _iidproba_weighting, @@ -670,11 +666,6 @@ class Atol(BaseEstimator, TransformerMixin): """ if not hasattr(self.quantiser, 'fit'): raise TypeError("quantiser %s has no `fit` attribute." % (self.quantiser)) - if np.sum([measure.shape[0] for measure in X]) < self.quantiser.n_clusters: - # in case there are not enough observations for fitting the quantiser, we add random points in [0, 1]^2 - # @Martin: perhaps this behaviour is to be externalised and a warning should be raised instead - random_points = np.random.rand(self.quantiser.n_clusters-len(X), X[0].shape[1]) - X.append(random_points) if sample_weight is None: sample_weight = np.concatenate([self.weighting_method(measure) for measure in X]) -- cgit v1.2.3 From a90843c6bf5f7f05392c4262efb60e94ccfb0e48 Mon Sep 17 00:00:00 2001 From: martinroyer <16647869+martinroyer@users.noreply.github.com> Date: Thu, 11 Jun 2020 17:03:40 +0200 Subject: test value tweak --- src/python/gudhi/representations/vector_methods.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 49c05c51..5a45f179 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -616,13 +616,13 @@ class Atol(BaseEstimator, TransformerMixin): array([[ 2. , 0.66666667, 3.33333333], [ 2.6 , 2.8 , -0.4 ]]) >>> atol_vectoriser(a) - array([1.0769395 , 0.58394704]) + array([1.18168665, 0.42375966]) >>> atol_vectoriser(c) - array([0.23559623, 1.02816136]) + array([0.02062512, 1.25157463]) >>> atol_vectoriser.transform(X=[a, b, c]) - array([[1.0769395 , 0.58394704], - [0.56203292, 1.04696684], - [0.23559623, 1.02816136]]) + array([[1.18168665, 0.42375966], + [0.29861028, 1.06330156], + [0.02062512, 1.25157463]]) """ def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"): """ -- cgit v1.2.3 From ec1c3ad11aeb46a67926a615fd5c00fbc70b501e Mon Sep 17 00:00:00 2001 From: martinroyer <16647869+martinroyer@users.noreply.github.com> Date: Thu, 11 Jun 2020 21:17:27 +0200 Subject: case n_centers = 1 --- src/python/gudhi/representations/vector_methods.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 5a45f179..a576267c 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -672,9 +672,14 @@ class Atol(BaseEstimator, TransformerMixin): measures_concat = np.concatenate(X) self.quantiser.fit(X=measures_concat, sample_weight=sample_weight) self.centers = self.quantiser.cluster_centers_ - dist_centers = pairwise.pairwise_distances(self.centers) - np.fill_diagonal(dist_centers, np.inf) - self.inertias = np.min(dist_centers, axis=0)/2 + if self.quantiser.n_clusters == 1: + dist_centers = pairwise.pairwise_distances(measures_concat) + np.fill_diagonal(dist_centers, 0) + self.inertias = np.max(dist_centers)/2 + else: + dist_centers = pairwise.pairwise_distances(self.centers) + np.fill_diagonal(dist_centers, np.inf) + self.inertias = np.min(dist_centers, axis=0)/2 return self def __call__(self, measure, sample_weight=None): -- cgit v1.2.3 From 1abc47f5bf65ee3451a907ecfc9db84c0471ef93 Mon Sep 17 00:00:00 2001 From: martinroyer <16647869+martinroyer@users.noreply.github.com> Date: Tue, 16 Jun 2020 22:36:31 +0200 Subject: Update src/python/gudhi/representations/vector_methods.py --- src/python/gudhi/representations/vector_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index a576267c..566c24a3 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -675,7 +675,7 @@ class Atol(BaseEstimator, TransformerMixin): if self.quantiser.n_clusters == 1: dist_centers = pairwise.pairwise_distances(measures_concat) np.fill_diagonal(dist_centers, 0) - self.inertias = np.max(dist_centers)/2 + self.inertias = np.array([np.max(dist_centers)/2]) else: dist_centers = pairwise.pairwise_distances(self.centers) np.fill_diagonal(dist_centers, np.inf) -- cgit v1.2.3 From 4a558f9542283533d1218a35ce43751615ca2ffd Mon Sep 17 00:00:00 2001 From: martinroyer <16647869+martinroyer@users.noreply.github.com> Date: Fri, 19 Jun 2020 14:29:55 +0200 Subject: fix for null inertias --- src/python/gudhi/representations/vector_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 566c24a3..aaf7ffeb 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -678,7 +678,7 @@ class Atol(BaseEstimator, TransformerMixin): self.inertias = np.array([np.max(dist_centers)/2]) else: dist_centers = pairwise.pairwise_distances(self.centers) - np.fill_diagonal(dist_centers, np.inf) + dist_centers[dist_centers == 0] = np.inf self.inertias = np.min(dist_centers, axis=0)/2 return self -- cgit v1.2.3 From b7d9cc2b1e8f58f563d23c3588d785ced98222b3 Mon Sep 17 00:00:00 2001 From: martinroyer-buntu Date: Fri, 3 Jul 2020 11:01:26 +0200 Subject: small optim --- src/python/gudhi/representations/vector_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index aaf7ffeb..5ca127f6 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -582,7 +582,7 @@ def _lapl_contrast(measure, centers, inertias): def _gaus_contrast(measure, centers, inertias): """contrast function for vectorising `measure` in ATOL""" - return np.exp(-pairwise.pairwise_distances(measure, Y=centers)**2 / inertias**2) + return np.exp(-pairwise.pairwise_distances(measure, Y=centers, squared=True) / inertias**2) def _indicator_contrast(diags, centers, inertias): """contrast function for vectorising `measure` in ATOL""" -- cgit v1.2.3