summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorVincent Rouvreau <10407034+VincentRouvreau@users.noreply.github.com>2020-07-23 15:38:29 +0200
committerGitHub <noreply@github.com>2020-07-23 15:38:29 +0200
commit7bd8c85deb864166c228cea8ea2edfba32cced24 (patch)
treed1aaa95f4135661347162bb147dd89583775b4d7 /src
parent444ec77fe16783c35ef86598011a662c5d6e8d92 (diff)
parent96eb09e4f034fd71f5674f75e5e4584a7402b218 (diff)
Merge pull request #348 from martinroyer/atol
Introduction of ATOL in finite vectorisation method
Diffstat (limited to 'src')
-rw-r--r--src/python/doc/representations_sum.inc2
-rw-r--r--src/python/gudhi/representations/vector_methods.py144
-rwxr-xr-xsrc/python/test/test_representations.py17
3 files changed, 159 insertions, 4 deletions
diff --git a/src/python/doc/representations_sum.inc b/src/python/doc/representations_sum.inc
index 323a0920..4298aea9 100644
--- a/src/python/doc/representations_sum.inc
+++ b/src/python/doc/representations_sum.inc
@@ -2,7 +2,7 @@
:widths: 30 40 30
+------------------------------------------------------------------+----------------------------------------------------------------+-------------------------------------------------------------+
- | .. figure:: | Vectorizations, distances and kernels that work on persistence | :Author: Mathieu Carrière |
+ | .. figure:: | Vectorizations, distances and kernels that work on persistence | :Author: Mathieu Carrière, Martin Royer |
| img/sklearn-tda.png | diagrams, compatible with scikit-learn. | |
| | | :Since: GUDHI 3.1.0 |
| | | |
diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 46fee086..5ca127f6 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -1,16 +1,17 @@
# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
-# Author(s): Mathieu Carrière
+# Author(s): Mathieu Carrière, Martin Royer
#
-# Copyright (C) 2018-2019 Inria
+# Copyright (C) 2018-2020 Inria
#
# Modification(s):
-# - YYYY/MM Author: Description of the modification
+# - 2020/06 Martin: ATOL integration
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler
from sklearn.neighbors import DistanceMetric
+from sklearn.metrics import pairwise
from .preprocessing import DiagramScaler, BirthPersistenceTransform
@@ -574,3 +575,140 @@ class ComplexPolynomial(BaseEstimator, TransformerMixin):
numpy array with shape (**threshold**): output complex vector of coefficients.
"""
return self.fit_transform([diag])[0,:]
+
+def _lapl_contrast(measure, centers, inertias):
+ """contrast function for vectorising `measure` in ATOL"""
+ return np.exp(-pairwise.pairwise_distances(measure, Y=centers) / inertias)
+
+def _gaus_contrast(measure, centers, inertias):
+ """contrast function for vectorising `measure` in ATOL"""
+ return np.exp(-pairwise.pairwise_distances(measure, Y=centers, squared=True) / inertias**2)
+
+def _indicator_contrast(diags, centers, inertias):
+ """contrast function for vectorising `measure` in ATOL"""
+ robe_curve = np.clip(2-pairwise.pairwise_distances(diags, Y=centers)/inertias, 0, 1)
+ return robe_curve
+
+def _cloud_weighting(measure):
+ """automatic uniform weighting with mass 1 for `measure` in ATOL"""
+ return np.ones(shape=measure.shape[0])
+
+def _iidproba_weighting(measure):
+ """automatic uniform weighting with mass 1/N for `measure` in ATOL"""
+ return np.ones(shape=measure.shape[0]) / measure.shape[0]
+
+class Atol(BaseEstimator, TransformerMixin):
+ """
+ This class allows to vectorise measures (e.g. point clouds, persistence diagrams, etc) after a quantisation step.
+
+ ATOL paper: :cite:`royer2019atol`
+
+ Example
+ --------
+ >>> from sklearn.cluster import KMeans
+ >>> from gudhi.representations.vector_methods import Atol
+ >>> import numpy as np
+ >>> a = np.array([[1, 2, 4], [1, 4, 0], [1, 0, 4]])
+ >>> b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]])
+ >>> c = np.array([[3, 2, -1], [1, 2, -1]])
+ >>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006))
+ >>> atol_vectoriser.fit(X=[a, b, c]).centers
+ array([[ 2. , 0.66666667, 3.33333333],
+ [ 2.6 , 2.8 , -0.4 ]])
+ >>> atol_vectoriser(a)
+ array([1.18168665, 0.42375966])
+ >>> atol_vectoriser(c)
+ array([0.02062512, 1.25157463])
+ >>> atol_vectoriser.transform(X=[a, b, c])
+ array([[1.18168665, 0.42375966],
+ [0.29861028, 1.06330156],
+ [0.02062512, 1.25157463]])
+ """
+ def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"):
+ """
+ Constructor for the Atol measure vectorisation class.
+
+ Parameters:
+ quantiser (Object): Object with `fit` (sklearn API consistent) and `cluster_centers` and `n_clusters`
+ attributes, e.g. sklearn.cluster.KMeans. It will be fitted when the Atol object function `fit` is called.
+ weighting_method (string): constant generic function for weighting the measure points
+ choose from {"cloud", "iidproba"}
+ (default: constant function, i.e. the measure is seen as a point cloud by default).
+ This will have no impact if weights are provided along with measures all the way: `fit` and `transform`.
+ contrast (string): constant function for evaluating proximity of a measure with respect to centers
+ choose from {"gaussian", "laplacian", "indicator"}
+ (default: gaussian contrast function, see page 3 in the ATOL paper).
+ """
+ self.quantiser = quantiser
+ self.contrast = {
+ "gaussian": _gaus_contrast,
+ "laplacian": _lapl_contrast,
+ "indicator": _indicator_contrast,
+ }.get(contrast, _gaus_contrast)
+ self.weighting_method = {
+ "cloud" : _cloud_weighting,
+ "iidproba": _iidproba_weighting,
+ }.get(weighting_method, _cloud_weighting)
+
+ def fit(self, X, y=None, sample_weight=None):
+ """
+ Calibration step: fit centers to the sample measures and derive inertias between centers.
+
+ Parameters:
+ X (list N x d numpy arrays): input measures in R^d from which to learn center locations and inertias
+ (measures can have different N).
+ y: Ignored, present for API consistency by convention.
+ sample_weight (list of numpy arrays): weights for each measure point in X, optional.
+ If None, the object's weighting_method will be used.
+
+ Returns:
+ self
+ """
+ if not hasattr(self.quantiser, 'fit'):
+ raise TypeError("quantiser %s has no `fit` attribute." % (self.quantiser))
+ if sample_weight is None:
+ sample_weight = np.concatenate([self.weighting_method(measure) for measure in X])
+
+ measures_concat = np.concatenate(X)
+ self.quantiser.fit(X=measures_concat, sample_weight=sample_weight)
+ self.centers = self.quantiser.cluster_centers_
+ if self.quantiser.n_clusters == 1:
+ dist_centers = pairwise.pairwise_distances(measures_concat)
+ np.fill_diagonal(dist_centers, 0)
+ self.inertias = np.array([np.max(dist_centers)/2])
+ else:
+ dist_centers = pairwise.pairwise_distances(self.centers)
+ dist_centers[dist_centers == 0] = np.inf
+ self.inertias = np.min(dist_centers, axis=0)/2
+ return self
+
+ def __call__(self, measure, sample_weight=None):
+ """
+ Apply measure vectorisation on a single measure.
+
+ Parameters:
+ measure (n x d numpy array): input measure in R^d.
+
+ Returns:
+ numpy array in R^self.quantiser.n_clusters.
+ """
+ if sample_weight is None:
+ sample_weight = self.weighting_method(measure)
+ return np.sum(sample_weight * self.contrast(measure, self.centers, self.inertias.T).T, axis=1)
+
+ def transform(self, X, sample_weight=None):
+ """
+ Apply measure vectorisation on a list of measures.
+
+ Parameters:
+ X (list N x d numpy arrays): input measures in R^d from which to learn center locations and inertias
+ (measures can have different N).
+ sample_weight (list of numpy arrays): weights for each measure point in X, optional.
+ If None, the object's weighting_method will be used.
+
+ Returns:
+ numpy array with shape (number of measures) x (self.quantiser.n_clusters).
+ """
+ if sample_weight is None:
+ sample_weight = [self.weighting_method(measure) for measure in X]
+ return np.stack([self(measure, sample_weight=weight) for measure, weight in zip(X, sample_weight)])
diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py
index 589cee00..e5c211a0 100755
--- a/src/python/test/test_representations.py
+++ b/src/python/test/test_representations.py
@@ -4,6 +4,8 @@ import matplotlib.pyplot as plt
import numpy as np
import pytest
+from sklearn.cluster import KMeans
+
def test_representations_examples():
# Disable graphics for testing purposes
@@ -15,6 +17,7 @@ def test_representations_examples():
return None
+from gudhi.representations.vector_methods import Atol
from gudhi.representations.metrics import *
from gudhi.representations.kernel_methods import *
@@ -41,3 +44,17 @@ def test_multiple():
d2 = WassersteinDistance(order=2, internal_p=2, n_jobs=4).fit(l2).transform(l1)
print(d1.shape, d2.shape)
assert d1 == pytest.approx(d2, rel=.02)
+
+
+def test_dummy_atol():
+ a = np.array([[1, 2, 4], [1, 4, 0], [1, 0, 4]])
+ b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]])
+ c = np.array([[3, 2, -1], [1, 2, -1]])
+
+ for weighting_method in ["cloud", "iidproba"]:
+ for contrast in ["gaussian", "laplacian", "indicator"]:
+ atol_vectoriser = Atol(quantiser=KMeans(n_clusters=1, random_state=202006), weighting_method=weighting_method, contrast=contrast)
+ atol_vectoriser.fit([a, b, c])
+ atol_vectoriser(a)
+ atol_vectoriser.transform(X=[a, b, c])
+