diff options
Diffstat (limited to 'src/python/gudhi')
-rw-r--r-- | src/python/gudhi/representations/preprocessing.py | 57 | ||||
-rw-r--r-- | src/python/gudhi/sklearn/__init__.py | 0 | ||||
-rw-r--r-- | src/python/gudhi/sklearn/cubical_persistence.py | 110 | ||||
-rw-r--r-- | src/python/gudhi/tensorflow/cubical_layer.py | 2 |
4 files changed, 164 insertions, 5 deletions
diff --git a/src/python/gudhi/representations/preprocessing.py b/src/python/gudhi/representations/preprocessing.py index a8545349..8722e162 100644 --- a/src/python/gudhi/representations/preprocessing.py +++ b/src/python/gudhi/representations/preprocessing.py @@ -1,10 +1,11 @@ # This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. # See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. -# Author(s): Mathieu Carrière +# Author(s): Mathieu Carrière, Vincent Rouvreau # # Copyright (C) 2018-2019 Inria # # Modification(s): +# - 2021/10 Vincent Rouvreau: Add DimensionSelector # - YYYY/MM Author: Description of the modification import numpy as np @@ -75,7 +76,7 @@ class Clamping(BaseEstimator, TransformerMixin): Constructor for the Clamping class. Parameters: - limit (double): clamping value (default np.inf). + limit (float): clamping value (default np.inf). """ self.minimum = minimum self.maximum = maximum @@ -234,7 +235,7 @@ class ProminentPoints(BaseEstimator, TransformerMixin): use (bool): whether to use the class or not (default False). location (string): either "upper" or "lower" (default "upper"). Whether to keep the points that are far away ("upper") or close ("lower") to the diagonal. num_pts (int): cardinality threshold (default 10). If location == "upper", keep the top **num_pts** points that are the farthest away from the diagonal. If location == "lower", keep the top **num_pts** points that are the closest to the diagonal. - threshold (double): distance-to-diagonal threshold (default -1). If location == "upper", keep the points that are at least at a distance **threshold** from the diagonal. If location == "lower", keep the points that are at most at a distance **threshold** from the diagonal. + threshold (float): distance-to-diagonal threshold (default -1). If location == "upper", keep the points that are at least at a distance **threshold** from the diagonal. 
If location == "lower", keep the points that are at most at a distance **threshold** from the diagonal. """ self.num_pts = num_pts self.threshold = threshold @@ -317,7 +318,7 @@ class DiagramSelector(BaseEstimator, TransformerMixin): Parameters: use (bool): whether to use the class or not (default False). - limit (double): second coordinate value that is the criterion for being an essential point (default numpy.inf). + limit (float): second coordinate value that is the criterion for being an essential point (default numpy.inf). point_type (string): either "finite" or "essential". The type of the points that are going to be extracted. """ self.use, self.limit, self.point_type = use, limit, point_type @@ -363,3 +364,51 @@ class DiagramSelector(BaseEstimator, TransformerMixin): n x 2 numpy array: extracted persistence diagram. """ return self.fit_transform([diag])[0] + + +# Mermaid sequence diagram - https://mermaid-js.github.io/mermaid-live-editor/ +# sequenceDiagram +# USER->>DimensionSelector: fit_transform(<br/>[[array( Hi(X0) ), array( Hj(X0) ), ...],<br/> [array( Hi(X1) ), array( Hj(X1) ), ...],<br/> ...]) +# DimensionSelector->>thread1: _transform([array( Hi(X0) ), array( Hj(X0) )], ...) +# DimensionSelector->>thread2: _transform([array( Hi(X1) ), array( Hj(X1) )], ...) +# Note right of DimensionSelector: ... +# thread1->>DimensionSelector: array( Hn(X0) ) +# thread2->>DimensionSelector: array( Hn(X1) ) +# Note right of DimensionSelector: ... +# DimensionSelector->>USER: [array( Hn(X0) ), <br/> array( Hn(X1) ), <br/> ...] + +class DimensionSelector(BaseEstimator, TransformerMixin): + """ + This is a class to select persistence diagrams in a specific dimension from its index. + """ + + def __init__(self, index=0): + """ + Constructor for the DimensionSelector class. + + Parameters: + index (int): The returned persistence diagrams dimension index. Default value is `0`. 
+ """ + self.index = index + + def fit(self, X, Y=None): + """ + Nothing to be done, but useful when included in a scikit-learn Pipeline. + """ + return self + + def transform(self, X, Y=None): + """ + Select persistence diagrams from its dimension. + + Parameters: + X (list of list of tuple): List of list of persistence pairs, i.e. + `[[array( Hi(X0) ), array( Hj(X0) ), ...], [array( Hi(X1) ), array( Hj(X1) ), ...], ...]` + + Returns: + list of tuple: + Persistence diagrams in a specific dimension, i.e. if `index` was set to `m` and `Hn` is at index `m` of + the input, it returns `[array( Hn(X0) ), array( Hn(X1) ), ...]` + """ + + return [persistence[self.index] for persistence in X] diff --git a/src/python/gudhi/sklearn/__init__.py b/src/python/gudhi/sklearn/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/python/gudhi/sklearn/__init__.py diff --git a/src/python/gudhi/sklearn/cubical_persistence.py b/src/python/gudhi/sklearn/cubical_persistence.py new file mode 100644 index 00000000..672af278 --- /dev/null +++ b/src/python/gudhi/sklearn/cubical_persistence.py @@ -0,0 +1,110 @@ +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +# Author(s): Vincent Rouvreau +# +# Copyright (C) 2021 Inria +# +# Modification(s): +# - YYYY/MM Author: Description of the modification + +from .. import CubicalComplex +from sklearn.base import BaseEstimator, TransformerMixin + +import numpy as np +# joblib is required by scikit-learn +from joblib import Parallel, delayed + +# Mermaid sequence diagram - https://mermaid-js.github.io/mermaid-live-editor/ +# sequenceDiagram +# USER->>CubicalPersistence: fit_transform(X) +# CubicalPersistence->>thread1: _transform(X[0]) +# CubicalPersistence->>thread2: _transform(X[1]) +# Note right of CubicalPersistence: ... 
+# thread1->>CubicalPersistence: [array( H0(X[0]) ), array( H1(X[0]) )] +# thread2->>CubicalPersistence: [array( H0(X[1]) ), array( H1(X[1]) )] +# Note right of CubicalPersistence: ... +# CubicalPersistence->>USER: [[array( H0(X[0]) ), array( H1(X[0]) )],<br/> [array( H0(X[1]) ), array( H1(X[1]) )],<br/> ...] + + +class CubicalPersistence(BaseEstimator, TransformerMixin): + """ + This is a class for computing the persistence diagrams from a cubical complex. + """ + + def __init__( + self, + homology_dimensions, + newshape=None, + homology_coeff_field=11, + min_persistence=0.0, + n_jobs=None, + ): + """ + Constructor for the CubicalPersistence class. + + Parameters: + homology_dimensions (int or list of int): The returned persistence diagrams dimension(s). + Short circuit the use of :class:`~gudhi.representations.preprocessing.DimensionSelector` when only one + dimension matters (in other words, when `homology_dimensions` is an int). + newshape (tuple of ints): If cells filtration values require to be reshaped + (cf. :func:`~gudhi.sklearn.cubical_persistence.CubicalPersistence.transform`), set `newshape` + to perform `numpy.reshape(X, newshape, order='C')` in + :func:`~gudhi.sklearn.cubical_persistence.CubicalPersistence.transform` method. + homology_coeff_field (int): The homology coefficient field. Must be a prime number. Default value is 11. + min_persistence (float): The minimum persistence value to take into account (strictly greater than + `min_persistence`). Default value is `0.0`. Set `min_persistence` to `-1.0` to see all values. + n_jobs (int): cf. https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html + """ + self.homology_dimensions = homology_dimensions + self.newshape = newshape + self.homology_coeff_field = homology_coeff_field + self.min_persistence = min_persistence + self.n_jobs = n_jobs + + def fit(self, X, Y=None): + """ + Nothing to be done, but useful when included in a scikit-learn Pipeline. 
+ """ + return self + + def __transform(self, cells): + cubical_complex = CubicalComplex(top_dimensional_cells=cells) + cubical_complex.compute_persistence( + homology_coeff_field=self.homology_coeff_field, min_persistence=self.min_persistence + ) + return [ + cubical_complex.persistence_intervals_in_dimension(dim) for dim in self.homology_dimensions + ] + + def __transform_only_this_dim(self, cells): + cubical_complex = CubicalComplex(top_dimensional_cells=cells) + cubical_complex.compute_persistence( + homology_coeff_field=self.homology_coeff_field, min_persistence=self.min_persistence + ) + return cubical_complex.persistence_intervals_in_dimension(self.homology_dimensions) + + def transform(self, X, Y=None): + """Compute all the cubical complexes and their associated persistence diagrams. + + :param X: List of cells filtration values (`numpy.reshape(X, newshape, order='C')` if `newshape` is set with a tuple of ints). + :type X: list of list of float OR list of numpy.ndarray + + :return: Persistence diagrams in the format: + + - If `homology_dimensions` was set to `n`: `[array( Hn(X[0]) ), array( Hn(X[1]) ), ...]` + - If `homology_dimensions` was set to `[i, j]`: `[[array( Hi(X[0]) ), array( Hj(X[0]) )], [array( Hi(X[1]) ), array( Hj(X[1]) )], ...]` + :rtype: list of (,2) array_like or list of list of (,2) array_like + """ + if self.newshape is not None: + X = np.reshape(X, self.newshape, order='C') + + # Depends on whether homology_dimensions is an integer or a list of integers (else case) + if isinstance(self.homology_dimensions, int): + # threads is preferred as cubical construction and persistence computation release the GIL + return Parallel(n_jobs=self.n_jobs, prefer="threads")( + delayed(self.__transform_only_this_dim)(cells) for cells in X + ) + else: + # threads is preferred as cubical construction and persistence computation release the GIL + return Parallel(n_jobs=self.n_jobs, prefer="threads")(delayed(self.__transform)(cells) for cells in X) + diff --git 
a/src/python/gudhi/tensorflow/cubical_layer.py b/src/python/gudhi/tensorflow/cubical_layer.py index 3304e719..5df2c370 100644 --- a/src/python/gudhi/tensorflow/cubical_layer.py +++ b/src/python/gudhi/tensorflow/cubical_layer.py @@ -18,7 +18,7 @@ def _Cubical(Xflat, Xdim, dimensions, homology_coeff_field): cc = CubicalComplex(dimensions=Xdim[::-1], top_dimensional_cells=Xflat) cc.compute_persistence(homology_coeff_field=homology_coeff_field) - # Retrieve and ouput image indices/pixels corresponding to positive and negative simplices + # Retrieve and output image indices/pixels corresponding to positive and negative simplices cof_pp = cc.cofaces_of_persistence_pairs() L_cofs = [] |