1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
|
# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
# Author(s): Vincent Rouvreau
#
# Copyright (C) 2021 Inria
#
# Modification(s):
# - YYYY/MM Author: Description of the modification
from .. import CubicalComplex
from sklearn.base import BaseEstimator, TransformerMixin
# joblib is required by scikit-learn
from joblib import Parallel, delayed
# Mermaid sequence diagram - https://mermaid-js.github.io/mermaid-live-editor/
# sequenceDiagram
# USER->>CubicalPersistence: fit_transform(X)
# CubicalPersistence->>thread1: _tranform(X[0])
# CubicalPersistence->>thread2: _tranform(X[1])
# Note right of CubicalPersistence: ...
# thread1->>CubicalPersistence: [array( H0(X[0]) ), array( H1(X[0]) )]
# thread2->>CubicalPersistence: [array( H0(X[1]) ), array( H1(X[1]) )]
# Note right of CubicalPersistence: ...
# CubicalPersistence->>USER: [[array( H0(X[0]) ), array( H1(X[0]) )],<br/> [array( H0(X[1]) ), array( H1(X[1]) )],<br/> ...]
class CubicalPersistence(BaseEstimator, TransformerMixin):
"""
This is a class for computing the persistence diagrams from a cubical complex.
"""
def __init__(self, dimensions=None, max_persistence_dimension=0, only_this_dim=-1, homology_coeff_field=11, min_persistence=0., n_jobs=None):
"""
Constructor for the CubicalPersistence class.
Parameters:
dimensions (list of int): A list of number of top dimensional cells if cells filtration values will require
to be reshaped (cf. :func:`~gudhi.sklearn.cubical_persistence.CubicalPersistence.transform`)
max_persistence_dimension (int): The returned persistence diagrams maximal dimension. Default value is `0`.
Ignored if `only_this_dim` is set.
only_this_dim (int): The returned persistence diagrams dimension. If `only_this_dim` is set,
`max_persistence_dimension` will be ignored.
Short circuit the use of :class:`~gudhi.sklearn.post_processing.DimensionSelector` when only one
dimension matters.
homology_coeff_field (int): The homology coefficient field. Must be a prime number. Default value is 11.
min_persistence (float): The minimum persistence value to take into account (strictly greater than
`min_persistence`). Default value is `0.0`. Sets `min_persistence` to `-1.0` to see all values.
n_jobs (int): cf. https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html
"""
self.dimensions = dimensions
self.max_persistence_dimension = max_persistence_dimension
self.only_this_dim = only_this_dim
self.homology_coeff_field = homology_coeff_field
self.min_persistence = min_persistence
self.n_jobs = n_jobs
def fit(self, X, Y=None):
"""
Nothing to be done, but useful when included in a scikit-learn Pipeline.
"""
return self
def __transform(self, cells):
cubical_complex = CubicalComplex(top_dimensional_cells=cells, dimensions=self.dimensions)
cubical_complex.compute_persistence(
homology_coeff_field=self.homology_coeff_field, min_persistence=self.min_persistence
)
return [cubical_complex.persistence_intervals_in_dimension(dim) for dim in range(self.max_persistence_dimension + 1)]
def __transform_only_this_dim(self, cells):
cubical_complex = CubicalComplex(top_dimensional_cells=cells, dimensions=self.dimensions)
cubical_complex.compute_persistence(
homology_coeff_field=self.homology_coeff_field, min_persistence=self.min_persistence
)
return cubical_complex.persistence_intervals_in_dimension(self.only_this_dim)
def transform(self, X, Y=None):
"""
Compute all the cubical complexes and their associated persistence diagrams.
Parameters:
X (list of list of double OR list of numpy.ndarray): List of cells filtration values that can be flatten if
`dimensions` is set in the constructor, or already with the correct shape in a numpy.ndarray (and
`dimensions` must not be set).
Returns:
Persistence diagrams in the format:
- If `only_this_dim` was set to `n`: `[array( Hn(X[0]) ), array( Hn(X[1]) ), ...]`
- else: `[[array( H0(X[0]) ), array( H1(X[0]) ), ...], [array( H0(X[1]) ), array( H1(X[1]) ), ...], ...]`
"""
if self.only_this_dim == -1:
# threads is preferred as cubical construction and persistence computation releases the GIL
return Parallel(n_jobs=self.n_jobs, prefer="threads")(delayed(self.__transform)(cells) for cells in X)
else:
# threads is preferred as cubical construction and persistence computation releases the GIL
return Parallel(n_jobs=self.n_jobs, prefer="threads")(delayed(self.__transform_only_this_dim)(cells) for cells in X)
|