diff options
Diffstat (limited to 'src/python/gudhi/representations/vector_methods.py')
-rw-r--r-- | src/python/gudhi/representations/vector_methods.py | 80 |
1 files changed, 52 insertions, 28 deletions
diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 84bc99a2..e883b5dd 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -6,6 +6,7 @@ # # Modification(s): # - 2020/06 Martin: ATOL integration +# - 2021/11 Vincent Rouvreau: factorize _automatic_sample_range import numpy as np from sklearn.base import BaseEstimator, TransformerMixin @@ -45,10 +46,14 @@ class PersistenceImage(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ if np.isnan(np.array(self.im_range)).any(): - new_X = BirthPersistenceTransform().fit_transform(X) - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(new_X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.im_range = np.where(np.isnan(np.array(self.im_range)), np.array([mx, Mx, my, My]), np.array(self.im_range)) + try: + new_X = BirthPersistenceTransform().fit_transform(X) + pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(new_X,y) + [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] + self.im_range = np.where(np.isnan(np.array(self.im_range)), np.array([mx, Mx, my, My]), np.array(self.im_range)) + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + pass return self def transform(self, X): @@ -94,6 +99,28 @@ class PersistenceImage(BaseEstimator, TransformerMixin): """ return self.fit_transform([diag])[0,:] +def _automatic_sample_range(sample_range, X, y): + """ + Compute and returns sample range from the persistence diagrams if one of the sample_range values is numpy.nan. + + Parameters: + sample_range (a numpy array of 2 float): minimum and maximum of all piecewise-linear function domains, of + the form [x_min, x_max]. + X (list of n x 2 numpy arrays): input persistence diagrams. + y (n x 1 array): persistence diagram labels (unused). + """ + nan_in_range = np.isnan(sample_range) + if nan_in_range.any(): + try: + pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) + [mx,my] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]] + [Mx,My] = [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] + return np.where(nan_in_range, np.array([mx, My]), sample_range) + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + pass + return sample_range + class Landscape(BaseEstimator, TransformerMixin): """ This is a class for computing persistence landscapes from a list of persistence diagrams. A persistence landscape is a collection of 1D piecewise-linear functions computed from the rank function associated to the persistence diagram. These piecewise-linear functions are then sampled evenly on a given range and the corresponding vectors of samples are concatenated and returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details. @@ -119,10 +146,7 @@ class Landscape(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - if self.nan_in_range.any(): - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(self.nan_in_range, np.array([mx, My]), np.array(self.sample_range)) + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) return self def transform(self, X): @@ -218,10 +242,7 @@ class Silhouette(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - if np.isnan(np.array(self.sample_range)).any(): - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) return self def transform(self, X): @@ -307,10 +328,7 @@ class BettiCurve(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - if np.isnan(np.array(self.sample_range)).any(): - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) return self def transform(self, X): @@ -374,10 +392,7 @@ class Entropy(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - if np.isnan(np.array(self.sample_range)).any(): - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) return self def transform(self, X): @@ -396,9 +411,13 @@ class Entropy(BaseEstimator, TransformerMixin): new_X = BirthPersistenceTransform().fit_transform(X) for i in range(num_diag): - orig_diagram, diagram, num_pts_in_diag = X[i], new_X[i], X[i].shape[0] - new_diagram = DiagramScaler(use=True, scalers=[([1], MaxAbsScaler())]).fit_transform([diagram])[0] + try: + new_diagram = DiagramScaler(use=True, scalers=[([1], MaxAbsScaler())]).fit_transform([diagram])[0] + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + assert len(diagram) == 0 + new_diagram = np.empty(shape = [0, 2]) if self.mode == "scalar": ent = - np.sum( np.multiply(new_diagram[:,1], np.log(new_diagram[:,1])) ) @@ -412,12 +431,11 @@ class Entropy(BaseEstimator, TransformerMixin): max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution) for k in range(min_idx, max_idx): ent[k] += (-1) * new_diagram[j,1] * np.log(new_diagram[j,1]) - if self.normalized: - ent = ent / np.linalg.norm(ent, ord=1) - Xfit.append(np.reshape(ent,[1,-1])) - - Xfit = np.concatenate(Xfit, 0) + if self.normalized: + ent = ent / np.linalg.norm(ent, ord=1) + Xfit.append(np.reshape(ent,[1,-1])) + Xfit = np.concatenate(Xfit, axis=0) return Xfit def __call__(self, diag): @@ -478,7 +496,13 @@ class TopologicalVector(BaseEstimator, TransformerMixin): diagram, num_pts_in_diag = X[i], X[i].shape[0] pers = 0.5 * (diagram[:,1]-diagram[:,0]) min_pers = np.minimum(pers,np.transpose(pers)) - distances = DistanceMetric.get_metric("chebyshev").pairwise(diagram) + # Works fine with sklearn 1.0, but an ValueError exception is thrown on past versions + try: + distances = DistanceMetric.get_metric("chebyshev").pairwise(diagram) + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + assert len(diagram) == 0 + distances = np.empty(shape = [0, 0]) vect = np.flip(np.sort(np.triu(np.minimum(distances, min_pers)), axis=None), 0) dim = min(len(vect), thresh) Xfit[i, :dim] = vect[:dim] |