summaryrefslogtreecommitdiff
path: root/src/python/gudhi/representations/vector_methods.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/python/gudhi/representations/vector_methods.py')
-rw-r--r--src/python/gudhi/representations/vector_methods.py80
1 files changed, 52 insertions, 28 deletions
diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 84bc99a2..e883b5dd 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -6,6 +6,7 @@
#
# Modification(s):
# - 2020/06 Martin: ATOL integration
+# - 2021/11 Vincent Rouvreau: factorize _automatic_sample_range
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
@@ -45,10 +46,14 @@ class PersistenceImage(BaseEstimator, TransformerMixin):
y (n x 1 array): persistence diagram labels (unused).
"""
if np.isnan(np.array(self.im_range)).any():
- new_X = BirthPersistenceTransform().fit_transform(X)
- pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(new_X,y)
- [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
- self.im_range = np.where(np.isnan(np.array(self.im_range)), np.array([mx, Mx, my, My]), np.array(self.im_range))
+ try:
+ new_X = BirthPersistenceTransform().fit_transform(X)
+ pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(new_X,y)
+ [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
+ self.im_range = np.where(np.isnan(np.array(self.im_range)), np.array([mx, Mx, my, My]), np.array(self.im_range))
+ except ValueError:
+ # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507
+ pass
return self
def transform(self, X):
@@ -94,6 +99,28 @@ class PersistenceImage(BaseEstimator, TransformerMixin):
"""
return self.fit_transform([diag])[0,:]
+def _automatic_sample_range(sample_range, X, y):
+ """
+ Compute and returns sample range from the persistence diagrams if one of the sample_range values is numpy.nan.
+
+ Parameters:
+ sample_range (a numpy array of 2 float): minimum and maximum of all piecewise-linear function domains, of
+ the form [x_min, x_max].
+ X (list of n x 2 numpy arrays): input persistence diagrams.
+ y (n x 1 array): persistence diagram labels (unused).
+ """
+ nan_in_range = np.isnan(sample_range)
+ if nan_in_range.any():
+ try:
+ pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
+ [mx,my] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]]
+ [Mx,My] = [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
+ return np.where(nan_in_range, np.array([mx, My]), sample_range)
+ except ValueError:
+ # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507
+ pass
+ return sample_range
+
class Landscape(BaseEstimator, TransformerMixin):
"""
This is a class for computing persistence landscapes from a list of persistence diagrams. A persistence landscape is a collection of 1D piecewise-linear functions computed from the rank function associated to the persistence diagram. These piecewise-linear functions are then sampled evenly on a given range and the corresponding vectors of samples are concatenated and returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details.
@@ -119,10 +146,7 @@ class Landscape(BaseEstimator, TransformerMixin):
X (list of n x 2 numpy arrays): input persistence diagrams.
y (n x 1 array): persistence diagram labels (unused).
"""
- if self.nan_in_range.any():
- pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
- [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
- self.sample_range = np.where(self.nan_in_range, np.array([mx, My]), np.array(self.sample_range))
+ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
return self
def transform(self, X):
@@ -218,10 +242,7 @@ class Silhouette(BaseEstimator, TransformerMixin):
X (list of n x 2 numpy arrays): input persistence diagrams.
y (n x 1 array): persistence diagram labels (unused).
"""
- if np.isnan(np.array(self.sample_range)).any():
- pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
- [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
- self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range))
+ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
return self
def transform(self, X):
@@ -307,10 +328,7 @@ class BettiCurve(BaseEstimator, TransformerMixin):
X (list of n x 2 numpy arrays): input persistence diagrams.
y (n x 1 array): persistence diagram labels (unused).
"""
- if np.isnan(np.array(self.sample_range)).any():
- pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
- [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
- self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range))
+ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
return self
def transform(self, X):
@@ -374,10 +392,7 @@ class Entropy(BaseEstimator, TransformerMixin):
X (list of n x 2 numpy arrays): input persistence diagrams.
y (n x 1 array): persistence diagram labels (unused).
"""
- if np.isnan(np.array(self.sample_range)).any():
- pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
- [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
- self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range))
+ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
return self
def transform(self, X):
@@ -396,9 +411,13 @@ class Entropy(BaseEstimator, TransformerMixin):
new_X = BirthPersistenceTransform().fit_transform(X)
for i in range(num_diag):
-
orig_diagram, diagram, num_pts_in_diag = X[i], new_X[i], X[i].shape[0]
- new_diagram = DiagramScaler(use=True, scalers=[([1], MaxAbsScaler())]).fit_transform([diagram])[0]
+ try:
+ new_diagram = DiagramScaler(use=True, scalers=[([1], MaxAbsScaler())]).fit_transform([diagram])[0]
+ except ValueError:
+ # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507
+ assert len(diagram) == 0
+ new_diagram = np.empty(shape = [0, 2])
if self.mode == "scalar":
ent = - np.sum( np.multiply(new_diagram[:,1], np.log(new_diagram[:,1])) )
@@ -412,12 +431,11 @@ class Entropy(BaseEstimator, TransformerMixin):
max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
for k in range(min_idx, max_idx):
ent[k] += (-1) * new_diagram[j,1] * np.log(new_diagram[j,1])
- if self.normalized:
- ent = ent / np.linalg.norm(ent, ord=1)
- Xfit.append(np.reshape(ent,[1,-1]))
-
- Xfit = np.concatenate(Xfit, 0)
+ if self.normalized:
+ ent = ent / np.linalg.norm(ent, ord=1)
+ Xfit.append(np.reshape(ent,[1,-1]))
+ Xfit = np.concatenate(Xfit, axis=0)
return Xfit
def __call__(self, diag):
@@ -478,7 +496,13 @@ class TopologicalVector(BaseEstimator, TransformerMixin):
diagram, num_pts_in_diag = X[i], X[i].shape[0]
pers = 0.5 * (diagram[:,1]-diagram[:,0])
min_pers = np.minimum(pers,np.transpose(pers))
- distances = DistanceMetric.get_metric("chebyshev").pairwise(diagram)
+ # Works fine with sklearn 1.0, but an ValueError exception is thrown on past versions
+ try:
+ distances = DistanceMetric.get_metric("chebyshev").pairwise(diagram)
+ except ValueError:
+ # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507
+ assert len(diagram) == 0
+ distances = np.empty(shape = [0, 0])
vect = np.flip(np.sort(np.triu(np.minimum(distances, min_pers)), axis=None), 0)
dim = min(len(vect), thresh)
Xfit[i, :dim] = vect[:dim]