Diffstat (limited to 'src')
-rw-r--r--  src/python/doc/representations.rst                              |  2
-rwxr-xr-x  src/python/example/diagram_vectorizations_distances_kernels.py  |  2
-rw-r--r--  src/python/gudhi/representations/kernel_methods.py              |  2
-rw-r--r--  src/python/gudhi/representations/metrics.py                     | 11
-rw-r--r--  src/python/gudhi/representations/preprocessing.py               | 10
-rw-r--r--  src/python/gudhi/representations/vector_methods.py              | 34
6 files changed, 31 insertions(+), 30 deletions(-)
diff --git a/src/python/doc/representations.rst b/src/python/doc/representations.rst
index a137a035..c870f834 100644
--- a/src/python/doc/representations.rst
+++ b/src/python/doc/representations.rst
@@ -10,7 +10,7 @@ Representations manual
This module, originally named sklearn_tda, aims at bridging the gap between persistence diagrams and machine learning tools, in particular scikit-learn. It provides tools, using the scikit-learn standard interface, to compute distances and kernels on diagrams, and to convert diagrams into vectors.
-A diagram is represented as a numpy array of shape (n,2), as can be obtained from `SimplexTree.persistence_intervals_in_dimension` for instance. Points at infinity are represented as a numpy array of shape (n,1), storing only the birth time.
+A diagram is represented as a numpy array of shape (n,2), as can be obtained from :func:`gudhi.SimplexTree.persistence_intervals_in_dimension` for instance. Points at infinity are represented as a numpy array of shape (n,1), storing only the birth time.
A small example is provided
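For reference, a minimal sketch of the diagram format this paragraph describes, using DiagramSelector as in the example script below (toy values, not taken from the patch):

    import numpy as np
    from gudhi.representations import DiagramSelector

    # An (n, 2) array of (birth, death) pairs; an infinite death marks an essential class.
    diag = np.array([[0.0, 1.0], [0.5, 2.0], [1.0, np.inf]])

    # Keep only the finite points, as done at the start of the example script.
    finite = DiagramSelector(use=True, point_type="finite").fit_transform([diag])
    print(finite[0])  # the two finite (birth, death) pairs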
diff --git a/src/python/example/diagram_vectorizations_distances_kernels.py b/src/python/example/diagram_vectorizations_distances_kernels.py
index a77bbfdd..119072eb 100755
--- a/src/python/example/diagram_vectorizations_distances_kernels.py
+++ b/src/python/example/diagram_vectorizations_distances_kernels.py
@@ -16,7 +16,7 @@ diags = [D]
diags = DiagramSelector(use=True, point_type="finite").fit_transform(diags)
diags = DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())]).fit_transform(diags)
-diags = DiagramScaler(use=True, scalers=[([1], Clamping(limit=.9))]).fit_transform(diags)
+diags = DiagramScaler(use=True, scalers=[([1], Clamping(maximum=.9))]).fit_transform(diags)
D = diags[0]
plt.scatter(D[:,0],D[:,1])
diff --git a/src/python/gudhi/representations/kernel_methods.py b/src/python/gudhi/representations/kernel_methods.py
index c855d2be..bfc83aff 100644
--- a/src/python/gudhi/representations/kernel_methods.py
+++ b/src/python/gudhi/representations/kernel_methods.py
@@ -161,7 +161,7 @@ class PersistenceScaleSpaceKernel(BaseEstimator, TransformerMixin):
"""
Xp = list(X)
for i in range(len(Xp)):
- op_X = np.matmul(Xp[i], np.array([[0.,1.], [1.,0.]]))
+ op_X = Xp[i][:,[1,0]]
Xp[i] = np.concatenate([Xp[i], op_X], axis=0)
return self.pwg_.transform(Xp)
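The replacement above swaps the two columns of each diagram instead of multiplying by the permutation matrix [[0,1],[1,0]]; both mirror every point across the diagonal. A quick check of the equivalence (illustration only, not part of the patch):

    import numpy as np

    D = np.array([[0.0, 1.0], [0.5, 2.0]])              # small (n, 2) diagram
    old = np.matmul(D, np.array([[0., 1.], [1., 0.]]))  # previous formulation
    new = D[:, [1, 0]]                                   # column swap used above
    assert np.array_equal(old, new)                      # same mirrored points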
diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py
index c512cb82..5f9ec6ab 100644
--- a/src/python/gudhi/representations/metrics.py
+++ b/src/python/gudhi/representations/metrics.py
@@ -86,12 +86,12 @@ class BottleneckDistance(BaseEstimator, TransformerMixin):
"""
This is a class for computing the bottleneck distance matrix from a list of persistence diagrams.
"""
- def __init__(self, epsilon=1e-3):
+ def __init__(self, epsilon=None):
"""
Constructor for the BottleneckDistance class.
Parameters:
- epsilon (double): approximation quality (default 1e-4).
+ epsilon (double): absolute (additive) error tolerated on the distance (default is the smallest positive float), see :func:`gudhi.bottleneck_distance`.
"""
self.epsilon = epsilon
@@ -118,7 +118,8 @@ class BottleneckDistance(BaseEstimator, TransformerMixin):
"""
num_diag1 = len(X)
- if len(self.diagrams_) == len(X) and np.all([np.array_equal(self.diagrams_[i], X[i]) for i in range(len(X))]):
+ #if len(self.diagrams_) == len(X) and np.all([np.array_equal(self.diagrams_[i], X[i]) for i in range(len(X))]):
+ if X is self.diagrams_:
matrix = np.zeros((num_diag1, num_diag1))
if USE_GUDHI:
@@ -127,7 +128,7 @@ class BottleneckDistance(BaseEstimator, TransformerMixin):
matrix[i,j] = bottleneck_distance(X[i], X[j], self.epsilon)
matrix[j,i] = matrix[i,j]
else:
- print("Gudhi required---returning null matrix")
+ print("Gudhi built without CGAL: returning a null matrix")
else:
num_diag2 = len(self.diagrams_)
@@ -138,7 +139,7 @@ class BottleneckDistance(BaseEstimator, TransformerMixin):
for j in range(num_diag2):
matrix[i,j] = bottleneck_distance(X[i], self.diagrams_[j], self.epsilon)
else:
- print("Gudhi required---returning null matrix")
+ print("Gudhi built without CGAL: returning a null matrix")
Xfit = matrix
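With the new default, epsilon=None is simply forwarded to gudhi.bottleneck_distance, which then uses its own default tolerance. A usage sketch with toy diagrams (it assumes fit() stores the input list unchanged, so the `X is self.diagrams_` shortcut above applies when the same list is passed to transform()):

    import numpy as np
    from gudhi.representations import BottleneckDistance

    diags = [np.array([[0.0, 1.0], [0.5, 2.0]]),
             np.array([[0.0, 1.5]])]

    BD = BottleneckDistance(epsilon=None).fit(diags)
    M = BD.transform(diags)   # same list object as in fit(), so the identity check triggers
    print(M.shape)            # (2, 2); symmetric with a zero diagonal in this branch

Note that the identity check is much cheaper than the element-wise comparison it replaces, but it only fires when literally the same list is reused between fit() and transform().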
diff --git a/src/python/gudhi/representations/preprocessing.py b/src/python/gudhi/representations/preprocessing.py
index 83227ca1..a39b00e4 100644
--- a/src/python/gudhi/representations/preprocessing.py
+++ b/src/python/gudhi/representations/preprocessing.py
@@ -30,7 +30,7 @@ class BirthPersistenceTransform(BaseEstimator, TransformerMixin):
Fit the BirthPersistenceTransform class on a list of persistence diagrams (this function actually does nothing but is useful when BirthPersistenceTransform is included in a scikit-learn Pipeline).
Parameters:
- X (n x 2 numpy array): input persistence diagrams.
+ X (list of n x 2 numpy array): input persistence diagrams.
y (n x 1 array): persistence diagram labels (unused).
"""
return self
@@ -58,14 +58,16 @@ class Clamping(BaseEstimator, TransformerMixin):
"""
This is a class for clamping values. It can be used as a parameter for the DiagramScaler class, for instance if you want to clamp abscissae or ordinates of persistence diagrams.
"""
- def __init__(self, limit=np.inf):
+ def __init__(self, minimum=-np.inf, maximum=np.inf):
"""
Constructor for the Clamping class.
Parameters:
- limit (double): clamping value (default np.inf).
+ minimum (double): minimum value of the clamping (default -numpy.inf).
+ maximum (double): maximum value of the clamping (default numpy.inf).
"""
- self.limit = limit
+ self.minimum = minimum
+ self.maximum = maximum
def fit(self, X, y=None):
"""
@@ -87,8 +88,7 @@ class Clamping(BaseEstimator, TransformerMixin):
Returns:
numpy array of size n: output list of values.
"""
- Xfit = np.minimum(X, self.limit)
- #Xfit = np.where(X >= self.limit, self.limit * np.ones(X.shape), X)
+ Xfit = np.clip(X, self.minimum, self.maximum)
return Xfit
class DiagramScaler(BaseEstimator, TransformerMixin):
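With the new minimum/maximum parameters, the clamping used in the example script above reads as follows (a minimal sketch with toy values):

    import numpy as np
    from gudhi.representations import Clamping, DiagramScaler

    diag = np.array([[0.0, 0.5], [0.2, 1.4]])

    # Clamp the death coordinate (column 1) at 0.9, as in diagram_vectorizations_distances_kernels.py.
    clamped = DiagramScaler(use=True, scalers=[([1], Clamping(maximum=.9))]).fit_transform([diag])
    print(clamped[0])  # deaths above 0.9 are cut down to 0.9; births are untouched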
diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index bf32f18e..61c4fb84 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -83,7 +83,7 @@ class PersistenceImage(BaseEstimator, TransformerMixin):
class Landscape(BaseEstimator, TransformerMixin):
"""
- This is a class for computing persistence landscapes from a list of persistence diagrams. A persistence landscape is a collection of 1D piecewise-linear functions computed from the rank function associated to the persistence diagram. These piecewise-linear functions are then sampled uniformly on a given range and the corresponding vectors of samples are concatenated and returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details.
+ This is a class for computing persistence landscapes from a list of persistence diagrams. A persistence landscape is a collection of 1D piecewise-linear functions computed from the rank function associated to the persistence diagram. These piecewise-linear functions are then sampled evenly on a given range and the corresponding vectors of samples are concatenated and returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details.
"""
def __init__(self, num_landscapes=5, resolution=100, sample_range=[np.nan, np.nan]):
"""
@@ -92,7 +92,7 @@ class Landscape(BaseEstimator, TransformerMixin):
Parameters:
num_landscapes (int): number of piecewise-linear functions to output (default 5).
resolution (int): number of samples for all piecewise-linear functions (default 100).
- sample_range ([double, double]): minimum and maximum of all piecewise-linear function domains, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn uniformly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
+ sample_range ([double, double]): minimum and maximum of all piecewise-linear function domains, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
"""
self.num_landscapes, self.resolution, self.sample_range = num_landscapes, resolution, sample_range
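The clipped indices in the next hunk are relative to an even sampling of sample_range; presumably the grid is built along these lines (illustration only, the exact construction is not shown in this hunk):

    import numpy as np

    sample_range, resolution = [0.0, 2.0], 5
    x_values = np.linspace(sample_range[0], sample_range[1], resolution)  # even sampling of the range
    step_x = x_values[1] - x_values[0]                                    # spacing the indices refer to
    print(x_values)  # [0.  0.5 1.  1.5 2. ]
    print(step_x)    # 0.5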
@@ -136,9 +136,9 @@ class Landscape(BaseEstimator, TransformerMixin):
for j in range(num_pts_in_diag):
[px,py] = diagram[j,:2]
- min_idx = np.minimum(np.maximum(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0), self.resolution)
- mid_idx = np.minimum(np.maximum(np.ceil((0.5*(py+px) - self.sample_range[0]) / step_x).astype(int), 0), self.resolution)
- max_idx = np.minimum(np.maximum(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0), self.resolution)
+ min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
+ mid_idx = np.clip(np.ceil((0.5*(py+px) - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
+ max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
if min_idx < self.resolution and max_idx > 0:
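The rewrite replaces nested np.minimum/np.maximum calls with a single np.clip; the two are equivalent whenever the lower bound does not exceed the upper one. A quick check (illustration only):

    import numpy as np

    idx = np.ceil(np.array([-3.2, 0.4, 7.9]) / 0.5).astype(int)  # raw, possibly out-of-range indices
    resolution = 10
    old = np.minimum(np.maximum(idx, 0), resolution)             # previous clamping
    new = np.clip(idx, 0, resolution)                            # single-call clamping
    assert np.array_equal(old, new)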
@@ -165,7 +165,7 @@ class Landscape(BaseEstimator, TransformerMixin):
class Silhouette(BaseEstimator, TransformerMixin):
"""
- This is a class for computing persistence silhouettes from a list of persistence diagrams. A persistence silhouette is computed by taking a weighted average of the collection of 1D piecewise-linear functions given by the persistence landscapes, and then by uniformly sampling this average on a given range. Finally, the corresponding vector of samples is returned. See https://arxiv.org/abs/1312.0308 for more details.
+ This is a class for computing persistence silhouettes from a list of persistence diagrams. A persistence silhouette is computed by taking a weighted average of the collection of 1D piecewise-linear functions given by the persistence landscapes, and then by evenly sampling this average on a given range. Finally, the corresponding vector of samples is returned. See https://arxiv.org/abs/1312.0308 for more details.
"""
def __init__(self, weight=lambda x: 1, resolution=100, sample_range=[np.nan, np.nan]):
"""
@@ -174,7 +174,7 @@ class Silhouette(BaseEstimator, TransformerMixin):
Parameters:
weight (function): weight function for the persistence diagram points (default constant function, i.e. lambda x: 1). This function must be defined on 2D points, i.e. on lists or numpy arrays of the form [p_x,p_y].
resolution (int): number of samples for the weighted average (default 100).
- sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn uniformly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
+ sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
"""
self.weight, self.resolution, self.sample_range = weight, resolution, sample_range
@@ -219,9 +219,9 @@ class Silhouette(BaseEstimator, TransformerMixin):
[px,py] = diagram[j,:2]
weight = weights[j] / total_weight
- min_idx = np.minimum(np.maximum(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0), self.resolution)
- mid_idx = np.minimum(np.maximum(np.ceil((0.5*(py+px) - self.sample_range[0]) / step_x).astype(int), 0), self.resolution)
- max_idx = np.minimum(np.maximum(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0), self.resolution)
+ min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
+ mid_idx = np.clip(np.ceil((0.5*(py+px) - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
+ max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
if min_idx < self.resolution and max_idx > 0:
@@ -243,7 +243,7 @@ class Silhouette(BaseEstimator, TransformerMixin):
class BettiCurve(BaseEstimator, TransformerMixin):
"""
- This is a class for computing Betti curves from a list of persistence diagrams. A Betti curve is a 1D piecewise-constant function obtained from the rank function. It is sampled uniformly on a given range and the vector of samples is returned. See https://www.researchgate.net/publication/316604237_Time_Series_Classification_via_Topological_Data_Analysis for more details.
+ This is a class for computing Betti curves from a list of persistence diagrams. A Betti curve is a 1D piecewise-constant function obtained from the rank function. It is sampled evenly on a given range and the vector of samples is returned. See https://www.researchgate.net/publication/316604237_Time_Series_Classification_via_Topological_Data_Analysis for more details.
"""
def __init__(self, resolution=100, sample_range=[np.nan, np.nan]):
"""
@@ -251,7 +251,7 @@ class BettiCurve(BaseEstimator, TransformerMixin):
Parameters:
resolution (int): number of samples for the piecewise-constant function (default 100).
- sample_range ([double, double]): minimum and maximum of the piecewise-constant function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn uniformly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
+ sample_range ([double, double]): minimum and maximum of the piecewise-constant function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
"""
self.resolution, self.sample_range = resolution, sample_range
@@ -290,8 +290,8 @@ class BettiCurve(BaseEstimator, TransformerMixin):
bc = np.zeros(self.resolution)
for j in range(num_pts_in_diag):
[px,py] = diagram[j,:2]
- min_idx = np.minimum(np.maximum(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0), self.resolution)
- max_idx = np.minimum(np.maximum(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0), self.resolution)
+ min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
+ max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
for k in range(min_idx, max_idx):
bc[k] += 1
@@ -313,7 +313,7 @@ class Entropy(BaseEstimator, TransformerMixin):
mode (string): what entropy to compute: either "scalar" for computing the entropy statistics, or "vector" for computing the entropy summary functions (default "scalar").
normalized (bool): whether to normalize the entropy summary function (default True). Used only if **mode** = "vector".
resolution (int): number of samples for the entropy summary function (default 100). Used only if **mode** = "vector".
- sample_range ([double, double]): minimum and maximum of the entropy summary function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn uniformly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. Used only if **mode** = "vector".
+ sample_range ([double, double]): minimum and maximum of the entropy summary function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. Used only if **mode** = "vector".
"""
self.mode, self.normalized, self.resolution, self.sample_range = mode, normalized, resolution, sample_range
@@ -359,8 +359,8 @@ class Entropy(BaseEstimator, TransformerMixin):
ent = np.zeros(self.resolution)
for j in range(num_pts_in_diag):
[px,py] = orig_diagram[j,:2]
- min_idx = np.minimum(np.maximum(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0), self.resolution)
- max_idx = np.minimum(np.maximum(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0), self.resolution)
+ min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
+ max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
for k in range(min_idx, max_idx):
ent[k] += (-1) * new_diagram[j,1] * np.log(new_diagram[j,1])
if self.normalized:
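For context, the loop above spreads each point's -p*log(p) term over the samples where the point is alive; in the usual persistence-entropy definition, p is the point's lifetime divided by the total lifetime. A sketch of that textbook formula (the normalization shown here is the standard one and is assumed, not read off the hunk):

    import numpy as np

    def persistence_entropy(diag):
        # diag: (n, 2) array of finite (birth, death) pairs
        pers = diag[:, 1] - diag[:, 0]    # lifetimes
        p = pers / np.sum(pers)           # normalize so the weights sum to 1
        return -np.sum(p * np.log(p))     # textbook persistence entropy

    print(persistence_entropy(np.array([[0.0, 1.0], [0.5, 2.0]])))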