From f97865b2f5a0457d98bfd75eea3abc23e249943a Mon Sep 17 00:00:00 2001 From: Gard Spreemann Date: Sun, 20 Dec 2020 14:48:33 +0100 Subject: More flexible Betti curve computations. Introduce a new BettiCurve2 class that can compute Betti curves on any grid (not just np.linspace ones), and can compute the grid needed to capture all values of the Betti curves. Based on feedback from PR #427. --- src/python/gudhi/representations/vector_methods.py | 152 ++++++++++++++++++++- 1 file changed, 151 insertions(+), 1 deletion(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index cdcb1fde..fda0a22d 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -1,14 +1,16 @@ # This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. # See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. -# Author(s): Mathieu Carrière, Martin Royer +# Author(s): Mathieu Carrière, Martin Royer, Gard Spreemann # # Copyright (C) 2018-2020 Inria # # Modification(s): # - 2020/06 Martin: ATOL integration +# - 2020/12 Gard: A more flexible Betti curve class capable of computing exact curves. import numpy as np from sklearn.base import BaseEstimator, TransformerMixin +from sklearn.exceptions import NotFittedError from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler from sklearn.neighbors import DistanceMetric from sklearn.metrics import pairwise @@ -350,6 +352,154 @@ class BettiCurve(BaseEstimator, TransformerMixin): """ return self.fit_transform([diag])[0,:] + +class BettiCurve2(BaseEstimator, TransformerMixin): + """ + A more flexible replacement for the BettiCurve class. + + Examples + -------- + If pd is a persistence diagram and xs is a grid such that xs[0] >= pd.min(), then the result of + >>> bc = BettiCurve2(xs) + >>> result = bc(pd) + and + >>> from scipy.interpolate import interp1d + >>> bc = BettiCurve2(None) + >>> bettis = bc.fit_transform([pd]) + >>> interp = interp1d(bc.grid_, bettis[0, :], kind="previous", fill_value="extrapolate") + >>> result = np.array(interp(xs), dtype=int) + are the same. + """ + + def __init__(self, grid = None): + """ + Constructor for the BettiCurve class. + + Parameters + ---------- + grid: 1d array or None, default=None + Filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinites are OK. If None (default), a grid will be computed that captures all the filtration value changes. + + Attributes + ---------- + grid_: 1d array + Contains the compute grid after fit or fit_transform. + """ + + self.grid_ = np.array(grid) + + + def fit(self, X, y = None): + """ + Compute a filtration grid that captures all changes in Betti numbers for all the given persistence diagrams. + + Parameters + ---------- + X: list of 2d arrays + Persistence diagrams. + + y: None. + Ignored. + """ + + events = np.unique(np.concatenate([pd.flatten() for pd in X], axis=0)) + + if len(events) == 0: + self.grid_ = np.array([-np.inf]) + else: + self.grid_ = np.array(events) + + return self + + + def fit_transform(self, X): + """ + Find a sampling grid that captures all changes in Betti numbers, and compute those Betti numbers. The result is the same as fit(X) followed by transform(X), but potentially faster. + """ + + N = len(X) + + events = np.concatenate([pd.flatten(order="F") for pd in X], axis=0) + sorting = np.argsort(events) + offsets = np.zeros(1 + N, dtype=int) + for i in range(0, N): + offsets[i+1] = offsets[i] + 2*X[i].shape[0] + starts = offsets[0:N] + ends = offsets[1:N + 1] - 1 + + bettis = [[0] for i in range(0, N)] + if len(sorting) == 0: + xs = [-np.inf] + else: + xs = [events[sorting[0]]] + + for i in sorting: + j = np.searchsorted(ends, i) + delta = 1 if i - starts[j] < len(X[j]) else -1 + if events[i] == xs[-1]: + bettis[j][-1] += delta + else: + xs.append(events[i]) + for k in range(0, j): + bettis[k].append(bettis[k][-1]) + bettis[j].append(bettis[j][-1] + delta) + for k in range(j+1, N): + bettis[k].append(bettis[k][-1]) + + self.grid_ = np.array(xs) + return np.array(bettis, dtype=int) + + + def transform(self, X): + """ + Compute Betti curves. + + Parameters + ---------- + X: list of 2d arrays + Persistence diagrams. + + Returns + ------- + (len(X))x(len(self.grid_)) array of ints + Betti numbers of the given persistence diagrams at the grid points given in self.grid_. + """ + + if self.grid_ is None: + raise NotFittedError("Not fitted. You need to call fit or construct with a chosen sampling grid.") + + N = len(X) + + events = np.concatenate([pd.flatten(order="F") for pd in X], axis=0) + sorting = np.argsort(events) + offsets = np.zeros(1 + N, dtype=int) + for i in range(0, N): + offsets[i+1] = offsets[i] + 2*X[i].shape[0] + starts = offsets[0:N] + ends = offsets[1:N + 1] - 1 + + bettis = [[0] for i in range(0, N)] + + i = 0 + for x in self.grid_: + while i < len(sorting) and events[sorting[i]] <= x: + j = np.searchsorted(ends, sorting[i]) + delta = 1 if sorting[i] - starts[j] < len(X[j]) else -1 + bettis[j][-1] += delta + i += 1 + for k in range(0, N): + bettis[k].append(bettis[k][-1]) + + return np.array(bettis, dtype=int)[:, 0:-1] + + + def __call__(self, diag): + """ + Shorthand for transform on a single persistence diagram. + """ + return self.transform([diag])[0, :] + + class Entropy(BaseEstimator, TransformerMixin): """ This is a class for computing persistence entropy. Persistence entropy is a statistic for persistence diagrams inspired from Shannon entropy. This statistic can also be used to compute a feature vector, called the entropy summary function. See https://arxiv.org/pdf/1803.08304.pdf for more details. Note that a previous implementation was contributed by Manuel Soriano-Trigueros. -- cgit v1.2.3 From ccb63b32bc65c0a6030dfab0b70ece62d9eff988 Mon Sep 17 00:00:00 2001 From: Gard Spreemann Date: Sun, 28 Feb 2021 16:14:54 +0100 Subject: Move documentation string to class --- src/python/gudhi/representations/vector_methods.py | 24 +++++++++------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index fda0a22d..13630360 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -357,6 +357,16 @@ class BettiCurve2(BaseEstimator, TransformerMixin): """ A more flexible replacement for the BettiCurve class. + Parameters + ---------- + grid: 1d array or None, default=None + Filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinites are OK. If None (default), a grid will be computed that captures all the filtration value changes. + + Attributes + ---------- + grid_: 1d array + Contains the compute grid after fit or fit_transform. + Examples -------- If pd is a persistence diagram and xs is a grid such that xs[0] >= pd.min(), then the result of @@ -372,20 +382,6 @@ class BettiCurve2(BaseEstimator, TransformerMixin): """ def __init__(self, grid = None): - """ - Constructor for the BettiCurve class. - - Parameters - ---------- - grid: 1d array or None, default=None - Filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinites are OK. If None (default), a grid will be computed that captures all the filtration value changes. - - Attributes - ---------- - grid_: 1d array - Contains the compute grid after fit or fit_transform. - """ - self.grid_ = np.array(grid) -- cgit v1.2.3 From fddeb5724fe2e7f1f37476c5e3cfade992a4edec Mon Sep 17 00:00:00 2001 From: Gard Spreemann Date: Sun, 28 Feb 2021 18:40:46 +0100 Subject: Behave in line with scikit-learn guidelines According to [1], we should in particular not do any validation in the constructor, and fit/fit_transform should always update underscored attributes (self.grid_ in this case). We still want to allow for a user-defined, data-independent grid, so we make this a separate parameter predefined_grid. [1] https://scikit-learn.org/stable/developers/develop.html --- src/python/gudhi/representations/vector_methods.py | 86 ++++++++++++---------- 1 file changed, 46 insertions(+), 40 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 13630360..62a467c0 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -359,13 +359,13 @@ class BettiCurve2(BaseEstimator, TransformerMixin): Parameters ---------- - grid: 1d array or None, default=None - Filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinites are OK. If None (default), a grid will be computed that captures all the filtration value changes. + predefined_grid: 1d array or None, default=None + Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are OK. If None (default), a grid will be computed that captures all changes in Betti numbers in the provided data. Attributes ---------- grid_: 1d array - Contains the compute grid after fit or fit_transform. + The grid on which the Betti numbers are computed. If predefined_grid was specified, grid_ will always be that grid, independently of data. If not, the grid is fitted to capture all filtration values at which the Betti numbers change. Examples -------- @@ -381,13 +381,17 @@ class BettiCurve2(BaseEstimator, TransformerMixin): are the same. """ - def __init__(self, grid = None): - self.grid_ = np.array(grid) + def __init__(self, predefined_grid = None): + self.predefined_grid = predefined_grid + + + def is_fitted(self): + return hasattr(self, "grid_") def fit(self, X, y = None): """ - Compute a filtration grid that captures all changes in Betti numbers for all the given persistence diagrams. + Compute a filtration grid that captures all changes in Betti numbers for all the given persistence diagrams, unless a predefined grid was provided. Parameters ---------- @@ -398,12 +402,11 @@ class BettiCurve2(BaseEstimator, TransformerMixin): Ignored. """ - events = np.unique(np.concatenate([pd.flatten() for pd in X], axis=0)) - - if len(events) == 0: - self.grid_ = np.array([-np.inf]) - else: + if self.predefined_grid is None: + events = np.unique(np.concatenate([pd.flatten() for pd in X] + [[-np.inf]], axis=0)) self.grid_ = np.array(events) + else: + self.grid_ = np.array(self.predefined_grid) return self @@ -413,37 +416,39 @@ class BettiCurve2(BaseEstimator, TransformerMixin): Find a sampling grid that captures all changes in Betti numbers, and compute those Betti numbers. The result is the same as fit(X) followed by transform(X), but potentially faster. """ - N = len(X) + if self.predefined_grid is None: + N = len(X) - events = np.concatenate([pd.flatten(order="F") for pd in X], axis=0) - sorting = np.argsort(events) - offsets = np.zeros(1 + N, dtype=int) - for i in range(0, N): - offsets[i+1] = offsets[i] + 2*X[i].shape[0] - starts = offsets[0:N] - ends = offsets[1:N + 1] - 1 + events = np.concatenate([pd.flatten(order="F") for pd in X], axis=0) + sorting = np.argsort(events) + offsets = np.zeros(1 + N, dtype=int) + for i in range(0, N): + offsets[i+1] = offsets[i] + 2*X[i].shape[0] + starts = offsets[0:N] + ends = offsets[1:N + 1] - 1 - bettis = [[0] for i in range(0, N)] - if len(sorting) == 0: xs = [-np.inf] - else: - xs = [events[sorting[0]]] + bettis = [[0] for i in range(0, N)] + + for i in sorting: + j = np.searchsorted(ends, i) + delta = 1 if i - starts[j] < len(X[j]) else -1 + if events[i] == xs[-1]: + bettis[j][-1] += delta + else: + xs.append(events[i]) + for k in range(0, j): + bettis[k].append(bettis[k][-1]) + bettis[j].append(bettis[j][-1] + delta) + for k in range(j+1, N): + bettis[k].append(bettis[k][-1]) + + self.grid_ = np.array(xs) + return np.array(bettis, dtype=int) - for i in sorting: - j = np.searchsorted(ends, i) - delta = 1 if i - starts[j] < len(X[j]) else -1 - if events[i] == xs[-1]: - bettis[j][-1] += delta - else: - xs.append(events[i]) - for k in range(0, j): - bettis[k].append(bettis[k][-1]) - bettis[j].append(bettis[j][-1] + delta) - for k in range(j+1, N): - bettis[k].append(bettis[k][-1]) - - self.grid_ = np.array(xs) - return np.array(bettis, dtype=int) + else: + self.grid_ = self.predefined_grid + return self.transform(X) def transform(self, X): @@ -461,8 +466,8 @@ class BettiCurve2(BaseEstimator, TransformerMixin): Betti numbers of the given persistence diagrams at the grid points given in self.grid_. """ - if self.grid_ is None: - raise NotFittedError("Not fitted. You need to call fit or construct with a chosen sampling grid.") + if not self.is_fitted(): + raise NotFittedError("Not fitted.") N = len(X) @@ -496,6 +501,7 @@ class BettiCurve2(BaseEstimator, TransformerMixin): return self.transform([diag])[0, :] + class Entropy(BaseEstimator, TransformerMixin): """ This is a class for computing persistence entropy. Persistence entropy is a statistic for persistence diagrams inspired from Shannon entropy. This statistic can also be used to compute a feature vector, called the entropy summary function. See https://arxiv.org/pdf/1803.08304.pdf for more details. Note that a previous implementation was contributed by Manuel Soriano-Trigueros. -- cgit v1.2.3 From 482c36c28b1feaf65a2f26b0ee9ad2f4ddfae86c Mon Sep 17 00:00:00 2001 From: Gard Spreemann Date: Sun, 28 Feb 2021 22:56:19 +0100 Subject: More precise interpolation invariant documentation text --- src/python/gudhi/representations/vector_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 62a467c0..a82c0d3c 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -369,7 +369,7 @@ class BettiCurve2(BaseEstimator, TransformerMixin): Examples -------- - If pd is a persistence diagram and xs is a grid such that xs[0] >= pd.min(), then the result of + If pd is a persistence diagram and xs is a nonempty grid of finite values such that xs[0] >= pd.min(), then the result of >>> bc = BettiCurve2(xs) >>> result = bc(pd) and -- cgit v1.2.3 From 79f002efaa1584e89f85928e464dd73ea64593b6 Mon Sep 17 00:00:00 2001 From: Gard Spreemann Date: Sun, 28 Feb 2021 23:08:17 +0100 Subject: Elaborate doc string --- src/python/gudhi/representations/vector_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index a82c0d3c..5133a64c 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -355,7 +355,7 @@ class BettiCurve(BaseEstimator, TransformerMixin): class BettiCurve2(BaseEstimator, TransformerMixin): """ - A more flexible replacement for the BettiCurve class. + A more flexible replacement for the BettiCurve class. There are two modes of operation: with a predefined grid, and without. With a predefined grid, the class computes the Betti numbers at those grid points. Without a predefined grid, it can be fit to a list of persistence diagrams and produce a grid that consists of (at least) the filtration values at which at least one of those persistence diagrams chance Betti numbers, and then compute the Betti numbers at those grid points. In the latter mode, the exact Betti curve is computed for the entire real line. Parameters ---------- -- cgit v1.2.3 From 7d3fba5d1561b3241b914583ac420434e788e27f Mon Sep 17 00:00:00 2001 From: Gard Spreemann Date: Wed, 28 Apr 2021 16:11:34 +0200 Subject: Handle an empty list of persistence diagrams --- src/python/gudhi/representations/vector_methods.py | 6 ++++++ src/python/test/test_betti_curve_representations.py | 15 +++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 5133a64c..82f071d7 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -417,6 +417,9 @@ class BettiCurve2(BaseEstimator, TransformerMixin): """ if self.predefined_grid is None: + if not X: + X = [np.zeros((0, 2))] + N = len(X) events = np.concatenate([pd.flatten(order="F") for pd in X], axis=0) @@ -469,6 +472,9 @@ class BettiCurve2(BaseEstimator, TransformerMixin): if not self.is_fitted(): raise NotFittedError("Not fitted.") + if not X: + X = [np.zeros((0, 2))] + N = len(X) events = np.concatenate([pd.flatten(order="F") for pd in X], axis=0) diff --git a/src/python/test/test_betti_curve_representations.py b/src/python/test/test_betti_curve_representations.py index 5b95fa2c..475839ee 100755 --- a/src/python/test/test_betti_curve_representations.py +++ b/src/python/test/test_betti_curve_representations.py @@ -37,3 +37,18 @@ def test_betti_curve_is_irregular_betti_curve_followed_by_interpolation(): interp = scipy.interpolate.interp1d(bc.grid_, bettis[i, :], kind="previous", fill_value="extrapolate") bettis_interp = np.array(interp(grid), dtype=int) assert((bettis_interp == bettis_gridded).all()) + + +def test_empty_with_predefined_grid(): + random_grid = np.sort(np.random.uniform(0, 1, 100)) + bc = BettiCurve2(random_grid) + bettis = bc.fit_transform([]) + assert((bc.grid_ == random_grid).all()) + assert((bettis == 0).all()) + + +def test_empty(): + bc = BettiCurve2() + bettis = bc.fit_transform([]) + assert(bc.grid_ == [-np.inf]) + assert((bettis == 0).all()) -- cgit v1.2.3 From 9841a3c845905c9b278ddb7828260a3d6fa5fce7 Mon Sep 17 00:00:00 2001 From: Gard Spreemann Date: Fri, 30 Apr 2021 15:08:19 +0200 Subject: Allow specifying range for uniform predefined grid for compatibility with old class --- src/python/gudhi/representations/vector_methods.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 82f071d7..86afaa1c 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -359,8 +359,8 @@ class BettiCurve2(BaseEstimator, TransformerMixin): Parameters ---------- - predefined_grid: 1d array or None, default=None - Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are OK. If None (default), a grid will be computed that captures all changes in Betti numbers in the provided data. + predefined_grid: 1d array, triple or None, default=None + Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are OK. If a triple of the form (l, u, n), the grid will be uniform from l to u in n steps. If None (default), a grid will be computed that captures all changes in Betti numbers in the provided data. Attributes ---------- @@ -382,7 +382,13 @@ class BettiCurve2(BaseEstimator, TransformerMixin): """ def __init__(self, predefined_grid = None): - self.predefined_grid = predefined_grid + if isinstance(predefined_grid, tuple): + if len(predefined_grid) != 3: + raise ValueError("Expected array, None or triple.") + + self.predefined_grid = np.linspace(predefined_grid[0], predefined_grid[1], predefined_grid[2]) + else: + self.predefined_grid = predefined_grid def is_fitted(self): -- cgit v1.2.3 From 09fe9bd25d9212fa42b77570a0ef80bc97d742be Mon Sep 17 00:00:00 2001 From: Gard Spreemann Date: Fri, 30 Apr 2021 15:08:56 +0200 Subject: Replace old BettiCurve class --- src/python/gudhi/representations/vector_methods.py | 67 +--------------------- 1 file changed, 1 insertion(+), 66 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 86afaa1c..bdbaa175 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -287,73 +287,8 @@ class Silhouette(BaseEstimator, TransformerMixin): """ return self.fit_transform([diag])[0,:] -class BettiCurve(BaseEstimator, TransformerMixin): - """ - This is a class for computing Betti curves from a list of persistence diagrams. A Betti curve is a 1D piecewise-constant function obtained from the rank function. It is sampled evenly on a given range and the vector of samples is returned. See https://www.researchgate.net/publication/316604237_Time_Series_Classification_via_Topological_Data_Analysis for more details. - """ - def __init__(self, resolution=100, sample_range=[np.nan, np.nan]): - """ - Constructor for the BettiCurve class. - - Parameters: - resolution (int): number of sample for the piecewise-constant function (default 100). - sample_range ([double, double]): minimum and maximum of the piecewise-constant function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. - """ - self.resolution, self.sample_range = resolution, sample_range - - def fit(self, X, y=None): - """ - Fit the BettiCurve class on a list of persistence diagrams: if any of the values in **sample_range** is numpy.nan, replace it with the corresponding value computed on the given list of persistence diagrams. - - Parameters: - X (list of n x 2 numpy arrays): input persistence diagrams. - y (n x 1 array): persistence diagram labels (unused). - """ - if np.isnan(np.array(self.sample_range)).any(): - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) - return self - - def transform(self, X): - """ - Compute the Betti curve for each persistence diagram individually and concatenate the results. - - Parameters: - X (list of n x 2 numpy arrays): input persistence diagrams. - - Returns: - numpy array with shape (number of diagrams) x (**resolution**): output Betti curves. - """ - Xfit = [] - x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution) - step_x = x_values[1] - x_values[0] - - for diagram in X: - diagram_int = np.clip(np.ceil((diagram[:,:2] - self.sample_range[0]) / step_x), 0, self.resolution).astype(int) - bc = np.zeros(self.resolution) - for interval in diagram_int: - bc[interval[0]:interval[1]] += 1 - Xfit.append(np.reshape(bc,[1,-1])) - - Xfit = np.concatenate(Xfit, 0) - - return Xfit - def __call__(self, diag): - """ - Apply BettiCurve on a single persistence diagram and outputs the result. - - Parameters: - diag (n x 2 numpy array): input persistence diagram. - - Returns: - numpy array with shape (**resolution**): output Betti curve. - """ - return self.fit_transform([diag])[0,:] - - -class BettiCurve2(BaseEstimator, TransformerMixin): +class BettiCurve(BaseEstimator, TransformerMixin): """ A more flexible replacement for the BettiCurve class. There are two modes of operation: with a predefined grid, and without. With a predefined grid, the class computes the Betti numbers at those grid points. Without a predefined grid, it can be fit to a list of persistence diagrams and produce a grid that consists of (at least) the filtration values at which at least one of those persistence diagrams chance Betti numbers, and then compute the Betti numbers at those grid points. In the latter mode, the exact Betti curve is computed for the entire real line. -- cgit v1.2.3 From 241cc1422e9362c23db1c4c25ba8b63f88a1153f Mon Sep 17 00:00:00 2001 From: Gard Spreemann Date: Sun, 16 May 2021 14:21:05 +0200 Subject: Update doc string to reflect new class name --- src/python/gudhi/representations/vector_methods.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index bdbaa175..7e615b70 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -290,7 +290,7 @@ class Silhouette(BaseEstimator, TransformerMixin): class BettiCurve(BaseEstimator, TransformerMixin): """ - A more flexible replacement for the BettiCurve class. There are two modes of operation: with a predefined grid, and without. With a predefined grid, the class computes the Betti numbers at those grid points. Without a predefined grid, it can be fit to a list of persistence diagrams and produce a grid that consists of (at least) the filtration values at which at least one of those persistence diagrams chance Betti numbers, and then compute the Betti numbers at those grid points. In the latter mode, the exact Betti curve is computed for the entire real line. + Compute Betti curves from persistence diagrams. There are two modes of operation: with a predefined grid, and without. With a predefined grid, the class computes the Betti numbers at those grid points. Without a predefined grid, it can be fit to a list of persistence diagrams and produce a grid that consists of (at least) the filtration values at which at least one of those persistence diagrams chance Betti numbers, and then compute the Betti numbers at those grid points. In the latter mode, the exact Betti curve is computed for the entire real line. Parameters ---------- @@ -305,11 +305,11 @@ class BettiCurve(BaseEstimator, TransformerMixin): Examples -------- If pd is a persistence diagram and xs is a nonempty grid of finite values such that xs[0] >= pd.min(), then the result of - >>> bc = BettiCurve2(xs) + >>> bc = BettiCurve(xs) >>> result = bc(pd) and >>> from scipy.interpolate import interp1d - >>> bc = BettiCurve2(None) + >>> bc = BettiCurve(None) >>> bettis = bc.fit_transform([pd]) >>> interp = interp1d(bc.grid_, bettis[0, :], kind="previous", fill_value="extrapolate") >>> result = np.array(interp(xs), dtype=int) -- cgit v1.2.3 From ec55f3e92e96951508f4b8b5b3e1704d33d1d015 Mon Sep 17 00:00:00 2001 From: Gard Spreemann Date: Sun, 16 May 2021 14:21:32 +0200 Subject: Typo --- src/python/gudhi/representations/vector_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 7e615b70..814b6081 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -290,7 +290,7 @@ class Silhouette(BaseEstimator, TransformerMixin): class BettiCurve(BaseEstimator, TransformerMixin): """ - Compute Betti curves from persistence diagrams. There are two modes of operation: with a predefined grid, and without. With a predefined grid, the class computes the Betti numbers at those grid points. Without a predefined grid, it can be fit to a list of persistence diagrams and produce a grid that consists of (at least) the filtration values at which at least one of those persistence diagrams chance Betti numbers, and then compute the Betti numbers at those grid points. In the latter mode, the exact Betti curve is computed for the entire real line. + Compute Betti curves from persistence diagrams. There are two modes of operation: with a predefined grid, and without. With a predefined grid, the class computes the Betti numbers at those grid points. Without a predefined grid, it can be fit to a list of persistence diagrams and produce a grid that consists of (at least) the filtration values at which at least one of those persistence diagrams changes Betti numbers, and then compute the Betti numbers at those grid points. In the latter mode, the exact Betti curve is computed for the entire real line. Parameters ---------- -- cgit v1.2.3 From ec06a9b9ae0a9ff1897249dcbc2b497764f54aaf Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau Date: Mon, 18 Oct 2021 17:01:02 +0200 Subject: First part of the fix --- src/python/gudhi/cubical_complex.pyx | 7 ++- src/python/gudhi/periodic_cubical_complex.pyx | 7 ++- src/python/gudhi/representations/vector_methods.py | 60 ++++++++++++++-------- src/python/gudhi/simplex_tree.pyx | 26 ++++++---- src/python/test/test_cubical_complex.py | 25 +++++++++ src/python/test/test_representations.py | 37 +++++++++++++ 6 files changed, 129 insertions(+), 33 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index 97c69a2d..04569bd8 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -281,4 +281,9 @@ cdef class CubicalComplex: launched first. """ assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()" - return np.array(self.pcohptr.intervals_in_dimension(dimension)) + piid = np.array(self.pcohptr.intervals_in_dimension(dimension)) + # Workaround https://github.com/GUDHI/gudhi-devel/issues/507 + if piid.shape[0] == 0: + return np.empty(shape = [0, 2]) + else: + return piid diff --git a/src/python/gudhi/periodic_cubical_complex.pyx b/src/python/gudhi/periodic_cubical_complex.pyx index ef1d3080..bd91ccde 100644 --- a/src/python/gudhi/periodic_cubical_complex.pyx +++ b/src/python/gudhi/periodic_cubical_complex.pyx @@ -280,4 +280,9 @@ cdef class PeriodicCubicalComplex: launched first. """ assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()" - return np.array(self.pcohptr.intervals_in_dimension(dimension)) + piid = np.array(self.pcohptr.intervals_in_dimension(dimension)) + # Workaround https://github.com/GUDHI/gudhi-devel/issues/507 + if piid.shape[0] == 0: + return np.empty(shape = [0, 2]) + else: + return piid diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 84bc99a2..711c32a7 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -44,11 +44,15 @@ class PersistenceImage(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - if np.isnan(np.array(self.im_range)).any(): - new_X = BirthPersistenceTransform().fit_transform(X) - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(new_X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.im_range = np.where(np.isnan(np.array(self.im_range)), np.array([mx, Mx, my, My]), np.array(self.im_range)) + try: + if np.isnan(np.array(self.im_range)).any(): + new_X = BirthPersistenceTransform().fit_transform(X) + pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(new_X,y) + [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] + self.im_range = np.where(np.isnan(np.array(self.im_range)), np.array([mx, Mx, my, My]), np.array(self.im_range)) + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + pass return self def transform(self, X): @@ -120,9 +124,13 @@ class Landscape(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ if self.nan_in_range.any(): - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(self.nan_in_range, np.array([mx, My]), np.array(self.sample_range)) + try: + pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) + [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] + self.sample_range = np.where(self.nan_in_range, np.array([mx, My]), np.array(self.sample_range)) + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + pass return self def transform(self, X): @@ -218,10 +226,14 @@ class Silhouette(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - if np.isnan(np.array(self.sample_range)).any(): - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + try: + if np.isnan(np.array(self.sample_range)).any(): + pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) + [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] + self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + pass return self def transform(self, X): @@ -307,10 +319,14 @@ class BettiCurve(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - if np.isnan(np.array(self.sample_range)).any(): - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + try: + if np.isnan(np.array(self.sample_range)).any(): + pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) + [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] + self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + pass return self def transform(self, X): @@ -374,10 +390,14 @@ class Entropy(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - if np.isnan(np.array(self.sample_range)).any(): - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + try: + if np.isnan(np.array(self.sample_range)).any(): + pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) + [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] + self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + pass return self def transform(self, X): diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 9c51cb46..e9bac036 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -9,8 +9,7 @@ from cython.operator import dereference, preincrement from libc.stdint cimport intptr_t -import numpy -from numpy import array as np_array +import numpy as np cimport gudhi.simplex_tree __author__ = "Vincent Rouvreau" @@ -542,7 +541,12 @@ cdef class SimplexTree: function to be launched first. """ assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()" - return np_array(self.pcohptr.intervals_in_dimension(dimension)) + piid = np.array(self.pcohptr.intervals_in_dimension(dimension)) + # Workaround https://github.com/GUDHI/gudhi-devel/issues/507 + if piid.shape[0] == 0: + return np.empty(shape = [0, 2]) + else: + return piid def persistence_pairs(self): """This function returns a list of persistence birth and death simplices pairs. @@ -583,8 +587,8 @@ cdef class SimplexTree: """ assert self.pcohptr != NULL, "lower_star_persistence_generators() requires that persistence() be called first." gen = self.pcohptr.lower_star_generators() - normal = [np_array(d).reshape(-1,2) for d in gen.first] - infinite = [np_array(d) for d in gen.second] + normal = [np.array(d).reshape(-1,2) for d in gen.first] + infinite = [np.array(d) for d in gen.second] return (normal, infinite) def flag_persistence_generators(self): @@ -602,19 +606,19 @@ cdef class SimplexTree: assert self.pcohptr != NULL, "flag_persistence_generators() requires that persistence() be called first." gen = self.pcohptr.flag_generators() if len(gen.first) == 0: - normal0 = numpy.empty((0,3)) + normal0 = np.empty((0,3)) normals = [] else: l = iter(gen.first) - normal0 = np_array(next(l)).reshape(-1,3) - normals = [np_array(d).reshape(-1,4) for d in l] + normal0 = np.array(next(l)).reshape(-1,3) + normals = [np.array(d).reshape(-1,4) for d in l] if len(gen.second) == 0: - infinite0 = numpy.empty(0) + infinite0 = np.empty(0) infinites = [] else: l = iter(gen.second) - infinite0 = np_array(next(l)) - infinites = [np_array(d).reshape(-1,2) for d in l] + infinite0 = np.array(next(l)) + infinites = [np.array(d).reshape(-1,2) for d in l] return (normal0, normals, infinite0, infinites) def collapse_edges(self, nb_iterations = 1): diff --git a/src/python/test/test_cubical_complex.py b/src/python/test/test_cubical_complex.py index d0e4e9e8..29d559b3 100755 --- a/src/python/test/test_cubical_complex.py +++ b/src/python/test/test_cubical_complex.py @@ -174,3 +174,28 @@ def test_periodic_cofaces_of_persistence_pairs_when_pd_has_no_paired_birth_and_d assert np.array_equal(pairs[1][0], np.array([0])) assert np.array_equal(pairs[1][1], np.array([0, 1])) assert np.array_equal(pairs[1][2], np.array([1])) + +def test_cubical_persistence_intervals_in_dimension(): + cub = CubicalComplex( + dimensions=[3, 3], + top_dimensional_cells=[1, 2, 3, 4, 5, 6, 7, 8, 9], + ) + cub.compute_persistence() + H0 = cub.persistence_intervals_in_dimension(0) + assert np.array_equal(H0, np.array([[ 1., float("inf")]])) + assert cub.persistence_intervals_in_dimension(1).shape == (0, 2) + +def test_periodic_cubical_persistence_intervals_in_dimension(): + cub = PeriodicCubicalComplex( + dimensions=[3, 3], + top_dimensional_cells=[1, 2, 3, 4, 5, 6, 7, 8, 9], + periodic_dimensions = [True, True] + ) + cub.compute_persistence() + H0 = cub.persistence_intervals_in_dimension(0) + assert np.array_equal(H0, np.array([[ 1., float("inf")]])) + H1 = cub.persistence_intervals_in_dimension(1) + assert np.array_equal(H1, np.array([[ 3., float("inf")], [ 7., float("inf")]])) + H2 = cub.persistence_intervals_in_dimension(2) + assert np.array_equal(H2, np.array([[ 9., float("inf")]])) + assert cub.persistence_intervals_in_dimension(3).shape == (0, 2) diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index cda1a15b..c1f4df12 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -6,6 +6,12 @@ import pytest from sklearn.cluster import KMeans +from gudhi.representations import (DiagramSelector, Clamping, Landscape, Silhouette, BettiCurve, ComplexPolynomial,\ + TopologicalVector, DiagramScaler, BirthPersistenceTransform,\ + PersistenceImage, PersistenceWeightedGaussianKernel, Entropy, \ + PersistenceScaleSpaceKernel, SlicedWassersteinDistance,\ + SlicedWassersteinKernel, PersistenceFisherKernel, WassersteinDistance) + def test_representations_examples(): # Disable graphics for testing purposes @@ -98,3 +104,34 @@ def test_infinity(): assert c[1] == 0 assert c[7] == 3 assert c[9] == 2 + +def pow(n): + return lambda x: np.power(x[1]-x[0],n) + +def test_vectorization_empty_diagrams(): + empty_diag = np.empty(shape = [0, 2]) + Landscape(resolution=1000)(empty_diag) + Silhouette(resolution=1000, weight=pow(2))(empty_diag) + BettiCurve(resolution=1000)(empty_diag) + ComplexPolynomial(threshold=-1, polynomial_type="T")(empty_diag) + TopologicalVector(threshold=-1)(empty_diag) + PersistenceImage(bandwidth=.1, weight=lambda x: x[1], im_range=[0,1,0,1], resolution=[100,100])(empty_diag) + #Entropy(mode="scalar")(empty_diag) + #Entropy(mode="vector", normalized=False)(empty_diag) + +#def arctan(C,p): +# return lambda x: C*np.arctan(np.power(x[1], p)) +# +#def test_kernel_empty_diagrams(): +# empty_diag = np.empty(shape = [0, 2]) +# PersistenceWeightedGaussianKernel(bandwidth=1., kernel_approx=None, weight=arctan(1.,1.))(empty_diag, empty_diag) +# PersistenceWeightedGaussianKernel(kernel_approx=RBFSampler(gamma=1./2, n_components=100000).fit(np.ones([1,2])), weight=arctan(1.,1.))(empty_diag, empty_diag) +# PersistenceScaleSpaceKernel(bandwidth=1.)(empty_diag, empty_diag) +# PersistenceScaleSpaceKernel(kernel_approx=RBFSampler(gamma=1./2, n_components=100000).fit(np.ones([1,2])))(empty_diag, empty_diag) +# SlicedWassersteinDistance(num_directions=100)(empty_diag, empty_diag) +# SlicedWassersteinKernel(num_directions=100, bandwidth=1.)(empty_diag, empty_diag) +# WassersteinDistance(order=2, internal_p=2, mode="pot")(empty_diag, empty_diag) +# WassersteinDistance(order=2, internal_p=2, mode="hera", delta=0.0001)(empty_diag, empty_diag) +# BottleneckDistance(epsilon=.001)(empty_diag, empty_diag) +# PersistenceFisherKernel(bandwidth_fisher=1., bandwidth=1.)(empty_diag, empty_diag) +# PersistenceFisherKernel(bandwidth_fisher=1., bandwidth=1., kernel_approx=RBFSampler(gamma=1./2, n_components=100000).fit(np.ones([1,2])))(empty_diag, empty_diag) -- cgit v1.2.3 From e4122147ee4643dbca6c65efebf83eb2adad6aec Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau Date: Wed, 20 Oct 2021 11:31:00 +0200 Subject: Make Entropy work and also fix a bug in the loop --- src/python/gudhi/representations/vector_methods.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 711c32a7..47c5224c 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -416,9 +416,12 @@ class Entropy(BaseEstimator, TransformerMixin): new_X = BirthPersistenceTransform().fit_transform(X) for i in range(num_diag): - orig_diagram, diagram, num_pts_in_diag = X[i], new_X[i], X[i].shape[0] - new_diagram = DiagramScaler(use=True, scalers=[([1], MaxAbsScaler())]).fit_transform([diagram])[0] + try: + new_diagram = DiagramScaler(use=True, scalers=[([1], MaxAbsScaler())]).fit_transform([diagram])[0] + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + new_diagram = np.empty(shape = [0, 2]) if self.mode == "scalar": ent = - np.sum( np.multiply(new_diagram[:,1], np.log(new_diagram[:,1])) ) @@ -432,12 +435,11 @@ class Entropy(BaseEstimator, TransformerMixin): max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution) for k in range(min_idx, max_idx): ent[k] += (-1) * new_diagram[j,1] * np.log(new_diagram[j,1]) - if self.normalized: - ent = ent / np.linalg.norm(ent, ord=1) - Xfit.append(np.reshape(ent,[1,-1])) - - Xfit = np.concatenate(Xfit, 0) + if self.normalized: + ent = ent / np.linalg.norm(ent, ord=1) + Xfit.append(np.reshape(ent,[1,-1])) + Xfit = np.concatenate(Xfit, axis=0) return Xfit def __call__(self, diag): -- cgit v1.2.3 From 4a0bc0fe1d6424da9bf979cfc322067a62f41cc9 Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau Date: Fri, 22 Oct 2021 12:44:07 +0200 Subject: Fix exception management when sklearn version < 1.0 --- src/python/gudhi/representations/vector_methods.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 47c5224c..b83c2a87 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -500,7 +500,11 @@ class TopologicalVector(BaseEstimator, TransformerMixin): diagram, num_pts_in_diag = X[i], X[i].shape[0] pers = 0.5 * (diagram[:,1]-diagram[:,0]) min_pers = np.minimum(pers,np.transpose(pers)) - distances = DistanceMetric.get_metric("chebyshev").pairwise(diagram) + # Works fine with sklearn 1.0, but an ValueError exception is thrown on past versions + try: + distances = DistanceMetric.get_metric("chebyshev").pairwise(diagram) + except ValueError: + distances = np.empty(shape = [0, 0]) vect = np.flip(np.sort(np.triu(np.minimum(distances, min_pers)), axis=None), 0) dim = min(len(vect), thresh) Xfit[i, :dim] = vect[:dim] -- cgit v1.2.3 From a2761c01ceb26a057b94be1d45433335704c1581 Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau Date: Thu, 4 Nov 2021 17:24:15 +0100 Subject: code review: try-except inside the if --- src/python/gudhi/representations/vector_methods.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index b83c2a87..e7ee57a4 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -44,15 +44,15 @@ class PersistenceImage(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - try: - if np.isnan(np.array(self.im_range)).any(): + if np.isnan(np.array(self.im_range)).any(): + try: new_X = BirthPersistenceTransform().fit_transform(X) pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(new_X,y) [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] self.im_range = np.where(np.isnan(np.array(self.im_range)), np.array([mx, Mx, my, My]), np.array(self.im_range)) - except ValueError: - # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 - pass + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + pass return self def transform(self, X): -- cgit v1.2.3 From 3094e1fe51acc49e4ea7e4f38648bb25d96784a4 Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau Date: Fri, 5 Nov 2021 10:27:46 +0100 Subject: code review: factorize sample range computation --- src/python/gudhi/representations/vector_methods.py | 46 ++++++++++++---------- 1 file changed, 26 insertions(+), 20 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index e7ee57a4..140162af 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -6,6 +6,7 @@ # # Modification(s): # - 2020/06 Martin: ATOL integration +# - 2021/11 Vincent Rouvreau: factorize _automatic_sample_range import numpy as np from sklearn.base import BaseEstimator, TransformerMixin @@ -98,6 +99,23 @@ class PersistenceImage(BaseEstimator, TransformerMixin): """ return self.fit_transform([diag])[0,:] +def _automatic_sample_range(sample_range, X, y): + """ + Compute and returns sample range from the persistence diagrams if one of the sample_range values is numpy.nan. + + Parameters: + sample_range (a numpy array of 2 float): minimum and maximum of all piecewise-linear function domains, of + the form [x_min, x_max]. + X (list of n x 2 numpy arrays): input persistence diagrams. + y (n x 1 array): persistence diagram labels (unused). + """ + nan_in_range = np.isnan(sample_range) + if nan_in_range.any(): + pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) + [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] + return np.where(nan_in_range, np.array([mx, My]), sample_range) + return sample_range + class Landscape(BaseEstimator, TransformerMixin): """ This is a class for computing persistence landscapes from a list of persistence diagrams. A persistence landscape is a collection of 1D piecewise-linear functions computed from the rank function associated to the persistence diagram. These piecewise-linear functions are then sampled evenly on a given range and the corresponding vectors of samples are concatenated and returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details. @@ -123,14 +141,11 @@ class Landscape(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - if self.nan_in_range.any(): - try: - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(self.nan_in_range, np.array([mx, My]), np.array(self.sample_range)) - except ValueError: - # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 - pass + try: + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + pass return self def transform(self, X): @@ -227,10 +242,7 @@ class Silhouette(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ try: - if np.isnan(np.array(self.sample_range)).any(): - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) except ValueError: # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 pass @@ -320,10 +332,7 @@ class BettiCurve(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ try: - if np.isnan(np.array(self.sample_range)).any(): - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) except ValueError: # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 pass @@ -391,10 +400,7 @@ class Entropy(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ try: - if np.isnan(np.array(self.sample_range)).any(): - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) except ValueError: # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 pass -- cgit v1.2.3 From 37d7743a91f7fb970425a06798ac6cb61b0be109 Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau Date: Fri, 5 Nov 2021 12:05:45 +0100 Subject: code review: try/except in function and assert on length of diagrams for error menagement --- src/python/gudhi/representations/vector_methods.py | 38 +++++++++------------- 1 file changed, 15 insertions(+), 23 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 140162af..e883b5dd 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -111,9 +111,14 @@ def _automatic_sample_range(sample_range, X, y): """ nan_in_range = np.isnan(sample_range) if nan_in_range.any(): - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - return np.where(nan_in_range, np.array([mx, My]), sample_range) + try: + pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) + [mx,my] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]] + [Mx,My] = [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] + return np.where(nan_in_range, np.array([mx, My]), sample_range) + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + pass return sample_range class Landscape(BaseEstimator, TransformerMixin): @@ -141,11 +146,7 @@ class Landscape(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - try: - self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - except ValueError: - # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 - pass + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) return self def transform(self, X): @@ -241,11 +242,7 @@ class Silhouette(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - try: - self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - except ValueError: - # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 - pass + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) return self def transform(self, X): @@ -331,11 +328,7 @@ class BettiCurve(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - try: - self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - except ValueError: - # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 - pass + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) return self def transform(self, X): @@ -399,11 +392,7 @@ class Entropy(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - try: - self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - except ValueError: - # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 - pass + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) return self def transform(self, X): @@ -427,6 +416,7 @@ class Entropy(BaseEstimator, TransformerMixin): new_diagram = DiagramScaler(use=True, scalers=[([1], MaxAbsScaler())]).fit_transform([diagram])[0] except ValueError: # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + assert len(diagram) == 0 new_diagram = np.empty(shape = [0, 2]) if self.mode == "scalar": @@ -510,6 +500,8 @@ class TopologicalVector(BaseEstimator, TransformerMixin): try: distances = DistanceMetric.get_metric("chebyshev").pairwise(diagram) except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + assert len(diagram) == 0 distances = np.empty(shape = [0, 0]) vect = np.flip(np.sort(np.triu(np.minimum(distances, min_pers)), axis=None), 0) dim = min(len(vect), thresh) -- cgit v1.2.3 From 27d66e5a8a101d80a7dd8b1f21e1cdfb7dedd98e Mon Sep 17 00:00:00 2001 From: Hind-M Date: Wed, 24 Nov 2021 11:03:18 +0100 Subject: Make the new BettiCurve class compatible with the old interface --- src/python/CMakeLists.txt | 4 +- src/python/gudhi/representations/vector_methods.py | 128 ++++++++++----------- .../test/test_betti_curve_representations.py | 15 ++- 3 files changed, 74 insertions(+), 73 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 26b8b7d6..2a5b961b 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -535,8 +535,8 @@ if(PYTHONINTERP_FOUND) add_gudhi_py_test(test_representations) endif() - # Betti curves. - if(SCIPY_FOUND) + # Betti curves + if(SKLEARN_FOUND AND SCIPY_FOUND) add_gudhi_py_test(test_betti_curve_representations) endif() diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 018e9b21..f1232040 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -311,12 +311,14 @@ class Silhouette(BaseEstimator, TransformerMixin): class BettiCurve(BaseEstimator, TransformerMixin): """ - Compute Betti curves from persistence diagrams. There are two modes of operation: with a predefined grid, and without. With a predefined grid, the class computes the Betti numbers at those grid points. Without a predefined grid, it can be fit to a list of persistence diagrams and produce a grid that consists of (at least) the filtration values at which at least one of those persistence diagrams changes Betti numbers, and then compute the Betti numbers at those grid points. In the latter mode, the exact Betti curve is computed for the entire real line. + Compute Betti curves from persistence diagrams. There are several modes of operation: with a given resolution (with or without a sample_range), with a predefined grid, and with none of the previous. With a predefined grid, the class computes the Betti numbers at those grid points. Without a predefined grid, if the resolution is set to None, it can be fit to a list of persistence diagrams and produce a grid that consists of (at least) the filtration values at which at least one of those persistence diagrams changes Betti numbers, and then compute the Betti numbers at those grid points. In the latter mode, the exact Betti curve is computed for the entire real line. Otherwise, if the resolution is given, the Betti curve is obtained by sampling evenly using either the given sample_range or based on the persistence diagrams. Parameters ---------- - predefined_grid: 1d array, triple or None, default=None - Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are OK. If a triple of the form (l, u, n), the grid will be uniform from l to u in n steps. If None (default), a grid will be computed that captures all changes in Betti numbers in the provided data. + resolution (int): number of sample for the piecewise-constant function (default 100). + sample_range ([double, double]): minimum and maximum of the piecewise-constant function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. + predefined_grid: 1d array or None, default=None + Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are OK. If None (default), and resolution is given, the grid will be uniform from x_min to x_max in 'resolution' steps, otherwise a grid will be computed that captures all changes in Betti numbers in the provided data. Attributes ---------- @@ -326,34 +328,31 @@ class BettiCurve(BaseEstimator, TransformerMixin): Examples -------- If pd is a persistence diagram and xs is a nonempty grid of finite values such that xs[0] >= pd.min(), then the result of - >>> bc = BettiCurve(xs) + >>> bc = BettiCurve(predefined_grid=xs) >>> result = bc(pd) and >>> from scipy.interpolate import interp1d - >>> bc = BettiCurve(None) + >>> bc = BettiCurve(resolution=None, predefined_grid=None) >>> bettis = bc.fit_transform([pd]) >>> interp = interp1d(bc.grid_, bettis[0, :], kind="previous", fill_value="extrapolate") >>> result = np.array(interp(xs), dtype=int) are the same. """ - def __init__(self, predefined_grid = None): - if isinstance(predefined_grid, tuple): - if len(predefined_grid) != 3: - raise ValueError("Expected array, None or triple.") + def __init__(self, resolution=100, sample_range=[np.nan, np.nan], predefined_grid=None): + if (predefined_grid is not None) and (not isinstance(predefined_grid, np.ndarray)): + raise ValueError("Expected array or None.") - self.predefined_grid = np.linspace(predefined_grid[0], predefined_grid[1], predefined_grid[2]) - else: - self.predefined_grid = predefined_grid + self.predefined_grid = predefined_grid + self.resolution = resolution + self.sample_range = sample_range - def is_fitted(self): return hasattr(self, "grid_") - def fit(self, X, y = None): """ - Compute a filtration grid that captures all changes in Betti numbers for all the given persistence diagrams, unless a predefined grid was provided. + Fit the BettiCurve class on a list of persistence diagrams: if any of the values in **sample_range** is numpy.nan, replace it with the corresponding value computed on the given list of persistence diagrams. When no predefined grid is provided and resolution set to None, compute a filtration grid that captures all changes in Betti numbers for all the given persistence diagrams. Parameters ---------- @@ -365,60 +364,17 @@ class BettiCurve(BaseEstimator, TransformerMixin): """ if self.predefined_grid is None: - events = np.unique(np.concatenate([pd.flatten() for pd in X] + [[-np.inf]], axis=0)) - self.grid_ = np.array(events) + if self.resolution is None: # Flexible/exact version + events = np.unique(np.concatenate([pd.flatten() for pd in X] + [[-np.inf]], axis=0)) + self.grid_ = np.array(events) + else: + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) + self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution) else: - self.grid_ = np.array(self.predefined_grid) - - - #self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) + self.grid_ = self.predefined_grid # Get the predefined grid from user return self - - def fit_transform(self, X): - """ - Find a sampling grid that captures all changes in Betti numbers, and compute those Betti numbers. The result is the same as fit(X) followed by transform(X), but potentially faster. - """ - - if self.predefined_grid is None: - if not X: - X = [np.zeros((0, 2))] - - N = len(X) - - events = np.concatenate([pd.flatten(order="F") for pd in X], axis=0) - sorting = np.argsort(events) - offsets = np.zeros(1 + N, dtype=int) - for i in range(0, N): - offsets[i+1] = offsets[i] + 2*X[i].shape[0] - starts = offsets[0:N] - ends = offsets[1:N + 1] - 1 - - xs = [-np.inf] - bettis = [[0] for i in range(0, N)] - - for i in sorting: - j = np.searchsorted(ends, i) - delta = 1 if i - starts[j] < len(X[j]) else -1 - if events[i] == xs[-1]: - bettis[j][-1] += delta - else: - xs.append(events[i]) - for k in range(0, j): - bettis[k].append(bettis[k][-1]) - bettis[j].append(bettis[j][-1] + delta) - for k in range(j+1, N): - bettis[k].append(bettis[k][-1]) - - self.grid_ = np.array(xs) - return np.array(bettis, dtype=int) - - else: - self.grid_ = self.predefined_grid - return self.transform(X) - - def transform(self, X): """ Compute Betti curves. @@ -464,12 +420,52 @@ class BettiCurve(BaseEstimator, TransformerMixin): return np.array(bettis, dtype=int)[:, 0:-1] + def fit_transform(self, X): + """ + Find a sampling grid that captures all changes in Betti numbers, and compute those Betti numbers. The result is the same as fit(X) followed by transform(X), but potentially faster. + """ + + if self.predefined_grid is None and self.resolution is None: + if not X: + X = [np.zeros((0, 2))] + + N = len(X) + + events = np.concatenate([pd.flatten(order="F") for pd in X], axis=0) + sorting = np.argsort(events) + offsets = np.zeros(1 + N, dtype=int) + for i in range(0, N): + offsets[i+1] = offsets[i] + 2*X[i].shape[0] + starts = offsets[0:N] + ends = offsets[1:N + 1] - 1 + + xs = [-np.inf] + bettis = [[0] for i in range(0, N)] + + for i in sorting: + j = np.searchsorted(ends, i) + delta = 1 if i - starts[j] < len(X[j]) else -1 + if events[i] == xs[-1]: + bettis[j][-1] += delta + else: + xs.append(events[i]) + for k in range(0, j): + bettis[k].append(bettis[k][-1]) + bettis[j].append(bettis[j][-1] + delta) + for k in range(j+1, N): + bettis[k].append(bettis[k][-1]) + + self.grid_ = np.array(xs) + return np.array(bettis, dtype=int) + + else: + return self.fit(X).transform(X) def __call__(self, diag): """ Shorthand for transform on a single persistence diagram. """ - return self.transform([diag])[0, :] + return self.fit_transform([diag])[0, :] diff --git a/src/python/test/test_betti_curve_representations.py b/src/python/test/test_betti_curve_representations.py index 3e77d760..6a45da4d 100755 --- a/src/python/test/test_betti_curve_representations.py +++ b/src/python/test/test_betti_curve_representations.py @@ -1,5 +1,6 @@ import numpy as np import scipy.interpolate +import pytest from gudhi.representations.vector_methods import BettiCurve @@ -19,18 +20,18 @@ def test_betti_curve_is_irregular_betti_curve_followed_by_interpolation(): pd[np.random.uniform(0, 1, n) < pinf, 1] = np.inf pds.append(pd) - bc = BettiCurve(None) + bc = BettiCurve(resolution=None, predefined_grid=None) bc.fit(pds) bettis = bc.transform(pds) - bc2 = BettiCurve(None) + bc2 = BettiCurve(resolution=None, predefined_grid=None) bettis2 = bc2.fit_transform(pds) assert((bc2.grid_ == bc.grid_).all()) assert((bettis2 == bettis).all()) for i in range(0, m): grid = np.linspace(pds[i][np.isfinite(pds[i])].min(), pds[i][np.isfinite(pds[i])].max() + 1, res) - bc_gridded = BettiCurve(grid) + bc_gridded = BettiCurve(predefined_grid=grid) bc_gridded.fit([]) bettis_gridded = bc_gridded(pds[i]) @@ -41,14 +42,18 @@ def test_betti_curve_is_irregular_betti_curve_followed_by_interpolation(): def test_empty_with_predefined_grid(): random_grid = np.sort(np.random.uniform(0, 1, 100)) - bc = BettiCurve(random_grid) + bc = BettiCurve(predefined_grid=random_grid) bettis = bc.fit_transform([]) assert((bc.grid_ == random_grid).all()) assert((bettis == 0).all()) def test_empty(): - bc = BettiCurve() + bc = BettiCurve(resolution=None, predefined_grid=None) bettis = bc.fit_transform([]) assert(bc.grid_ == [-np.inf]) assert((bettis == 0).all()) + +def test_wrong_value_of_predefined_grid(): + with pytest.raises(ValueError): + BettiCurve(predefined_grid=[1, 2, 3]) -- cgit v1.2.3 From d4303ede6ee862141e7fc89811d0d69b0b90a107 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Tue, 18 Jan 2022 10:57:56 +0100 Subject: Fix BettiCurve doc in source code --- src/python/gudhi/representations/vector_methods.py | 78 ++++++++++------------ 1 file changed, 37 insertions(+), 41 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index f1232040..f8078d03 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -312,36 +312,40 @@ class Silhouette(BaseEstimator, TransformerMixin): class BettiCurve(BaseEstimator, TransformerMixin): """ Compute Betti curves from persistence diagrams. There are several modes of operation: with a given resolution (with or without a sample_range), with a predefined grid, and with none of the previous. With a predefined grid, the class computes the Betti numbers at those grid points. Without a predefined grid, if the resolution is set to None, it can be fit to a list of persistence diagrams and produce a grid that consists of (at least) the filtration values at which at least one of those persistence diagrams changes Betti numbers, and then compute the Betti numbers at those grid points. In the latter mode, the exact Betti curve is computed for the entire real line. Otherwise, if the resolution is given, the Betti curve is obtained by sampling evenly using either the given sample_range or based on the persistence diagrams. + """ - Parameters - ---------- - resolution (int): number of sample for the piecewise-constant function (default 100). - sample_range ([double, double]): minimum and maximum of the piecewise-constant function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. - predefined_grid: 1d array or None, default=None - Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are OK. If None (default), and resolution is given, the grid will be uniform from x_min to x_max in 'resolution' steps, otherwise a grid will be computed that captures all changes in Betti numbers in the provided data. + def __init__(self, resolution=100, sample_range=[np.nan, np.nan], predefined_grid=None): + """ + Constructor for the BettiCurve class. - Attributes - ---------- - grid_: 1d array - The grid on which the Betti numbers are computed. If predefined_grid was specified, grid_ will always be that grid, independently of data. If not, the grid is fitted to capture all filtration values at which the Betti numbers change. + Parameters: + resolution (int): number of sample for the piecewise-constant function (default 100). + sample_range ([double, double]): minimum and maximum of the piecewise-constant function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. + predefined_grid (1d array or None, default=None): Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are ok. If None (default), and resolution is given, the grid will be uniform from x_min to x_max in 'resolution' steps, otherwise a grid will be computed that captures all changes in Betti numbers in the provided data. - Examples - -------- - If pd is a persistence diagram and xs is a nonempty grid of finite values such that xs[0] >= pd.min(), then the result of - >>> bc = BettiCurve(predefined_grid=xs) - >>> result = bc(pd) - and - >>> from scipy.interpolate import interp1d - >>> bc = BettiCurve(resolution=None, predefined_grid=None) - >>> bettis = bc.fit_transform([pd]) - >>> interp = interp1d(bc.grid_, bettis[0, :], kind="previous", fill_value="extrapolate") - >>> result = np.array(interp(xs), dtype=int) - are the same. - """ + Attributes: + grid_ (1d array): The grid on which the Betti numbers are computed. If predefined_grid was specified, `grid_` will always be that grid, independently of data. If not, the grid is fitted to capture all filtration values at which the Betti numbers change. + + Examples + -------- + If pd is a persistence diagram and xs is a nonempty grid of finite values such that xs[0] >= pd.min(), then the results of: + + >>> bc = BettiCurve(predefined_grid=xs) # doctest: +SKIP + >>> result = bc(pd) # doctest: +SKIP + + and + + >>> from scipy.interpolate import interp1d # doctest: +SKIP + >>> bc = BettiCurve(resolution=None, predefined_grid=None) # doctest: +SKIP + >>> bettis = bc.fit_transform([pd]) # doctest: +SKIP + >>> interp = interp1d(bc.grid_, bettis[0, :], kind="previous", fill_value="extrapolate") # doctest: +SKIP + >>> result = np.array(interp(xs), dtype=int) # doctest: +SKIP + + are the same. + """ - def __init__(self, resolution=100, sample_range=[np.nan, np.nan], predefined_grid=None): if (predefined_grid is not None) and (not isinstance(predefined_grid, np.ndarray)): - raise ValueError("Expected array or None.") + raise ValueError("Expected predefined_grid as array or None.") self.predefined_grid = predefined_grid self.resolution = resolution @@ -354,13 +358,9 @@ class BettiCurve(BaseEstimator, TransformerMixin): """ Fit the BettiCurve class on a list of persistence diagrams: if any of the values in **sample_range** is numpy.nan, replace it with the corresponding value computed on the given list of persistence diagrams. When no predefined grid is provided and resolution set to None, compute a filtration grid that captures all changes in Betti numbers for all the given persistence diagrams. - Parameters - ---------- - X: list of 2d arrays - Persistence diagrams. - - y: None. - Ignored. + Parameters: + X (list of 2d arrays): Persistence diagrams. + y (None): Ignored. """ if self.predefined_grid is None: @@ -379,15 +379,11 @@ class BettiCurve(BaseEstimator, TransformerMixin): """ Compute Betti curves. - Parameters - ---------- - X: list of 2d arrays - Persistence diagrams. + Parameters: + X (list of 2d arrays): Persistence diagrams. - Returns - ------- - (len(X))x(len(self.grid_)) array of ints - Betti numbers of the given persistence diagrams at the grid points given in self.grid_. + Returns: + `len(X).len(self.grid_)` array of ints: Betti numbers of the given persistence diagrams at the grid points given in `self.grid_` """ if not self.is_fitted(): @@ -422,7 +418,7 @@ class BettiCurve(BaseEstimator, TransformerMixin): def fit_transform(self, X): """ - Find a sampling grid that captures all changes in Betti numbers, and compute those Betti numbers. The result is the same as fit(X) followed by transform(X), but potentially faster. + The result is the same as fit(X) followed by transform(X), but potentially faster. """ if self.predefined_grid is None and self.resolution is None: -- cgit v1.2.3