From 5dc55d25f71b16bd1a80f4dc9ebdfad1d861ee0d Mon Sep 17 00:00:00 2001 From: Gard Spreemann Date: Sun, 20 Dec 2020 15:22:22 +0100 Subject: Add tests for BettiCurve2. --- src/python/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/python/CMakeLists.txt') diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 5c1402a6..e0e88880 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -512,6 +512,11 @@ if(PYTHONINTERP_FOUND) add_gudhi_py_test(test_representations) endif() + # Betti curves. + if(SCIPY_FOUND) + add_gudhi_py_test(test_betti_curve_representations) + endif() + # Time Delay add_gudhi_py_test(test_time_delay) -- cgit v1.2.3 From 27d66e5a8a101d80a7dd8b1f21e1cdfb7dedd98e Mon Sep 17 00:00:00 2001 From: Hind-M Date: Wed, 24 Nov 2021 11:03:18 +0100 Subject: Make the new BettiCurve class compatible with the old interface --- src/python/CMakeLists.txt | 4 +- src/python/gudhi/representations/vector_methods.py | 128 ++++++++++----------- .../test/test_betti_curve_representations.py | 15 ++- 3 files changed, 74 insertions(+), 73 deletions(-) (limited to 'src/python/CMakeLists.txt') diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 26b8b7d6..2a5b961b 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -535,8 +535,8 @@ if(PYTHONINTERP_FOUND) add_gudhi_py_test(test_representations) endif() - # Betti curves. - if(SCIPY_FOUND) + # Betti curves + if(SKLEARN_FOUND AND SCIPY_FOUND) add_gudhi_py_test(test_betti_curve_representations) endif() diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 018e9b21..f1232040 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -311,12 +311,14 @@ class Silhouette(BaseEstimator, TransformerMixin): class BettiCurve(BaseEstimator, TransformerMixin): """ - Compute Betti curves from persistence diagrams. 
There are two modes of operation: with a predefined grid, and without. With a predefined grid, the class computes the Betti numbers at those grid points. Without a predefined grid, it can be fit to a list of persistence diagrams and produce a grid that consists of (at least) the filtration values at which at least one of those persistence diagrams changes Betti numbers, and then compute the Betti numbers at those grid points. In the latter mode, the exact Betti curve is computed for the entire real line. + Compute Betti curves from persistence diagrams. There are several modes of operation: with a given resolution (with or without a sample_range), with a predefined grid, and with none of the previous. With a predefined grid, the class computes the Betti numbers at those grid points. Without a predefined grid, if the resolution is set to None, it can be fit to a list of persistence diagrams and produce a grid that consists of (at least) the filtration values at which at least one of those persistence diagrams changes Betti numbers, and then compute the Betti numbers at those grid points. In the latter mode, the exact Betti curve is computed for the entire real line. Otherwise, if the resolution is given, the Betti curve is obtained by sampling evenly using either the given sample_range or based on the persistence diagrams. Parameters ---------- - predefined_grid: 1d array, triple or None, default=None - Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are OK. If a triple of the form (l, u, n), the grid will be uniform from l to u in n steps. If None (default), a grid will be computed that captures all changes in Betti numbers in the provided data. + resolution (int): number of samples for the piecewise-constant function (default 100). + sample_range ([double, double]): minimum and maximum of the piecewise-constant function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). 
It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. + predefined_grid: 1d array or None, default=None + Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are OK. If None (default), and resolution is given, the grid will be uniform from x_min to x_max in 'resolution' steps, otherwise a grid will be computed that captures all changes in Betti numbers in the provided data. Attributes ---------- @@ -326,34 +328,31 @@ class BettiCurve(BaseEstimator, TransformerMixin): Examples -------- If pd is a persistence diagram and xs is a nonempty grid of finite values such that xs[0] >= pd.min(), then the result of - >>> bc = BettiCurve(xs) + >>> bc = BettiCurve(predefined_grid=xs) >>> result = bc(pd) and >>> from scipy.interpolate import interp1d - >>> bc = BettiCurve(None) + >>> bc = BettiCurve(resolution=None, predefined_grid=None) >>> bettis = bc.fit_transform([pd]) >>> interp = interp1d(bc.grid_, bettis[0, :], kind="previous", fill_value="extrapolate") >>> result = np.array(interp(xs), dtype=int) are the same. 
""" - def __init__(self, predefined_grid = None): - if isinstance(predefined_grid, tuple): - if len(predefined_grid) != 3: - raise ValueError("Expected array, None or triple.") + def __init__(self, resolution=100, sample_range=[np.nan, np.nan], predefined_grid=None): + if (predefined_grid is not None) and (not isinstance(predefined_grid, np.ndarray)): + raise ValueError("Expected array or None.") - self.predefined_grid = np.linspace(predefined_grid[0], predefined_grid[1], predefined_grid[2]) - else: - self.predefined_grid = predefined_grid + self.predefined_grid = predefined_grid + self.resolution = resolution + self.sample_range = sample_range - def is_fitted(self): return hasattr(self, "grid_") - def fit(self, X, y = None): """ - Compute a filtration grid that captures all changes in Betti numbers for all the given persistence diagrams, unless a predefined grid was provided. + Fit the BettiCurve class on a list of persistence diagrams: if any of the values in **sample_range** is numpy.nan, replace it with the corresponding value computed on the given list of persistence diagrams. When no predefined grid is provided and resolution set to None, compute a filtration grid that captures all changes in Betti numbers for all the given persistence diagrams. 
Parameters ---------- @@ -365,60 +364,17 @@ class BettiCurve(BaseEstimator, TransformerMixin): """ if self.predefined_grid is None: - events = np.unique(np.concatenate([pd.flatten() for pd in X] + [[-np.inf]], axis=0)) - self.grid_ = np.array(events) + if self.resolution is None: # Flexible/exact version + events = np.unique(np.concatenate([pd.flatten() for pd in X] + [[-np.inf]], axis=0)) + self.grid_ = np.array(events) + else: + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) + self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution) else: - self.grid_ = np.array(self.predefined_grid) - - - #self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) + self.grid_ = self.predefined_grid # Get the predefined grid from user return self - - def fit_transform(self, X): - """ - Find a sampling grid that captures all changes in Betti numbers, and compute those Betti numbers. The result is the same as fit(X) followed by transform(X), but potentially faster. 
- """ - - if self.predefined_grid is None: - if not X: - X = [np.zeros((0, 2))] - - N = len(X) - - events = np.concatenate([pd.flatten(order="F") for pd in X], axis=0) - sorting = np.argsort(events) - offsets = np.zeros(1 + N, dtype=int) - for i in range(0, N): - offsets[i+1] = offsets[i] + 2*X[i].shape[0] - starts = offsets[0:N] - ends = offsets[1:N + 1] - 1 - - xs = [-np.inf] - bettis = [[0] for i in range(0, N)] - - for i in sorting: - j = np.searchsorted(ends, i) - delta = 1 if i - starts[j] < len(X[j]) else -1 - if events[i] == xs[-1]: - bettis[j][-1] += delta - else: - xs.append(events[i]) - for k in range(0, j): - bettis[k].append(bettis[k][-1]) - bettis[j].append(bettis[j][-1] + delta) - for k in range(j+1, N): - bettis[k].append(bettis[k][-1]) - - self.grid_ = np.array(xs) - return np.array(bettis, dtype=int) - - else: - self.grid_ = self.predefined_grid - return self.transform(X) - - def transform(self, X): """ Compute Betti curves. @@ -464,12 +420,52 @@ class BettiCurve(BaseEstimator, TransformerMixin): return np.array(bettis, dtype=int)[:, 0:-1] + def fit_transform(self, X): + """ + Find a sampling grid that captures all changes in Betti numbers, and compute those Betti numbers. The result is the same as fit(X) followed by transform(X), but potentially faster. 
+ """ + + if self.predefined_grid is None and self.resolution is None: + if not X: + X = [np.zeros((0, 2))] + + N = len(X) + + events = np.concatenate([pd.flatten(order="F") for pd in X], axis=0) + sorting = np.argsort(events) + offsets = np.zeros(1 + N, dtype=int) + for i in range(0, N): + offsets[i+1] = offsets[i] + 2*X[i].shape[0] + starts = offsets[0:N] + ends = offsets[1:N + 1] - 1 + + xs = [-np.inf] + bettis = [[0] for i in range(0, N)] + + for i in sorting: + j = np.searchsorted(ends, i) + delta = 1 if i - starts[j] < len(X[j]) else -1 + if events[i] == xs[-1]: + bettis[j][-1] += delta + else: + xs.append(events[i]) + for k in range(0, j): + bettis[k].append(bettis[k][-1]) + bettis[j].append(bettis[j][-1] + delta) + for k in range(j+1, N): + bettis[k].append(bettis[k][-1]) + + self.grid_ = np.array(xs) + return np.array(bettis, dtype=int) + + else: + return self.fit(X).transform(X) def __call__(self, diag): """ Shorthand for transform on a single persistence diagram. """ - return self.transform([diag])[0, :] + return self.fit_transform([diag])[0, :] diff --git a/src/python/test/test_betti_curve_representations.py b/src/python/test/test_betti_curve_representations.py index 3e77d760..6a45da4d 100755 --- a/src/python/test/test_betti_curve_representations.py +++ b/src/python/test/test_betti_curve_representations.py @@ -1,5 +1,6 @@ import numpy as np import scipy.interpolate +import pytest from gudhi.representations.vector_methods import BettiCurve @@ -19,18 +20,18 @@ def test_betti_curve_is_irregular_betti_curve_followed_by_interpolation(): pd[np.random.uniform(0, 1, n) < pinf, 1] = np.inf pds.append(pd) - bc = BettiCurve(None) + bc = BettiCurve(resolution=None, predefined_grid=None) bc.fit(pds) bettis = bc.transform(pds) - bc2 = BettiCurve(None) + bc2 = BettiCurve(resolution=None, predefined_grid=None) bettis2 = bc2.fit_transform(pds) assert((bc2.grid_ == bc.grid_).all()) assert((bettis2 == bettis).all()) for i in range(0, m): grid = 
np.linspace(pds[i][np.isfinite(pds[i])].min(), pds[i][np.isfinite(pds[i])].max() + 1, res) - bc_gridded = BettiCurve(grid) + bc_gridded = BettiCurve(predefined_grid=grid) bc_gridded.fit([]) bettis_gridded = bc_gridded(pds[i]) @@ -41,14 +42,18 @@ def test_betti_curve_is_irregular_betti_curve_followed_by_interpolation(): def test_empty_with_predefined_grid(): random_grid = np.sort(np.random.uniform(0, 1, 100)) - bc = BettiCurve(random_grid) + bc = BettiCurve(predefined_grid=random_grid) bettis = bc.fit_transform([]) assert((bc.grid_ == random_grid).all()) assert((bettis == 0).all()) def test_empty(): - bc = BettiCurve() + bc = BettiCurve(resolution=None, predefined_grid=None) bettis = bc.fit_transform([]) assert(bc.grid_ == [-np.inf]) assert((bettis == 0).all()) + +def test_wrong_value_of_predefined_grid(): + with pytest.raises(ValueError): + BettiCurve(predefined_grid=[1, 2, 3]) -- cgit v1.2.3