summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/python/CMakeLists.txt5
-rw-r--r--src/python/gudhi/representations/vector_methods.py183
-rwxr-xr-xsrc/python/test/test_betti_curve_representations.py54
-rwxr-xr-xsrc/python/test/test_representations.py9
4 files changed, 125 insertions, 126 deletions
diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt
index 5c1402a6..e0e88880 100644
--- a/src/python/CMakeLists.txt
+++ b/src/python/CMakeLists.txt
@@ -512,6 +512,11 @@ if(PYTHONINTERP_FOUND)
add_gudhi_py_test(test_representations)
endif()
+ # Betti curves.
+ if(SCIPY_FOUND)
+ add_gudhi_py_test(test_betti_curve_representations)
+ endif()
+
# Time Delay
add_gudhi_py_test(test_time_delay)
diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index fda0a22d..814b6081 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -287,111 +287,52 @@ class Silhouette(BaseEstimator, TransformerMixin):
"""
return self.fit_transform([diag])[0,:]
+
class BettiCurve(BaseEstimator, TransformerMixin):
"""
- This is a class for computing Betti curves from a list of persistence diagrams. A Betti curve is a 1D piecewise-constant function obtained from the rank function. It is sampled evenly on a given range and the vector of samples is returned. See https://www.researchgate.net/publication/316604237_Time_Series_Classification_via_Topological_Data_Analysis for more details.
- """
- def __init__(self, resolution=100, sample_range=[np.nan, np.nan]):
- """
- Constructor for the BettiCurve class.
-
- Parameters:
- resolution (int): number of sample for the piecewise-constant function (default 100).
- sample_range ([double, double]): minimum and maximum of the piecewise-constant function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
- """
- self.resolution, self.sample_range = resolution, sample_range
-
- def fit(self, X, y=None):
- """
- Fit the BettiCurve class on a list of persistence diagrams: if any of the values in **sample_range** is numpy.nan, replace it with the corresponding value computed on the given list of persistence diagrams.
-
- Parameters:
- X (list of n x 2 numpy arrays): input persistence diagrams.
- y (n x 1 array): persistence diagram labels (unused).
- """
- if np.isnan(np.array(self.sample_range)).any():
- pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
- [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
- self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range))
- return self
-
- def transform(self, X):
- """
- Compute the Betti curve for each persistence diagram individually and concatenate the results.
-
- Parameters:
- X (list of n x 2 numpy arrays): input persistence diagrams.
-
- Returns:
- numpy array with shape (number of diagrams) x (**resolution**): output Betti curves.
- """
- Xfit = []
- x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution)
- step_x = x_values[1] - x_values[0]
+ Compute Betti curves from persistence diagrams. There are two modes of operation: with a predefined grid, and without. With a predefined grid, the class computes the Betti numbers at those grid points. Without a predefined grid, it can be fit to a list of persistence diagrams and produce a grid that consists of (at least) the filtration values at which at least one of those persistence diagrams changes Betti numbers, and then compute the Betti numbers at those grid points. In the latter mode, the exact Betti curve is computed for the entire real line.
- for diagram in X:
- diagram_int = np.clip(np.ceil((diagram[:,:2] - self.sample_range[0]) / step_x), 0, self.resolution).astype(int)
- bc = np.zeros(self.resolution)
- for interval in diagram_int:
- bc[interval[0]:interval[1]] += 1
- Xfit.append(np.reshape(bc,[1,-1]))
-
- Xfit = np.concatenate(Xfit, 0)
+ Parameters
+ ----------
+ predefined_grid: 1d array, triple or None, default=None
+ Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are OK. If a triple of the form (l, u, n), the grid will be uniform from l to u in n steps. If None (default), a grid will be computed that captures all changes in Betti numbers in the provided data.
- return Xfit
-
- def __call__(self, diag):
- """
- Apply BettiCurve on a single persistence diagram and outputs the result.
-
- Parameters:
- diag (n x 2 numpy array): input persistence diagram.
-
- Returns:
- numpy array with shape (**resolution**): output Betti curve.
- """
- return self.fit_transform([diag])[0,:]
-
-
-class BettiCurve2(BaseEstimator, TransformerMixin):
- """
- A more flexible replacement for the BettiCurve class.
+ Attributes
+ ----------
+ grid_: 1d array
+ The grid on which the Betti numbers are computed. If predefined_grid was specified, grid_ will always be that grid, independently of data. If not, the grid is fitted to capture all filtration values at which the Betti numbers change.
Examples
--------
- If pd is a persistence diagram and xs is a grid such that xs[0] >= pd.min(), then the result of
- >>> bc = BettiCurve2(xs)
+ If pd is a persistence diagram and xs is a nonempty grid of finite values such that xs[0] >= pd.min(), then the result of
+ >>> bc = BettiCurve(xs)
>>> result = bc(pd)
and
>>> from scipy.interpolate import interp1d
- >>> bc = BettiCurve2(None)
+ >>> bc = BettiCurve(None)
>>> bettis = bc.fit_transform([pd])
>>> interp = interp1d(bc.grid_, bettis[0, :], kind="previous", fill_value="extrapolate")
>>> result = np.array(interp(xs), dtype=int)
are the same.
"""
- def __init__(self, grid = None):
- """
- Constructor for the BettiCurve class.
-
- Parameters
- ----------
- grid: 1d array or None, default=None
- Filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinites are OK. If None (default), a grid will be computed that captures all the filtration value changes.
+ def __init__(self, predefined_grid = None):
+ if isinstance(predefined_grid, tuple):
+ if len(predefined_grid) != 3:
+ raise ValueError("Expected array, None or triple.")
- Attributes
- ----------
- grid_: 1d array
- Contains the compute grid after fit or fit_transform.
- """
+ self.predefined_grid = np.linspace(predefined_grid[0], predefined_grid[1], predefined_grid[2])
+ else:
+ self.predefined_grid = predefined_grid
- self.grid_ = np.array(grid)
+
+ def is_fitted(self):
+ return hasattr(self, "grid_")
def fit(self, X, y = None):
"""
- Compute a filtration grid that captures all changes in Betti numbers for all the given persistence diagrams.
+ Compute a filtration grid that captures all changes in Betti numbers for all the given persistence diagrams, unless a predefined grid was provided.
Parameters
----------
@@ -402,12 +343,11 @@ class BettiCurve2(BaseEstimator, TransformerMixin):
Ignored.
"""
- events = np.unique(np.concatenate([pd.flatten() for pd in X], axis=0))
-
- if len(events) == 0:
- self.grid_ = np.array([-np.inf])
- else:
+ if self.predefined_grid is None:
+ events = np.unique(np.concatenate([pd.flatten() for pd in X] + [[-np.inf]], axis=0))
self.grid_ = np.array(events)
+ else:
+ self.grid_ = np.array(self.predefined_grid)
return self
@@ -417,37 +357,42 @@ class BettiCurve2(BaseEstimator, TransformerMixin):
Find a sampling grid that captures all changes in Betti numbers, and compute those Betti numbers. The result is the same as fit(X) followed by transform(X), but potentially faster.
"""
- N = len(X)
+ if self.predefined_grid is None:
+ if not X:
+ X = [np.zeros((0, 2))]
+
+ N = len(X)
- events = np.concatenate([pd.flatten(order="F") for pd in X], axis=0)
- sorting = np.argsort(events)
- offsets = np.zeros(1 + N, dtype=int)
- for i in range(0, N):
- offsets[i+1] = offsets[i] + 2*X[i].shape[0]
- starts = offsets[0:N]
- ends = offsets[1:N + 1] - 1
+ events = np.concatenate([pd.flatten(order="F") for pd in X], axis=0)
+ sorting = np.argsort(events)
+ offsets = np.zeros(1 + N, dtype=int)
+ for i in range(0, N):
+ offsets[i+1] = offsets[i] + 2*X[i].shape[0]
+ starts = offsets[0:N]
+ ends = offsets[1:N + 1] - 1
- bettis = [[0] for i in range(0, N)]
- if len(sorting) == 0:
xs = [-np.inf]
- else:
- xs = [events[sorting[0]]]
-
- for i in sorting:
- j = np.searchsorted(ends, i)
- delta = 1 if i - starts[j] < len(X[j]) else -1
- if events[i] == xs[-1]:
- bettis[j][-1] += delta
- else:
- xs.append(events[i])
- for k in range(0, j):
- bettis[k].append(bettis[k][-1])
- bettis[j].append(bettis[j][-1] + delta)
- for k in range(j+1, N):
- bettis[k].append(bettis[k][-1])
+ bettis = [[0] for i in range(0, N)]
+
+ for i in sorting:
+ j = np.searchsorted(ends, i)
+ delta = 1 if i - starts[j] < len(X[j]) else -1
+ if events[i] == xs[-1]:
+ bettis[j][-1] += delta
+ else:
+ xs.append(events[i])
+ for k in range(0, j):
+ bettis[k].append(bettis[k][-1])
+ bettis[j].append(bettis[j][-1] + delta)
+ for k in range(j+1, N):
+ bettis[k].append(bettis[k][-1])
+
+ self.grid_ = np.array(xs)
+ return np.array(bettis, dtype=int)
- self.grid_ = np.array(xs)
- return np.array(bettis, dtype=int)
+ else:
+ self.grid_ = self.predefined_grid
+ return self.transform(X)
def transform(self, X):
@@ -465,9 +410,12 @@ class BettiCurve2(BaseEstimator, TransformerMixin):
Betti numbers of the given persistence diagrams at the grid points given in self.grid_.
"""
- if self.grid_ is None:
- raise NotFittedError("Not fitted. You need to call fit or construct with a chosen sampling grid.")
+ if not self.is_fitted():
+ raise NotFittedError("Not fitted.")
+ if not X:
+ X = [np.zeros((0, 2))]
+
N = len(X)
events = np.concatenate([pd.flatten(order="F") for pd in X], axis=0)
@@ -500,6 +448,7 @@ class BettiCurve2(BaseEstimator, TransformerMixin):
return self.transform([diag])[0, :]
+
class Entropy(BaseEstimator, TransformerMixin):
"""
This is a class for computing persistence entropy. Persistence entropy is a statistic for persistence diagrams inspired from Shannon entropy. This statistic can also be used to compute a feature vector, called the entropy summary function. See https://arxiv.org/pdf/1803.08304.pdf for more details. Note that a previous implementation was contributed by Manuel Soriano-Trigueros.
diff --git a/src/python/test/test_betti_curve_representations.py b/src/python/test/test_betti_curve_representations.py
new file mode 100755
index 00000000..3e77d760
--- /dev/null
+++ b/src/python/test/test_betti_curve_representations.py
@@ -0,0 +1,54 @@
+import numpy as np
+import scipy.interpolate
+
+from gudhi.representations.vector_methods import BettiCurve
+
+def test_betti_curve_is_irregular_betti_curve_followed_by_interpolation():
+ m = 10
+ n = 1000
+ pinf = 0.05
+ pzero = 0.05
+ res = 100
+
+ pds = []
+ for i in range(0, m):
+ pd = np.zeros((n, 2))
+ pd[:, 0] = np.random.uniform(0, 10, n)
+ pd[:, 1] = np.random.uniform(pd[:, 0], 10, n)
+ pd[np.random.uniform(0, 1, n) < pzero, 0] = 0
+ pd[np.random.uniform(0, 1, n) < pinf, 1] = np.inf
+ pds.append(pd)
+
+ bc = BettiCurve(None)
+ bc.fit(pds)
+ bettis = bc.transform(pds)
+
+ bc2 = BettiCurve(None)
+ bettis2 = bc2.fit_transform(pds)
+ assert((bc2.grid_ == bc.grid_).all())
+ assert((bettis2 == bettis).all())
+
+ for i in range(0, m):
+ grid = np.linspace(pds[i][np.isfinite(pds[i])].min(), pds[i][np.isfinite(pds[i])].max() + 1, res)
+ bc_gridded = BettiCurve(grid)
+ bc_gridded.fit([])
+ bettis_gridded = bc_gridded(pds[i])
+
+ interp = scipy.interpolate.interp1d(bc.grid_, bettis[i, :], kind="previous", fill_value="extrapolate")
+ bettis_interp = np.array(interp(grid), dtype=int)
+ assert((bettis_interp == bettis_gridded).all())
+
+
+def test_empty_with_predefined_grid():
+ random_grid = np.sort(np.random.uniform(0, 1, 100))
+ bc = BettiCurve(random_grid)
+ bettis = bc.fit_transform([])
+ assert((bc.grid_ == random_grid).all())
+ assert((bettis == 0).all())
+
+
+def test_empty():
+ bc = BettiCurve()
+ bettis = bc.fit_transform([])
+ assert(bc.grid_ == [-np.inf])
+ assert((bettis == 0).all())
diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py
index 43c914f3..86439655 100755
--- a/src/python/test/test_representations.py
+++ b/src/python/test/test_representations.py
@@ -63,12 +63,3 @@ def test_dummy_atol():
atol_vectoriser.transform(X=[a, b, c])
-from gudhi.representations.vector_methods import BettiCurve
-
-
-def test_infinity():
- a = np.array([[1.0, 8.0], [2.0, np.inf], [3.0, 4.0]])
- c = BettiCurve(20, [0.0, 10.0])(a)
- assert c[1] == 0
- assert c[7] == 3
- assert c[9] == 2