From 26c62d8a7b9567ef354fcbe6360856fe1f133641 Mon Sep 17 00:00:00 2001 From: Gard Spreemann Date: Sun, 15 Nov 2020 17:34:50 +0100 Subject: Add support for irregular (non-evenly, completely sampled) Betti curves. There's a conceptual difference in the meaning of the constructor's argument when compared to the existing BettiCurve class. In IrregularBettiCurve, only intervals born in [x_min, x_max) contribute to the Betti curve. The behavior of BettiCurve is of less use when not sampling evenly. TODO: We might want to move this "cropping" of the persistence diagram to a more general class that transforms PDs, instead of arbitrarily having it as part of the IrregularBettiCurve class. --- src/python/gudhi/representations/vector_methods.py | 89 +++++++++++++++++++++- 1 file changed, 88 insertions(+), 1 deletion(-) (limited to 'src/python') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index cdcb1fde..8c51cdd8 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -1,11 +1,12 @@ # This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. # See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. -# Author(s): Mathieu Carrière, Martin Royer +# Author(s): Mathieu Carrière, Martin Royer, Gard Spreemann # # Copyright (C) 2018-2020 Inria # # Modification(s): # - 2020/06 Martin: ATOL integration +# - 2020/11 Gard: Irregular (non-evenly sampled) Betti curves. import numpy as np from sklearn.base import BaseEstimator, TransformerMixin @@ -350,6 +351,92 @@ class BettiCurve(BaseEstimator, TransformerMixin): """ return self.fit_transform([diag])[0,:] + +class IrregularBettiCurve(BaseEstimator, TransformerMixin): + """ + This is a class for computing Betti curves from a list of persistence diagrams. Unlike the BettiCurve class, the curves are not sampled at fixed, regular points. Each curve has all its data intact. + """ + + def __init__(self, filtration_range = [np.nan, np.nan]): + """ + Constructor for the IrregularBettiCurve class. + + Parameters: + filtration_range ([float, float]): minimum and maximum filtration range to consider, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). The behavior is not the same as for sample_range in BettiCurve. In particular, intervals born before x_min are not considered. Intervals born in [x_min, x_max) contribute to the Betti curves as if they were infinite. + """ + self.filtration_range = np.array(filtration_range) + + def fit(self, X, y = None): + """ + See the corresponding function in BettiCurve. + """ + if np.isnan(self.filtration_range).any(): + pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) + [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] + self.filtration_range = np.where(np.isnan(self.filtration_range), np.array([mx, My]), np.array(self.filtration_range)) + + return self + + def transform(self, X): + """ + Compute the Betti curve for each persistence diagram. + + Parameters: + X (list of n x 2 NumPy arrays): input persistence diagrams. + + Returns: + Pair (xs, bettis), where xs is a length-k NumPy array with all filtration values for which any of the Betti curves change value, and bettis is an (number of diagrams) x k NumPy integer array with the values of the k Betti curves at the corresponding filtration values. The value bettis[i, k] is the value of the Betti curve of the k'th diagram on the interval [xs[i], xs[i+1]). + """ + assert(not np.isnan(self.filtration_range).any()) + + N = len(X) + cropped_pds = [pd[pd[:, 0] >= self.filtration_range[0]] for pd in X] + + events = np.concatenate([pd.flatten(order="F") for pd in cropped_pds], axis=0) + sorting = np.argsort(events) + offsets = np.zeros(1 + N, dtype=int) + for i in range(0, N): + offsets[i+1] = offsets[i] + 2*cropped_pds[i].shape[0] + starts = offsets[0:N] + ends = offsets[1:N + 1] - 1 + + xs = [self.filtration_range[0]] + bettis = [[0] for i in range(0, N)] + + for i in sorting: + if events[i] >= self.filtration_range[1]: + break + j = np.searchsorted(ends, i) + delta = 1 if i - starts[j] < len(cropped_pds[j]) else -1 + if events[i] == xs[-1]: + bettis[j][-1] += delta + else: + xs.append(events[i]) + for k in range(0, j): + bettis[k].append(bettis[k][-1]) + bettis[j].append(bettis[j][-1] + delta) + for k in range(j+1, N): + bettis[k].append(bettis[k][-1]) + + xs = np.array(xs) + bettis = np.array(bettis, dtype=int) + + return(xs, bettis) + + def __call__(self, diag): + """ + Apply IrregularBettiCurve on a single persistence diagram and outputs the result. + + Parameters: + diag (n x 2 numpy array): input persistence diagram. + + Returns: + Pair (xs, b) of 1D NumPy arrays, xs containing the filtration values and b containing the Betti curve values. The Betti curve takes the value b[i] on the interval [xs[i], xs[i+1]). + """ + res = self.fit_transform([diag]) + return (res[0], res[1][0, :]) + + class Entropy(BaseEstimator, TransformerMixin): """ This is a class for computing persistence entropy. Persistence entropy is a statistic for persistence diagrams inspired from Shannon entropy. This statistic can also be used to compute a feature vector, called the entropy summary function. See https://arxiv.org/pdf/1803.08304.pdf for more details. Note that a previous implementation was contributed by Manuel Soriano-Trigueros. -- cgit v1.2.3