Add support for irregular (non-evenly, completely sampled) Betti curves.

There's a conceptual difference in the meaning of the constructor's argument when compared to the existing BettiCurve class. In IrregularBettiCurve, only intervals born in [x_min, x_max) contribute to the Betti curve. The behavior of BettiCurve is of less use when not sampling evenly. TODO: We might want to move this "cropping" of the persistence diagram to a more general class that transforms PDs, instead of arbitrarily having it as part of the IrregularBettiCurve class.
author: Gard Spreemann <gspr@nonempty.org> 2020-11-15 17:34:50 +0100
committer: Gard Spreemann <gspr@nonempty.org> 2020-11-15 18:22:16 +0100
commit: 26c62d8a7b9567ef354fcbe6360856fe1f133641 (patch)
tree: ddc9e84cff77335fa3b43f9bbd0291af8719e874
parent: ad5d38986542715e0a0518537afaadcda71d9c49 (diff)
1 files changed, 88 insertions, 1 deletions
diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index cdcb1fde..8c51cdd8 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -1,11 +1,12 @@
 # This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
 # See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
-# Author(s):       Mathieu Carrière, Martin Royer
+# Author(s):       Mathieu Carrière, Martin Royer, Gard Spreemann
 #
 # Copyright (C) 2018-2020 Inria
 #
 # Modification(s):
 #   - 2020/06 Martin: ATOL integration
+#   - 2020/11 Gard: Irregular (non-evenly sampled) Betti curves.
 
 import numpy as np
 from sklearn.base          import BaseEstimator, TransformerMixin
@@ -350,6 +351,92 @@ class BettiCurve(BaseEstimator, TransformerMixin):
         """
         return self.fit_transform([diag])[0,:]
 
+
+class IrregularBettiCurve(BaseEstimator, TransformerMixin):
+    """
+    This is a class for computing Betti curves from a list of persistence diagrams. Unlike the BettiCurve class, the curves are not sampled at fixed, regular points. Each curve has all its data intact.
+    """
+
+    def __init__(self, filtration_range = [np.nan, np.nan]):
+        """
+        Constructor for the IrregularBettiCurve class.
+
+        Parameters:
+            filtration_range ([float, float]): minimum and maximum filtration range to consider, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). The behavior is not the same as for sample_range in BettiCurve. In particular, intervals born before x_min are not considered. Intervals born in [x_min, x_max) contribute to the Betti curves as if they were infinite.
+        """
+        self.filtration_range = np.array(filtration_range)
+
+    def fit(self, X, y = None):
+        """
+        See the corresponding function in BettiCurve.
+        """
+        if np.isnan(self.filtration_range).any():
+            pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
+            [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
+            self.filtration_range = np.where(np.isnan(self.filtration_range), np.array([mx, My]), np.array(self.filtration_range))
+
+        return self
+
+    def transform(self, X):
+        """
+        Compute the Betti curve for each persistence diagram.
+
+        Parameters:
+            X (list of n x 2 NumPy arrays): input persistence diagrams.
+        
+        Returns:
+            Pair (xs, bettis), where xs is a length-k NumPy array with all filtration values for which any of the Betti curves change value, and bettis is an (number of diagrams) x k NumPy integer array with the values of the k Betti curves at the corresponding filtration values. The value bettis[i, k] is the value of the Betti curve of the k'th diagram on the interval [xs[i], xs[i+1]).
+        """
+        assert(not np.isnan(self.filtration_range).any())
+
+        N = len(X)
+        cropped_pds = [pd[pd[:, 0] >= self.filtration_range[0]] for pd in X]
+
+        events = np.concatenate([pd.flatten(order="F") for pd in cropped_pds], axis=0)
+        sorting = np.argsort(events)
+        offsets = np.zeros(1 + N, dtype=int)
+        for i in range(0, N):
+            offsets[i+1] = offsets[i] + 2*cropped_pds[i].shape[0]
+        starts = offsets[0:N]
+        ends = offsets[1:N + 1] - 1
+
+        xs = [self.filtration_range[0]]
+        bettis = [[0] for i in range(0, N)]
+
+        for i in sorting:
+            if events[i] >= self.filtration_range[1]:
+                break
+            j = np.searchsorted(ends, i)
+            delta = 1 if i - starts[j] < len(cropped_pds[j]) else -1
+            if events[i] == xs[-1]:
+                bettis[j][-1] += delta
+            else:
+                xs.append(events[i])
+                for k in range(0, j):
+                    bettis[k].append(bettis[k][-1])
+                bettis[j].append(bettis[j][-1] + delta)
+                for k in range(j+1, N):
+                    bettis[k].append(bettis[k][-1])
+
+        xs = np.array(xs)
+        bettis = np.array(bettis, dtype=int)
+
+        return(xs, bettis)
+
+    def __call__(self, diag):
+        """
+        Apply IrregularBettiCurve on a single persistence diagram and outputs the result.
+
+        Parameters:
+            diag (n x 2 numpy array): input persistence diagram.
+
+        Returns:
+            Pair (xs, b) of 1D NumPy arrays, xs containing the filtration values and b containing the Betti curve values. The Betti curve takes the value b[i] on the interval [xs[i], xs[i+1]).
+        """
+        res = self.fit_transform([diag])
+        return (res[0], res[1][0, :])
+
+
 class Entropy(BaseEstimator, TransformerMixin):
     """
     This is a class for computing persistence entropy. Persistence entropy is a statistic for persistence diagrams inspired from Shannon entropy. This statistic can also be used to compute a feature vector, called the entropy summary function. See https://arxiv.org/pdf/1803.08304.pdf for more details. Note that a previous implementation was contributed by Manuel Soriano-Trigueros.
author	Gard Spreemann <gspr@nonempty.org>	2020-11-15 17:34:50 +0100
committer	Gard Spreemann <gspr@nonempty.org>	2020-11-15 18:22:16 +0100
commit	26c62d8a7b9567ef354fcbe6360856fe1f133641 (patch)
tree	ddc9e84cff77335fa3b43f9bbd0291af8719e874
parent	ad5d38986542715e0a0518537afaadcda71d9c49 (diff)