Merge master and resolve conflicts

author: Vincent Rouvreau <vincent.rouvreau@inria.fr> 2022-02-01 16:21:10 +0100
committer: Vincent Rouvreau <vincent.rouvreau@inria.fr> 2022-02-01 16:21:10 +0100
commit: 15163959fd57ea5318e19a5613cc69bc3f0f6b9e (patch)
tree: 183314a8b538bf6a39b81b20825e9416d66ae43e /src/python/gudhi
parent: 002487d3cd747d6ff979f33474d8bb0a7e61f44d (diff)
parent: 7f1b8eb706c72921141b53e607d6e2aa28e2bf19 (diff)
7 files changed, 254 insertions, 112 deletions
diff --git a/src/python/gudhi/alpha_complex.pyx b/src/python/gudhi/alpha_complex.pyx
index ea128743..a4888914 100644
--- a/src/python/gudhi/alpha_complex.pyx
+++ b/src/python/gudhi/alpha_complex.pyx
@@ -16,7 +16,7 @@ from libcpp.utility cimport pair
 from libcpp.string cimport string
 from libcpp cimport bool
 from libc.stdint cimport intptr_t
-import os
+import warnings
 
 from gudhi.simplex_tree cimport *
 from gudhi.simplex_tree import SimplexTree
@@ -28,66 +28,72 @@ __license__ = "GPL v3"
 
 cdef extern from "Alpha_complex_interface.h" namespace "Gudhi":
     cdef cppclass Alpha_complex_interface "Gudhi::alpha_complex::Alpha_complex_interface":
-        Alpha_complex_interface(vector[vector[double]] points, bool fast_version, bool exact_version) nogil except +
+        Alpha_complex_interface(vector[vector[double]] points, vector[double] weights, bool fast_version, bool exact_version) nogil except +
         vector[double] get_point(int vertex) nogil except +
         void create_simplex_tree(Simplex_tree_interface_full_featured* simplex_tree, double max_alpha_square, bool default_filtration_value) nogil except +
 
 # AlphaComplex python interface
 cdef class AlphaComplex:
-    """AlphaComplex is a simplicial complex constructed from the finite cells
-    of a Delaunay Triangulation.
+    """AlphaComplex is a simplicial complex constructed from the finite cells of a Delaunay Triangulation.
 
-    The filtration value of each simplex is computed as the square of the
-    circumradius of the simplex if the circumsphere is empty (the simplex is
-    then said to be Gabriel), and as the minimum of the filtration values of
-    the codimension 1 cofaces that make it not Gabriel otherwise.
+    The filtration value of each simplex is computed as the square of the circumradius of the simplex if the
+    circumsphere is empty (the simplex is then said to be Gabriel), and as the minimum of the filtration values of the
+    codimension 1 cofaces that make it not Gabriel otherwise.
 
-    All simplices that have a filtration value strictly greater than a given
-    alpha squared value are not inserted into the complex.
+    All simplices that have a filtration value strictly greater than a given alpha squared value are not inserted into
+    the complex.
 
     .. note::
 
-        When Alpha_complex is constructed with an infinite value of alpha, the
-        complex is a Delaunay complex.
-
+        When Alpha_complex is constructed with an infinite value of alpha, the complex is a Delaunay complex.
     """
 
     cdef Alpha_complex_interface * this_ptr
 
     # Fake constructor that does nothing but documenting the constructor
-    def __init__(self, points=None, off_file='', precision='safe'):
+    def __init__(self, points=[], off_file='', weights=None, precision='safe'):
         """AlphaComplex constructor.
 
         :param points: A list of points in d-Dimension.
-        :type points: list of list of double
-
-        Or
+        :type points: Iterable[Iterable[float]]
 
-        :param off_file: An OFF file style name.
+        :param off_file: **[deprecated]** An `OFF file style <fileformats.html#off-file-format>`_ name.
+            If an `off_file` is given with `points` as arguments, only points from the file are taken into account.
         :type off_file: string
 
+        :param weights: A list of weights. If set, the number of weights must correspond to the number of points.
+        :type weights: Iterable[float]
+
         :param precision: Alpha complex precision can be 'fast', 'safe' or 'exact'. Default is 'safe'.
         :type precision: string
+
+        :raises FileNotFoundError: **[deprecated]** If `off_file` is set but not found.
+        :raises ValueError: In case of inconsistency between the number of points and weights.
         """
 
     # The real cython constructor
-    def __cinit__(self, points = None, off_file = '', precision = 'safe'):
+    def __cinit__(self, points = [], off_file = '', weights=None, precision = 'safe'):
         assert precision in ['fast', 'safe', 'exact'], "Alpha complex precision can only be 'fast', 'safe' or 'exact'"
         cdef bool fast = precision == 'fast'
         cdef bool exact = precision == 'exact'
 
-        cdef vector[vector[double]] pts
         if off_file:
-            if os.path.isfile(off_file):
-                points = read_points_from_off_file(off_file = off_file)
-            else:
-                print("file " + off_file + " not found.")
-        if points is None:
-            # Empty Alpha construction
-            points=[]
+            warnings.warn("off_file is a deprecated parameter, please consider using gudhi.read_points_from_off_file",
+                          DeprecationWarning)
+            points = read_points_from_off_file(off_file = off_file)  # points read from the file replace the argument
+
+        # weights are set but inconsistent with the number of points
+        if weights is not None and len(weights) != len(points):  # identity test: '!= None' is element-wise on numpy arrays
+            raise ValueError("Inconsistency between the number of points and weights")
+
+        # need to copy the points (and weights) to use them without the gil
+        cdef vector[vector[double]] pts
+        cdef vector[double] wgts
         pts = points
+        if weights is not None:  # wgts stays empty when no weights are given
+            wgts = weights
         with nogil:
-            self.this_ptr = new Alpha_complex_interface(pts, fast, exact)
+            self.this_ptr = new Alpha_complex_interface(pts, wgts, fast, exact)
 
     def __dealloc__(self):
         if self.this_ptr != NULL:
diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx
index 97c69a2d..8e244bb8 100644
--- a/src/python/gudhi/cubical_complex.pyx
+++ b/src/python/gudhi/cubical_complex.pyx
@@ -281,4 +281,8 @@ cdef class CubicalComplex:
             launched first.
         """
         assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()"
-        return np.array(self.pcohptr.intervals_in_dimension(dimension))
+        piid = np.array(self.pcohptr.intervals_in_dimension(dimension))
+        # Workaround https://github.com/GUDHI/gudhi-devel/issues/507
+        if len(piid) == 0:
+            return np.empty(shape = [0, 2])
+        return piid
diff --git a/src/python/gudhi/datasets/generators/_points.cc b/src/python/gudhi/datasets/generators/_points.cc
index 70ce4925..82fea25b 100644
--- a/src/python/gudhi/datasets/generators/_points.cc
+++ b/src/python/gudhi/datasets/generators/_points.cc
@@ -96,7 +96,6 @@ PYBIND11_MODULE(_points, m) {
           :type radius: float
           :param sample: The sample type. Default and only available value is `"random"`.
           :type sample: string
-          :rtype: numpy array of float
           :returns: the generated points on a sphere.
           )pbdoc");
 
@@ -111,10 +110,12 @@ PYBIND11_MODULE(_points, m) {
           :type dim: integer
           :param sample: The sample type. Available values are: `"random"` and `"grid"`. Default value is `"random"`.
           :type sample: string
-          :rtype: numpy array of float.
-          The shape of returned numpy array is :
-              if sample is 'random' : (n_samples, 2*dim).
-              if sample is 'grid' : (⌊n_samples**(1./dim)⌋**dim, 2*dim), where shape[0] is rounded down to the closest perfect 'dim'th power.
           :returns: the generated points on a torus.
+
+          The shape of returned numpy array is:
+
+          If sample is 'random': (n_samples, 2*dim).
+
+          If sample is 'grid': (⌊n_samples**(1./dim)⌋**dim, 2*dim), where shape[0] is rounded down to the closest perfect 'dim'th power.
           )pbdoc");
 }
diff --git a/src/python/gudhi/datasets/generators/points.py b/src/python/gudhi/datasets/generators/points.py
index cf97777d..9bb2799d 100644
--- a/src/python/gudhi/datasets/generators/points.py
+++ b/src/python/gudhi/datasets/generators/points.py
@@ -19,15 +19,15 @@ def _generate_random_points_on_torus(n_samples, dim):
 
     # Based on angles, construct points of size n_samples*dim on a circle and reshape the result in a n_samples*2*dim array
     array_points = np.column_stack([np.cos(alpha), np.sin(alpha)]).reshape(-1, 2*dim)
-    
+
     return array_points
 
 def _generate_grid_points_on_torus(n_samples, dim):
-    
+
     # Generate points on a dim-torus as a grid
     n_samples_grid = int((n_samples+.5)**(1./dim)) # add .5 to avoid rounding down with numerical approximations
     alpha = np.linspace(0, 2*np.pi, n_samples_grid, endpoint=False)
-    
+
     array_points = np.column_stack([np.cos(alpha), np.sin(alpha)])
     array_points_idx = np.empty([n_samples_grid]*dim + [dim], dtype=int)
     for i, x in enumerate(np.ix_(*([np.arange(n_samples_grid)]*dim))):
@@ -35,16 +35,19 @@ def _generate_grid_points_on_torus(n_samples, dim):
     return array_points[array_points_idx].reshape(-1, 2*dim)
 
 def torus(n_samples, dim, sample='random'):
-    """ 
+    """
     Generate points on a flat dim-torus in R^2dim either randomly or on a grid
-    
+
     :param n_samples: The number of points to be generated.
     :param dim: The dimension of the torus on which points would be generated in R^2*dim.
     :param sample: The sample type of the generated points. Can be 'random' or 'grid'.
     :returns: numpy array containing the generated points on a torus.
-        The shape of returned numpy array is:
-        if sample is 'random' : (n_samples, 2*dim).
-        if sample is 'grid' : (⌊n_samples**(1./dim)⌋**dim, 2*dim), where shape[0] is rounded down to the closest perfect 'dim'th power.
+
+    The shape of returned numpy array is:
+
+    If sample is 'random': (n_samples, 2*dim).
+
+    If sample is 'grid': (⌊n_samples**(1./dim)⌋**dim, 2*dim), where shape[0] is rounded down to the closest perfect 'dim'th power.
     """
     if sample == 'random':
         # Generate points randomly
diff --git a/src/python/gudhi/periodic_cubical_complex.pyx b/src/python/gudhi/periodic_cubical_complex.pyx
index ef1d3080..6c21e902 100644
--- a/src/python/gudhi/periodic_cubical_complex.pyx
+++ b/src/python/gudhi/periodic_cubical_complex.pyx
@@ -280,4 +280,8 @@ cdef class PeriodicCubicalComplex:
             launched first.
         """
         assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()"
-        return np.array(self.pcohptr.intervals_in_dimension(dimension))
+        piid = np.array(self.pcohptr.intervals_in_dimension(dimension))
+        # Workaround https://github.com/GUDHI/gudhi-devel/issues/507
+        if len(piid) == 0:
+            return np.empty(shape = [0, 2])
+        return piid
diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 84bc99a2..f8078d03 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -1,14 +1,17 @@
 # This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
 # See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
-# Author(s):       Mathieu Carrière, Martin Royer
+# Author(s):       Mathieu Carrière, Martin Royer, Gard Spreemann
 #
 # Copyright (C) 2018-2020 Inria
 #
 # Modification(s):
 #   - 2020/06 Martin: ATOL integration
+#   - 2020/12 Gard: A more flexible Betti curve class capable of computing exact curves.
+#   - 2021/11 Vincent Rouvreau: factorize _automatic_sample_range
 
 import numpy as np
 from sklearn.base          import BaseEstimator, TransformerMixin
+from sklearn.exceptions    import NotFittedError
 from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler
 from sklearn.neighbors     import DistanceMetric
 from sklearn.metrics       import pairwise
@@ -45,10 +48,14 @@ class PersistenceImage(BaseEstimator, TransformerMixin):
             y (n x 1 array): persistence diagram labels (unused).
         """
         if np.isnan(np.array(self.im_range)).any():
-            new_X = BirthPersistenceTransform().fit_transform(X)
-            pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(new_X,y)
-            [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
-            self.im_range = np.where(np.isnan(np.array(self.im_range)), np.array([mx, Mx, my, My]), np.array(self.im_range))
+            try:
+                new_X = BirthPersistenceTransform().fit_transform(X)
+                pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(new_X,y)
+                [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
+                self.im_range = np.where(np.isnan(np.array(self.im_range)), np.array([mx, Mx, my, My]), np.array(self.im_range))
+            except ValueError:
+                # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507
+                pass
         return self
 
     def transform(self, X):
@@ -94,6 +101,28 @@ class PersistenceImage(BaseEstimator, TransformerMixin):
         """
         return self.fit_transform([diag])[0,:]
 
+def _automatic_sample_range(sample_range, X, y):
+        """
+        Return the sample range, with any numpy.nan bound replaced by a value computed from the persistence diagrams.
+
+        Parameters:
+            sample_range (a numpy array of 2 float): minimum and maximum of all piecewise-linear function domains, of
+                the form [x_min, x_max].
+            X (list of n x 2 numpy arrays): input persistence diagrams.
+            y (n x 1 array): persistence diagram labels (only forwarded to the scaler's fit).
+        """
+        nan_in_range = np.isnan(sample_range)
+        if nan_in_range.any():
+            try:
+                pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
+                mx = pre.scalers[0][1].data_min_[0]  # lower bound: data_min_ of the scaler fitted on column 0
+                My = pre.scalers[1][1].data_max_[0]  # upper bound: data_max_ of the scaler fitted on column 1
+                return np.where(nan_in_range, np.array([mx, My]), sample_range)  # only the nan entries are replaced
+            except ValueError:
+                # Empty persistence diagram case, range returned unchanged - https://github.com/GUDHI/gudhi-devel/issues/507
+                pass
+        return sample_range
+
 class Landscape(BaseEstimator, TransformerMixin):
     """
     This is a class for computing persistence landscapes from a list of persistence diagrams. A persistence landscape is a collection of 1D piecewise-linear functions computed from the rank function associated to the persistence diagram. These piecewise-linear functions are then sampled evenly on a given range and the corresponding vectors of samples are concatenated and returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details.
@@ -119,10 +148,7 @@ class Landscape(BaseEstimator, TransformerMixin):
             X (list of n x 2 numpy arrays): input persistence diagrams.
             y (n x 1 array): persistence diagram labels (unused).
         """
-        if self.nan_in_range.any():
-            pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
-            [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
-            self.sample_range = np.where(self.nan_in_range, np.array([mx, My]), np.array(self.sample_range))
+        self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
         return self
 
     def transform(self, X):
@@ -218,10 +244,7 @@ class Silhouette(BaseEstimator, TransformerMixin):
             X (list of n x 2 numpy arrays): input persistence diagrams.
             y (n x 1 array): persistence diagram labels (unused).
         """
-        if np.isnan(np.array(self.sample_range)).any():
-            pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
-            [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
-            self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range))
+        self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
         return self
 
     def transform(self, X):
@@ -285,70 +308,162 @@ class Silhouette(BaseEstimator, TransformerMixin):
         """
         return self.fit_transform([diag])[0,:]
 
+
 class BettiCurve(BaseEstimator, TransformerMixin):
     """
-    This is a class for computing Betti curves from a list of persistence diagrams. A Betti curve is a 1D piecewise-constant function obtained from the rank function. It is sampled evenly on a given range and the vector of samples is returned. See https://www.researchgate.net/publication/316604237_Time_Series_Classification_via_Topological_Data_Analysis for more details.
+    Compute Betti curves from persistence diagrams. There are several modes of operation: with a given resolution (with or without a sample_range), with a predefined grid, and with none of the previous. With a predefined grid, the class computes the Betti numbers at those grid points. Without a predefined grid, if the resolution is set to None, it can be fit to a list of persistence diagrams and produce a grid that consists of (at least) the filtration values at which at least one of those persistence diagrams changes Betti numbers, and then compute the Betti numbers at those grid points. In the latter mode, the exact Betti curve is computed for the entire real line. Otherwise, if the resolution is given, the Betti curve is obtained by sampling evenly using either the given sample_range or based on the persistence diagrams.
     """
-    def __init__(self, resolution=100, sample_range=[np.nan, np.nan]):
+
+    def __init__(self, resolution=100, sample_range=[np.nan, np.nan], predefined_grid=None):
         """
         Constructor for the BettiCurve class.
 
         Parameters:
             resolution (int): number of sample for the piecewise-constant function (default 100).
             sample_range ([double, double]): minimum and maximum of the piecewise-constant function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
+            predefined_grid (1d array or None, default=None): Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are ok. If None (default), and resolution is given, the grid will be uniform from x_min to x_max in 'resolution' steps, otherwise a grid will be computed that captures all changes in Betti numbers in the provided data.
+
+        Attributes:
+            grid_ (1d array): The grid on which the Betti numbers are computed. If predefined_grid was specified, `grid_` will always be that grid, independently of data. If not, the grid is fitted to capture all filtration values at which the Betti numbers change.
+
+        Examples
+        --------
+        If pd is a persistence diagram and xs is a nonempty grid of finite values such that xs[0] >= pd.min(), then the results of:
+
+        >>> bc = BettiCurve(predefined_grid=xs) # doctest: +SKIP
+        >>> result = bc(pd) # doctest: +SKIP
+
+        and
+
+        >>> from scipy.interpolate import interp1d # doctest: +SKIP
+        >>> bc = BettiCurve(resolution=None, predefined_grid=None) # doctest: +SKIP
+        >>> bettis = bc.fit_transform([pd]) # doctest: +SKIP
+        >>> interp = interp1d(bc.grid_, bettis[0, :], kind="previous", fill_value="extrapolate") # doctest: +SKIP
+        >>> result = np.array(interp(xs), dtype=int) # doctest: +SKIP
+
+        are the same.
         """
-        self.resolution, self.sample_range = resolution, sample_range
 
-    def fit(self, X, y=None):
+        if (predefined_grid is not None) and (not isinstance(predefined_grid, np.ndarray)):
+            raise ValueError("Expected predefined_grid as array or None.")
+
+        self.predefined_grid = predefined_grid
+        self.resolution = resolution
+        self.sample_range = sample_range
+
+    def is_fitted(self):
+        return hasattr(self, "grid_")
+
+    def fit(self, X, y = None):
         """
-        Fit the BettiCurve class on a list of persistence diagrams: if any of the values in **sample_range** is numpy.nan, replace it with the corresponding value computed on the given list of persistence diagrams.
+        Fit the BettiCurve class on a list of persistence diagrams: if any of the values in **sample_range** is numpy.nan, replace it with the corresponding value computed on the given list of persistence diagrams. When no predefined grid is provided and resolution set to None, compute a filtration grid that captures all changes in Betti numbers for all the given persistence diagrams.
 
         Parameters:
-            X (list of n x 2 numpy arrays): input persistence diagrams.
-            y (n x 1 array): persistence diagram labels (unused).
+            X (list of 2d arrays): Persistence diagrams.
+            y (None): Ignored.
         """
-        if np.isnan(np.array(self.sample_range)).any():
-            pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
-            [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
-            self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range))
+
+        if self.predefined_grid is None:
+            if self.resolution is None: # Flexible/exact version
+                events = np.unique(np.concatenate([pd.flatten() for pd in X] + [[-np.inf]], axis=0))
+                self.grid_ = np.array(events)
+            else:
+                self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
+                self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution)
+        else:
+            self.grid_ = self.predefined_grid # Get the predefined grid from user
+
         return self
 
     def transform(self, X):
         """
-        Compute the Betti curve for each persistence diagram individually and concatenate the results.
+        Compute Betti curves.
 
         Parameters:
-            X (list of n x 2 numpy arrays): input persistence diagrams.
-    
+            X (list of 2d arrays): Persistence diagrams.
+
         Returns:
-            numpy array with shape (number of diagrams) x (**resolution**): output Betti curves.
+            `len(X)` x `len(self.grid_)` array of ints: Betti numbers of the given persistence diagrams at the grid points given in `self.grid_`.
         """
-        Xfit = []
-        x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution)
-        step_x = x_values[1] - x_values[0]
 
-        for diagram in X:
-            diagram_int = np.clip(np.ceil((diagram[:,:2] - self.sample_range[0]) / step_x), 0, self.resolution).astype(int)
-            bc =  np.zeros(self.resolution)
-            for interval in diagram_int:
-                bc[interval[0]:interval[1]] += 1
-            Xfit.append(np.reshape(bc,[1,-1]))
+        if not self.is_fitted():
+            raise NotFittedError("Not fitted.")
 
-        Xfit = np.concatenate(Xfit, 0)
+        if not X:
+            X = [np.zeros((0, 2))]
+        
+        N = len(X)
 
-        return Xfit
+        events = np.concatenate([pd.flatten(order="F") for pd in X], axis=0)
+        sorting = np.argsort(events)
+        offsets = np.zeros(1 + N, dtype=int)
+        for i in range(0, N):
+            offsets[i+1] = offsets[i] + 2*X[i].shape[0]
+        starts = offsets[0:N]
+        ends = offsets[1:N + 1] - 1
 
-    def __call__(self, diag):
+        bettis = [[0] for i in range(0, N)]
+
+        i = 0
+        for x in self.grid_:
+            while i < len(sorting) and events[sorting[i]] <= x:
+                j = np.searchsorted(ends, sorting[i])
+                delta = 1 if sorting[i] - starts[j] < len(X[j]) else -1
+                bettis[j][-1] += delta
+                i += 1
+            for k in range(0, N):
+                bettis[k].append(bettis[k][-1])
+
+        return np.array(bettis, dtype=int)[:, 0:-1]
+
+    def fit_transform(self, X, y=None):
         """
+        Same result as fit(X, y) followed by transform(X), but potentially faster. `y` is only forwarded to fit().
         """
-        Apply BettiCurve on a single persistence diagram and outputs the result.
 
-        Parameters:
-            diag (n x 2 numpy array): input persistence diagram.
+        if self.predefined_grid is None and self.resolution is None:  # exact mode: build grid and curves in one pass
+            if not X:
+                X = [np.zeros((0, 2))]
 
-        Returns:
-            numpy array with shape (**resolution**): output Betti curve.
+            N = len(X)
+
+            events = np.concatenate([pd.flatten(order="F") for pd in X], axis=0)  # column-major: first column, then second
+            sorting = np.argsort(events)
+            offsets = np.zeros(1 + N, dtype=int)
+            for i in range(0, N):
+                offsets[i+1] = offsets[i] + 2*X[i].shape[0]
+            starts = offsets[0:N]
+            ends = offsets[1:N + 1] - 1
+
+            xs = [-np.inf]
+            bettis = [[0] for i in range(0, N)]
+
+            for i in sorting:
+                j = np.searchsorted(ends, i)  # index of the diagram owning event i
+                delta = 1 if i - starts[j] < len(X[j]) else -1  # +1 for a first-column value, -1 for a second-column value
+                if events[i] == xs[-1]:  # same value as the last grid point: update it in place
+                    bettis[j][-1] += delta
+                else:  # new grid point: every curve gets a new sample
+                    xs.append(events[i])
+                    for k in range(0, j):
+                        bettis[k].append(bettis[k][-1])
+                    bettis[j].append(bettis[j][-1] + delta)
+                    for k in range(j+1, N):
+                        bettis[k].append(bettis[k][-1])
+
+            self.grid_ = np.array(xs)
+            return np.array(bettis, dtype=int)
+
+        else:
+            return self.fit(X, y).transform(X)
+
+    def __call__(self, diag):
         """
-        return self.fit_transform([diag])[0,:]
+        Shorthand for fit_transform on a single persistence diagram: fits on `[diag]` and returns the first row.
+        """
+        return self.fit_transform([diag])[0, :]
+
+
 
 class Entropy(BaseEstimator, TransformerMixin):
     """
@@ -374,10 +489,7 @@ class Entropy(BaseEstimator, TransformerMixin):
             X (list of n x 2 numpy arrays): input persistence diagrams.
             y (n x 1 array): persistence diagram labels (unused).
         """
-        if np.isnan(np.array(self.sample_range)).any():
-            pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
-            [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
-            self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range))
+        self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
         return self
 
     def transform(self, X):
@@ -396,9 +508,13 @@ class Entropy(BaseEstimator, TransformerMixin):
         new_X = BirthPersistenceTransform().fit_transform(X)        
 
         for i in range(num_diag):
-
             orig_diagram, diagram, num_pts_in_diag = X[i], new_X[i], X[i].shape[0]
-            new_diagram = DiagramScaler(use=True, scalers=[([1], MaxAbsScaler())]).fit_transform([diagram])[0]
+            try:
+                new_diagram = DiagramScaler(use=True, scalers=[([1], MaxAbsScaler())]).fit_transform([diagram])[0]
+            except ValueError:
+                # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507
+                assert len(diagram) == 0
+                new_diagram = np.empty(shape = [0, 2])
 
             if self.mode == "scalar":
                 ent = - np.sum( np.multiply(new_diagram[:,1], np.log(new_diagram[:,1])) )
@@ -412,12 +528,11 @@ class Entropy(BaseEstimator, TransformerMixin):
                     max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
                     for k in range(min_idx, max_idx):
                         ent[k] += (-1) * new_diagram[j,1] * np.log(new_diagram[j,1])
-                    if self.normalized:
-                        ent = ent / np.linalg.norm(ent, ord=1)
-                    Xfit.append(np.reshape(ent,[1,-1]))
-
-        Xfit = np.concatenate(Xfit, 0)
+                if self.normalized:
+                    ent = ent / np.linalg.norm(ent, ord=1)
+                Xfit.append(np.reshape(ent,[1,-1]))
 
+        Xfit = np.concatenate(Xfit, axis=0)
         return Xfit
 
     def __call__(self, diag):
@@ -478,7 +593,13 @@ class TopologicalVector(BaseEstimator, TransformerMixin):
             diagram, num_pts_in_diag = X[i], X[i].shape[0]
             pers = 0.5 * (diagram[:,1]-diagram[:,0])
             min_pers = np.minimum(pers,np.transpose(pers))
-            distances = DistanceMetric.get_metric("chebyshev").pairwise(diagram)
+            # Works fine with sklearn 1.0, but a ValueError exception is raised on earlier versions
+            try:
+                distances = DistanceMetric.get_metric("chebyshev").pairwise(diagram)
+            except ValueError:
+                # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507
+                assert len(diagram) == 0
+                distances = np.empty(shape = [0, 0])
             vect = np.flip(np.sort(np.triu(np.minimum(distances, min_pers)), axis=None), 0)
             dim = min(len(vect), thresh)
             Xfit[i, :dim] = vect[:dim]
diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx
index 9c51cb46..c3720936 100644
--- a/src/python/gudhi/simplex_tree.pyx
+++ b/src/python/gudhi/simplex_tree.pyx
@@ -9,8 +9,7 @@
 
 from cython.operator import dereference, preincrement
 from libc.stdint cimport intptr_t
-import numpy
-from numpy import array as np_array
+import numpy as np
 cimport gudhi.simplex_tree
 
 __author__ = "Vincent Rouvreau"
@@ -542,7 +541,11 @@ cdef class SimplexTree:
             function to be launched first.
         """
         assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()"
-        return np_array(self.pcohptr.intervals_in_dimension(dimension))
+        piid = np.array(self.pcohptr.intervals_in_dimension(dimension))
+        # Workaround https://github.com/GUDHI/gudhi-devel/issues/507
+        if len(piid) == 0:
+            return np.empty(shape = [0, 2])
+        return piid
 
     def persistence_pairs(self):
         """This function returns a list of persistence birth and death simplices pairs.
@@ -583,8 +586,8 @@ cdef class SimplexTree:
         """
         assert self.pcohptr != NULL, "lower_star_persistence_generators() requires that persistence() be called first."
         gen = self.pcohptr.lower_star_generators()
-        normal = [np_array(d).reshape(-1,2) for d in gen.first]
-        infinite = [np_array(d) for d in gen.second]
+        normal = [np.array(d).reshape(-1,2) for d in gen.first]
+        infinite = [np.array(d) for d in gen.second]
         return (normal, infinite)
 
     def flag_persistence_generators(self):
@@ -602,19 +605,19 @@ cdef class SimplexTree:
         assert self.pcohptr != NULL, "flag_persistence_generators() requires that persistence() be called first."
         gen = self.pcohptr.flag_generators()
         if len(gen.first) == 0:
-            normal0 = numpy.empty((0,3))
+            normal0 = np.empty((0,3))
             normals = []
         else:
             l = iter(gen.first)
-            normal0 = np_array(next(l)).reshape(-1,3)
-            normals = [np_array(d).reshape(-1,4) for d in l]
+            normal0 = np.array(next(l)).reshape(-1,3)
+            normals = [np.array(d).reshape(-1,4) for d in l]
         if len(gen.second) == 0:
-            infinite0 = numpy.empty(0)
+            infinite0 = np.empty(0)
             infinites = []
         else:
             l = iter(gen.second)
-            infinite0 = np_array(next(l))
-            infinites = [np_array(d).reshape(-1,2) for d in l]
+            infinite0 = np.array(next(l))
+            infinites = [np.array(d).reshape(-1,2) for d in l]
         return (normal0, normals, infinite0, infinites)
 
     def collapse_edges(self, nb_iterations = 1):
author	Vincent Rouvreau <vincent.rouvreau@inria.fr>	2022-02-01 16:21:10 +0100
committer	Vincent Rouvreau <vincent.rouvreau@inria.fr>	2022-02-01 16:21:10 +0100
commit	15163959fd57ea5318e19a5613cc69bc3f0f6b9e (patch)
tree	183314a8b538bf6a39b81b20825e9416d66ae43e /src/python/gudhi
parent	002487d3cd747d6ff979f33474d8bb0a7e61f44d (diff)
parent	7f1b8eb706c72921141b53e607d6e2aa28e2bf19 (diff)