Diffstat (limited to 'src/python/gudhi')
-rw-r--r--  src/python/gudhi/hera/bottleneck.cc                                  |   2
-rw-r--r--  src/python/gudhi/hera/wasserstein.cc                                 |  10
-rw-r--r--  src/python/gudhi/off_utils.pyx (renamed from src/python/gudhi/off_reader.pyx) |  23
-rw-r--r--  src/python/gudhi/persistence_graphical_tools.py                      |   8
-rw-r--r--  src/python/gudhi/point_cloud/knn.py                                  |   4
-rw-r--r--  src/python/gudhi/representations/vector_methods.py                   | 248
-rw-r--r--  src/python/gudhi/rips_complex.pyx                                    |  13
-rw-r--r--  src/python/gudhi/simplex_tree.pxd                                    |   2
-rw-r--r--  src/python/gudhi/simplex_tree.pyx                                    | 105
-rw-r--r--  src/python/gudhi/tensorflow/cubical_layer.py                         |   2
10 files changed, 268 insertions, 149 deletions
diff --git a/src/python/gudhi/hera/bottleneck.cc b/src/python/gudhi/hera/bottleneck.cc
index 0cb562ce..ec461f7c 100644
--- a/src/python/gudhi/hera/bottleneck.cc
+++ b/src/python/gudhi/hera/bottleneck.cc
@@ -16,7 +16,7 @@
using py::ssize_t;
#endif
-#include <bottleneck.h> // Hera
+#include <hera/bottleneck.h> // Hera
double bottleneck_distance(Dgm d1, Dgm d2, double delta)
{
diff --git a/src/python/gudhi/hera/wasserstein.cc b/src/python/gudhi/hera/wasserstein.cc
index fa0cf8aa..3516352e 100644
--- a/src/python/gudhi/hera/wasserstein.cc
+++ b/src/python/gudhi/hera/wasserstein.cc
@@ -8,10 +8,16 @@
* - YYYY/MM Author: Description of the modification
*/
-#include <wasserstein.h> // Hera
-
#include <pybind11_diagram_utils.h>
+#ifdef _MSC_VER
+// https://github.com/grey-narn/hera/issues/3
+// ssize_t is a non-standard type (well, posix)
+using py::ssize_t;
+#endif
+
+#include <hera/wasserstein.h> // Hera
+
double wasserstein_distance(
Dgm d1, Dgm d2,
double wasserstein_power, double internal_p,
diff --git a/src/python/gudhi/off_reader.pyx b/src/python/gudhi/off_utils.pyx
index a3200704..9276c7b0 100644
--- a/src/python/gudhi/off_reader.pyx
+++ b/src/python/gudhi/off_utils.pyx
@@ -13,8 +13,10 @@ from __future__ import print_function
from cython cimport numeric
from libcpp.vector cimport vector
from libcpp.string cimport string
+cimport cython
import errno
import os
+import numpy as np
__author__ = "Vincent Rouvreau"
__copyright__ = "Copyright (C) 2016 Inria"
@@ -24,7 +26,7 @@ cdef extern from "Off_reader_interface.h" namespace "Gudhi":
vector[vector[double]] read_points_from_OFF_file(string off_file)
def read_points_from_off_file(off_file=''):
- """Read points from OFF file.
+ """Read points from an `OFF file <fileformats.html#off-file-format>`_.
:param off_file: Name of an OFF file.
:type off_file: string
@@ -39,3 +41,22 @@ def read_points_from_off_file(off_file=''):
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
off_file)
+@cython.embedsignature(True)
+def write_points_to_off_file(fname, points):
+ """Write points to an `OFF file <fileformats.html#off-file-format>`_.
+
+ A simple wrapper for `numpy.savetxt`.
+
+ :param fname: Name of the OFF file.
+ :type fname: str or file handle
+ :param points: Point coordinates.
+ :type points: numpy array of shape (n, dim)
+ """
+ points = np.array(points, copy=False)
+ assert len(points.shape) == 2
+ dim = points.shape[1]
+ if dim == 3:
+ head = 'OFF\n{} 0 0'.format(points.shape[0])
+ else:
+ head = 'nOFF\n{} {} 0 0'.format(dim, points.shape[0])
+ np.savetxt(fname, points, header=head, comments='')
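A quick round-trip sketch for the new writer (assuming both helpers are re-exported at the top level of the gudhi package, as read_points_from_off_file already is; illustrative only):

    import numpy as np
    import gudhi

    pts = np.random.rand(5, 3)                     # 3D points -> plain 'OFF' header
    gudhi.write_points_to_off_file('pts.off', pts)
    back = np.array(gudhi.read_points_from_off_file(off_file='pts.off'))
    assert np.allclose(pts, back)                  # the round trip preserves coordinates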
diff --git a/src/python/gudhi/persistence_graphical_tools.py b/src/python/gudhi/persistence_graphical_tools.py
index 21275cdd..e438aa66 100644
--- a/src/python/gudhi/persistence_graphical_tools.py
+++ b/src/python/gudhi/persistence_graphical_tools.py
@@ -194,19 +194,21 @@ def plot_persistence_barcode(
y=[(death - birth) if death != float("inf") else (infinity - birth) for (dim,(birth,death)) in persistence]
c=[colormap[dim] for (dim,(birth,death)) in persistence]
- axes.barh(list(reversed(range(len(x)))), y, height=0.8, left=x, alpha=alpha, color=c, linewidth=0)
+ axes.barh(range(len(x)), y, left=x, alpha=alpha, color=c, linewidth=0)
if legend:
- dimensions = list(set(item[0] for item in persistence))
+ dimensions = set(item[0] for item in persistence)
axes.legend(
handles=[mpatches.Patch(color=colormap[dim], label=str(dim)) for dim in dimensions], loc="lower right",
)
axes.set_title("Persistence barcode", fontsize=fontsize)
+ axes.set_yticks([])
+ axes.invert_yaxis()
# Ends plot on infinity value and starts a little bit before min_birth
if len(x) != 0:
- axes.axis([axis_start, infinity, 0, len(x)])
+ axes.set_xlim((axis_start, infinity))
return axes
except ImportError as import_error:
diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py
index de5844f9..7dc83817 100644
--- a/src/python/gudhi/point_cloud/knn.py
+++ b/src/python/gudhi/point_cloud/knn.py
@@ -314,7 +314,9 @@ class KNearestNeighbors:
return None
if self.params["implementation"] == "ckdtree":
- qargs = {key: val for key, val in self.params.items() if key in {"p", "eps", "n_jobs"}}
+ qargs = {key: val for key, val in self.params.items() if key in {"p", "eps"}}
+ # SciPy renamed n_jobs to workers
+ qargs["workers"] = self.params.get("workers") or self.params.get("n_jobs") or 1
distances, neighbors = self.kdtree.query(X, k=self.k, **qargs)
if k == 1:
# SciPy decided to squeeze the last dimension for k=1
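The renaming can be exercised directly against SciPy; a minimal sketch of the same fallback, assuming SciPy >= 1.6 where cKDTree.query() accepts workers:

    import numpy as np
    from scipy.spatial import cKDTree

    pts = np.random.rand(100, 3)
    tree = cKDTree(pts)
    params = {"n_jobs": 2}                         # a caller may still use the legacy name
    # same precedence as above: "workers" first, then "n_jobs", then serial
    workers = params.get("workers") or params.get("n_jobs") or 1
    dist, idx = tree.query(pts[:5], k=3, workers=workers)
    print(dist.shape, idx.shape)                   # (5, 3) (5, 3)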
diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 69ff5e1e..ce74aee5 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -13,8 +13,13 @@ import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.exceptions import NotFittedError
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler
-from sklearn.neighbors import DistanceMetric
from sklearn.metrics import pairwise
+try:
+ # New location since 1.0
+ from sklearn.metrics import DistanceMetric
+except ImportError:
+ # Will be removed in 1.3
+ from sklearn.neighbors import DistanceMetric
from .preprocessing import DiagramScaler, BirthPersistenceTransform
@@ -85,7 +90,7 @@ class PersistenceImage(BaseEstimator, TransformerMixin):
Xfit.append(image.flatten()[np.newaxis,:])
- Xfit = np.concatenate(Xfit,0)
+ Xfit = np.concatenate(Xfit, 0)
return Xfit
@@ -101,7 +106,7 @@ class PersistenceImage(BaseEstimator, TransformerMixin):
"""
return self.fit_transform([diag])[0,:]
-def _automatic_sample_range(sample_range, X, y):
+def _automatic_sample_range(sample_range, X):
"""
Computes and returns the sample range from the persistence diagrams if one of the sample_range values is numpy.nan.
@@ -114,7 +119,7 @@ def _automatic_sample_range(sample_range, X, y):
nan_in_range = np.isnan(sample_range)
if nan_in_range.any():
try:
- pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
+ pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X)
[mx,my] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]]
[Mx,My] = [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
return np.where(nan_in_range, np.array([mx, My]), sample_range)
@@ -123,11 +128,35 @@ def _automatic_sample_range(sample_range, X, y):
pass
return sample_range
+
+def _trim_endpoints(x, are_endpoints_nan):
+ if are_endpoints_nan[0]:
+ x = x[1:]
+ if are_endpoints_nan[1]:
+ x = x[:-1]
+ return x
+
+
+def _grid_from_sample_range(self, X):
+ sample_range = np.array(self.sample_range)
+ self.nan_in_range = np.isnan(sample_range)
+ self.new_resolution = self.resolution
+ if not self.keep_endpoints:
+ self.new_resolution += self.nan_in_range.sum()
+ self.sample_range_fixed = _automatic_sample_range(sample_range, X)
+ self.grid_ = np.linspace(self.sample_range_fixed[0], self.sample_range_fixed[1], self.new_resolution)
+ if not self.keep_endpoints:
+ self.grid_ = _trim_endpoints(self.grid_, self.nan_in_range)
+
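To make the endpoint handling concrete, here is a standalone sketch of the logic above with made-up values (keep_endpoints=False and both range endpoints inferred, i.e. nan_in_range = [True, True]):

    import numpy as np

    resolution = 4
    nan_in_range = np.array([True, True])
    # the resolution is bumped by the number of inferred endpoints...
    grid = np.linspace(0.0, 1.0, resolution + nan_in_range.sum())
    # ...and the (always-zero) extremities are trimmed afterwards, as in _trim_endpoints
    grid = grid[1:][:-1]
    print(grid)                                    # [0.2 0.4 0.6 0.8], strictly inside (0, 1)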
+
class Landscape(BaseEstimator, TransformerMixin):
"""
This is a class for computing persistence landscapes from a list of persistence diagrams. A persistence landscape is a collection of 1D piecewise-linear functions computed from the rank function associated to the persistence diagram. These piecewise-linear functions are then sampled evenly on a given range and the corresponding vectors of samples are concatenated and returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details.
+
+ Attributes:
+ grid_ (1d array): The grid on which the landscapes are computed.
"""
- def __init__(self, num_landscapes=5, resolution=100, sample_range=[np.nan, np.nan]):
+ def __init__(self, num_landscapes=5, resolution=100, sample_range=[np.nan, np.nan], *, keep_endpoints=False):
"""
Constructor for the Landscape class.
@@ -135,10 +164,10 @@ class Landscape(BaseEstimator, TransformerMixin):
num_landscapes (int): number of piecewise-linear functions to output (default 5).
resolution (int): number of samples for all piecewise-linear functions (default 100).
sample_range ([double, double]): minimum and maximum of all piecewise-linear function domains, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
+ keep_endpoints (bool): when computing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting; the default is to use a slightly smaller range.
"""
self.num_landscapes, self.resolution, self.sample_range = num_landscapes, resolution, sample_range
- self.nan_in_range = np.isnan(np.array(self.sample_range))
- self.new_resolution = self.resolution + self.nan_in_range.sum()
+ self.keep_endpoints = keep_endpoints
def fit(self, X, y=None):
"""
@@ -148,7 +177,7 @@ class Landscape(BaseEstimator, TransformerMixin):
X (list of n x 2 numpy arrays): input persistence diagrams.
y (n x 1 array): persistence diagram labels (unused).
"""
- self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
+ _grid_from_sample_range(self, X)
return self
def transform(self, X):
@@ -161,53 +190,26 @@ class Landscape(BaseEstimator, TransformerMixin):
Returns:
numpy array with shape (number of diagrams) x (number of samples = **num_landscapes** x **resolution**): output persistence landscapes.
"""
- num_diag, Xfit = len(X), []
- x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution)
- step_x = x_values[1] - x_values[0]
-
- for i in range(num_diag):
-
- diagram, num_pts_in_diag = X[i], X[i].shape[0]
-
- ls = np.zeros([self.num_landscapes, self.new_resolution])
- events = []
- for j in range(self.new_resolution):
- events.append([])
+ Xfit = []
+ x_values = self.grid_
+ for diag in X:
+ midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2.
+ tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0)
+ n_points = diag.shape[0]
+ # Complete the array with zeros to get the right number of landscapes
+ if self.num_landscapes > n_points:
+ tent_functions = np.concatenate(
+ [tent_functions, np.zeros((tent_functions.shape[0], self.num_landscapes-n_points))],
+ axis=1
+ )
+ tent_functions.partition(tent_functions.shape[1]-self.num_landscapes, axis=1)
+ landscapes = np.sort(tent_functions[:, -self.num_landscapes:], axis=1)[:, ::-1].T
- for j in range(num_pts_in_diag):
- [px,py] = diagram[j,:2]
- min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.new_resolution)
- mid_idx = np.clip(np.ceil((0.5*(py+px) - self.sample_range[0]) / step_x).astype(int), 0, self.new_resolution)
- max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.new_resolution)
-
- if min_idx < self.new_resolution and max_idx > 0:
-
- landscape_value = self.sample_range[0] + min_idx * step_x - px
- for k in range(min_idx, mid_idx):
- events[k].append(landscape_value)
- landscape_value += step_x
-
- landscape_value = py - self.sample_range[0] - mid_idx * step_x
- for k in range(mid_idx, max_idx):
- events[k].append(landscape_value)
- landscape_value -= step_x
+ landscapes = np.sqrt(2) * np.ravel(landscapes)
+ Xfit.append(landscapes)
- for j in range(self.new_resolution):
- events[j].sort(reverse=True)
- for k in range( min(self.num_landscapes, len(events[j])) ):
- ls[k,j] = events[j][k]
-
- if self.nan_in_range[0]:
- ls = ls[:,1:]
- if self.nan_in_range[1]:
- ls = ls[:,:-1]
- ls = np.sqrt(2)*np.reshape(ls,[1,-1])
- Xfit.append(ls)
-
- Xfit = np.concatenate(Xfit,0)
-
- return Xfit
+ return np.stack(Xfit, axis=0)
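The vectorized rewrite evaluates every tent function on the whole grid at once; a small numeric sketch of that step with a made-up diagram:

    import numpy as np

    diag = np.array([[0.0, 1.0], [0.5, 2.0]])      # two persistence pairs (birth, death)
    xs = np.linspace(0.0, 2.0, 5)                  # sample grid
    mid = (diag[:, 0] + diag[:, 1]) / 2.           # tent apexes
    h = (diag[:, 1] - diag[:, 0]) / 2.             # tent heights
    tents = np.maximum(h[None, :] - np.abs(xs[:, None] - mid[None, :]), 0)
    # first landscape = pointwise maximum over all tents
    print(np.sqrt(2) * tents.max(axis=1))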
def __call__(self, diag):
"""
@@ -219,13 +221,16 @@ class Landscape(BaseEstimator, TransformerMixin):
Returns:
numpy array with shape (number of samples = **num_landscapes** x **resolution**): output persistence landscape.
"""
- return self.fit_transform([diag])[0,:]
+ return self.fit_transform([diag])[0, :]
class Silhouette(BaseEstimator, TransformerMixin):
"""
This is a class for computing persistence silhouettes from a list of persistence diagrams. A persistence silhouette is computed by taking a weighted average of the collection of 1D piecewise-linear functions given by the persistence landscapes, and then by evenly sampling this average on a given range. Finally, the corresponding vector of samples is returned. See https://arxiv.org/abs/1312.0308 for more details.
+
+ Attributes:
+ grid_ (1d array): The grid on which the silhouette is computed.
"""
- def __init__(self, weight=lambda x: 1, resolution=100, sample_range=[np.nan, np.nan]):
+ def __init__(self, weight=lambda x: 1, resolution=100, sample_range=[np.nan, np.nan], *, keep_endpoints=False):
"""
Constructor for the Silhouette class.
@@ -233,8 +238,10 @@ class Silhouette(BaseEstimator, TransformerMixin):
weight (function): weight function for the persistence diagram points (default constant function, ie lambda x: 1). This function must be defined on 2D points, ie on lists or numpy arrays of the form [p_x,p_y].
resolution (int): number of samples for the weighted average (default 100).
sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
+ keep_endpoints (bool): when computing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting; the default is to use a slightly smaller range.
"""
self.weight, self.resolution, self.sample_range = weight, resolution, sample_range
+ self.keep_endpoints = keep_endpoints
def fit(self, X, y=None):
"""
@@ -244,7 +251,7 @@ class Silhouette(BaseEstimator, TransformerMixin):
X (list of n x 2 numpy arrays): input persistence diagrams.
y (n x 1 array): persistence diagram labels (unused).
"""
- self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
+ _grid_from_sample_range(self, X)
return self
def transform(self, X):
@@ -257,44 +264,19 @@ class Silhouette(BaseEstimator, TransformerMixin):
Returns:
numpy array with shape (number of diagrams) x (**resolution**): output persistence silhouettes.
"""
- num_diag, Xfit = len(X), []
- x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution)
- step_x = x_values[1] - x_values[0]
-
- for i in range(num_diag):
-
- diagram, num_pts_in_diag = X[i], X[i].shape[0]
+ Xfit = []
+ x_values = self.grid_
- sh, weights = np.zeros(self.resolution), np.zeros(num_pts_in_diag)
- for j in range(num_pts_in_diag):
- weights[j] = self.weight(diagram[j,:])
+ for diag in X:
+ midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2.
+ weights = np.array([self.weight(pt) for pt in diag])
total_weight = np.sum(weights)
- for j in range(num_pts_in_diag):
-
- [px,py] = diagram[j,:2]
- weight = weights[j] / total_weight
- min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
- mid_idx = np.clip(np.ceil((0.5*(py+px) - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
- max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
-
- if min_idx < self.resolution and max_idx > 0:
+ tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0)
+ silhouette = np.sum(weights[None, :] / total_weight * tent_functions, axis=1)
+ Xfit.append(silhouette * np.sqrt(2))
- silhouette_value = self.sample_range[0] + min_idx * step_x - px
- for k in range(min_idx, mid_idx):
- sh[k] += weight * silhouette_value
- silhouette_value += step_x
-
- silhouette_value = py - self.sample_range[0] - mid_idx * step_x
- for k in range(mid_idx, max_idx):
- sh[k] += weight * silhouette_value
- silhouette_value -= step_x
-
- Xfit.append(np.reshape(np.sqrt(2) * sh, [1,-1]))
-
- Xfit = np.concatenate(Xfit, 0)
-
- return Xfit
+ return np.stack(Xfit, axis=0)
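A short usage sketch of the vectorized silhouette (the power weight is an arbitrary choice for illustration):

    import numpy as np
    from gudhi.representations import Silhouette

    diag = np.array([[0.0, 1.0], [0.5, 2.0]])
    # the weight acts on one diagram point [birth, death] at a time
    sh = Silhouette(weight=lambda p: (p[1] - p[0]) ** 2, resolution=50)
    vec = sh.fit_transform([diag])
    print(vec.shape)                               # (1, 50)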
def __call__(self, diag):
"""
@@ -312,36 +294,39 @@ class Silhouette(BaseEstimator, TransformerMixin):
class BettiCurve(BaseEstimator, TransformerMixin):
"""
Compute Betti curves from persistence diagrams. There are several modes of operation: with a given resolution (with or without a sample_range), with a predefined grid, and with none of the previous. With a predefined grid, the class computes the Betti numbers at those grid points. Without a predefined grid, if the resolution is set to None, it can be fit to a list of persistence diagrams and produce a grid that consists of (at least) the filtration values at which at least one of those persistence diagrams changes Betti numbers, and then compute the Betti numbers at those grid points. In the latter mode, the exact Betti curve is computed for the entire real line. Otherwise, if the resolution is given, the Betti curve is obtained by sampling evenly using either the given sample_range or based on the persistence diagrams.
- """
- def __init__(self, resolution=100, sample_range=[np.nan, np.nan], predefined_grid=None):
- """
- Constructor for the BettiCurve class.
+ Examples
+ --------
+ If pd is a persistence diagram and xs is a nonempty grid of finite values such that xs[0] >= pd.min(), then the results of:
- Parameters:
- resolution (int): number of sample for the piecewise-constant function (default 100).
- sample_range ([double, double]): minimum and maximum of the piecewise-constant function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
- predefined_grid (1d array or None, default=None): Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are ok. If None (default), and resolution is given, the grid will be uniform from x_min to x_max in 'resolution' steps, otherwise a grid will be computed that captures all changes in Betti numbers in the provided data.
+ >>> bc = BettiCurve(predefined_grid=xs) # doctest: +SKIP
+ >>> result = bc(pd) # doctest: +SKIP
- Attributes:
- grid_ (1d array): The grid on which the Betti numbers are computed. If predefined_grid was specified, `grid_` will always be that grid, independently of data. If not, the grid is fitted to capture all filtration values at which the Betti numbers change.
+ and
- Examples
- --------
- If pd is a persistence diagram and xs is a nonempty grid of finite values such that xs[0] >= pd.min(), then the results of:
+ >>> from scipy.interpolate import interp1d # doctest: +SKIP
+ >>> bc = BettiCurve(resolution=None, predefined_grid=None) # doctest: +SKIP
+ >>> bettis = bc.fit_transform([pd]) # doctest: +SKIP
+ >>> interp = interp1d(bc.grid_, bettis[0, :], kind="previous", fill_value="extrapolate") # doctest: +SKIP
+ >>> result = np.array(interp(xs), dtype=int) # doctest: +SKIP
- >>> bc = BettiCurve(predefined_grid=xs) # doctest: +SKIP
- >>> result = bc(pd) # doctest: +SKIP
+ are the same.
- and
+ Attributes
+ ----------
+ grid_ : 1d array
+ The grid on which the Betti numbers are computed. If predefined_grid was specified, `grid_` will always be that grid, independently of data. If not and resolution is None, the grid is fitted to capture all filtration values at which the Betti numbers change.
+ """
- >>> from scipy.interpolate import interp1d # doctest: +SKIP
- >>> bc = BettiCurve(resolution=None, predefined_grid=None) # doctest: +SKIP
- >>> bettis = bc.fit_transform([pd]) # doctest: +SKIP
- >>> interp = interp1d(bc.grid_, bettis[0, :], kind="previous", fill_value="extrapolate") # doctest: +SKIP
- >>> result = np.array(interp(xs), dtype=int) # doctest: +SKIP
+ def __init__(self, resolution=100, sample_range=[np.nan, np.nan], predefined_grid=None, *, keep_endpoints=False):
+ """
+ Constructor for the BettiCurve class.
- are the same.
+ Parameters:
+ resolution (int): number of samples for the piecewise-constant function (default 100), or None for the exact curve.
+ sample_range ([double, double]): minimum and maximum of the piecewise-constant function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
+ predefined_grid (1d array or None, default=None): Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are ok. If None (default), and resolution is given, the grid will be uniform from x_min to x_max in 'resolution' steps, otherwise a grid will be computed that captures all changes in Betti numbers in the provided data.
+ keep_endpoints (bool): when computing `sample_range` (fixed `resolution`, no `predefined_grid`), use the exact extremities. This is mostly useful for plotting; the default is to use a slightly smaller range.
"""
if (predefined_grid is not None) and (not isinstance(predefined_grid, np.ndarray)):
@@ -350,6 +335,7 @@ class BettiCurve(BaseEstimator, TransformerMixin):
self.predefined_grid = predefined_grid
self.resolution = resolution
self.sample_range = sample_range
+ self.keep_endpoints = keep_endpoints
def is_fitted(self):
return hasattr(self, "grid_")
@@ -368,8 +354,7 @@ class BettiCurve(BaseEstimator, TransformerMixin):
events = np.unique(np.concatenate([pd.flatten() for pd in X] + [[-np.inf]], axis=0))
self.grid_ = np.array(events)
else:
- self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
- self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution)
+ _grid_from_sample_range(self, X)
else:
self.grid_ = self.predefined_grid # Get the predefined grid from user
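A sketch of the exact mode described in the docstring (resolution=None, no predefined grid); values are illustrative:

    import numpy as np
    from gudhi.representations import BettiCurve

    diag = np.array([[0.0, 1.0], [0.5, 2.0]])
    bc = BettiCurve(resolution=None, predefined_grid=None)
    bettis = bc.fit_transform([diag])
    print(bc.grid_)      # -inf plus every filtration value where Betti numbers change
    print(bettis[0])     # the piecewise-constant Betti numbers on that grid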
@@ -468,8 +453,11 @@ class BettiCurve(BaseEstimator, TransformerMixin):
class Entropy(BaseEstimator, TransformerMixin):
"""
This is a class for computing persistence entropy. Persistence entropy is a statistic for persistence diagrams inspired by Shannon entropy. This statistic can also be used to compute a feature vector, called the entropy summary function. See https://arxiv.org/pdf/1803.08304.pdf for more details. Note that a previous implementation was contributed by Manuel Soriano-Trigueros.
+
+ Attributes:
+ grid_ (1d array): In vector mode, the grid on which the entropy summary function is computed.
"""
- def __init__(self, mode="scalar", normalized=True, resolution=100, sample_range=[np.nan, np.nan]):
+ def __init__(self, mode="scalar", normalized=True, resolution=100, sample_range=[np.nan, np.nan], *, keep_endpoints=False):
"""
Constructor for the Entropy class.
@@ -478,8 +466,10 @@ class Entropy(BaseEstimator, TransformerMixin):
normalized (bool): whether to normalize the entropy summary function (default True). Used only if **mode** = "vector".
resolution (int): number of samples for the entropy summary function (default 100). Used only if **mode** = "vector".
sample_range ([double, double]): minimum and maximum of the entropy summary function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. Used only if **mode** = "vector".
+ keep_endpoints (bool): when computing `sample_range`, use the exact extremities. This is mostly useful for plotting; the default is to use a slightly smaller range.
"""
self.mode, self.normalized, self.resolution, self.sample_range = mode, normalized, resolution, sample_range
+ self.keep_endpoints = keep_endpoints
def fit(self, X, y=None):
"""
@@ -489,7 +479,9 @@ class Entropy(BaseEstimator, TransformerMixin):
X (list of n x 2 numpy arrays): input persistence diagrams.
y (n x 1 array): persistence diagram labels (unused).
"""
- self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
+ if self.mode == "vector":
+ _grid_from_sample_range(self, X)
+ self.step_ = self.grid_[1] - self.grid_[0]
return self
def transform(self, X):
@@ -503,8 +495,6 @@ class Entropy(BaseEstimator, TransformerMixin):
numpy array with shape (number of diagrams) x (1 if **mode** = "scalar" else **resolution**): output entropy.
"""
num_diag, Xfit = len(X), []
- x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution)
- step_x = x_values[1] - x_values[0]
new_X = BirthPersistenceTransform().fit_transform(X)
for i in range(num_diag):
@@ -519,8 +509,8 @@ class Entropy(BaseEstimator, TransformerMixin):
ent = np.zeros(self.resolution)
for j in range(num_pts_in_diag):
[px,py] = orig_diagram[j,:2]
- min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
- max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
+ min_idx = np.clip(np.ceil((px - self.sample_range_fixed[0]) / self.step_).astype(int), 0, self.resolution)
+ max_idx = np.clip(np.ceil((py - self.sample_range_fixed[0]) / self.step_).astype(int), 0, self.resolution)
ent[min_idx:max_idx]-=p[j]*np.log(p[j])
if self.normalized:
ent = ent / np.linalg.norm(ent, ord=1)
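A brief usage sketch covering both modes (made-up diagram):

    import numpy as np
    from gudhi.representations import Entropy

    diag = np.array([[0.0, 1.0], [0.5, 2.0]])
    print(Entropy(mode="scalar").fit_transform([diag]))   # one scalar per diagram
    es = Entropy(mode="vector", resolution=10)            # entropy summary function
    print(es.fit_transform([diag]).shape)                 # (1, 10)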
@@ -720,17 +710,17 @@ class Atol(BaseEstimator, TransformerMixin):
>>> b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]])
>>> c = np.array([[3, 2, -1], [1, 2, -1]])
>>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006))
- >>> atol_vectoriser.fit(X=[a, b, c]).centers # doctest: +SKIP
- >>> # array([[ 2. , 0.66666667, 3.33333333],
- >>> # [ 2.6 , 2.8 , -0.4 ]])
+ >>> atol_vectoriser.fit(X=[a, b, c]).centers
+ array([[ 2.6 , 2.8 , -0.4 ],
+ [ 2. , 0.66666667, 3.33333333]])
>>> atol_vectoriser(a)
- >>> # array([1.18168665, 0.42375966]) # doctest: +SKIP
+ array([0.42375966, 1.18168665])
>>> atol_vectoriser(c)
- >>> # array([0.02062512, 1.25157463]) # doctest: +SKIP
- >>> atol_vectoriser.transform(X=[a, b, c]) # doctest: +SKIP
- >>> # array([[1.18168665, 0.42375966],
- >>> # [0.29861028, 1.06330156],
- >>> # [0.02062512, 1.25157463]])
+ array([1.25157463, 0.02062512])
+ >>> atol_vectoriser.transform(X=[a, b, c])
+ array([[0.42375966, 1.18168665],
+ [1.06330156, 0.29861028],
+ [1.25157463, 0.02062512]])
"""
# Note the example above must be up to date with the one in tests called test_atol_doc
def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"):
@@ -781,6 +771,8 @@ class Atol(BaseEstimator, TransformerMixin):
measures_concat = np.concatenate(X)
self.quantiser.fit(X=measures_concat, sample_weight=sample_weight)
self.centers = self.quantiser.cluster_centers_
+ # Hack, but some people are unhappy if the order depends on the version of sklearn
+ self.centers = self.centers[np.lexsort(self.centers.T)]
if self.quantiser.n_clusters == 1:
dist_centers = pairwise.pairwise_distances(measures_concat)
np.fill_diagonal(dist_centers, 0)
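The lexsort above orders rows by their last coordinate first; a quick illustration on made-up centers:

    import numpy as np

    centers = np.array([[2.6, 2.8, -0.4],
                        [2.0, 0.66, 3.33]])
    # np.lexsort takes keys as rows; centers.T makes the last column the primary key
    order = np.lexsort(centers.T)
    print(centers[order])        # the row with the smallest last coordinate comes first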
diff --git a/src/python/gudhi/rips_complex.pyx b/src/python/gudhi/rips_complex.pyx
index c3470292..d748f91e 100644
--- a/src/python/gudhi/rips_complex.pyx
+++ b/src/python/gudhi/rips_complex.pyx
@@ -41,31 +41,30 @@ cdef class RipsComplex:
cdef Rips_complex_interface thisref
# Fake constructor that does nothing but documenting the constructor
- def __init__(self, points=None, distance_matrix=None,
+ def __init__(self, *, points=None, distance_matrix=None,
max_edge_length=float('inf'), sparse=None):
"""RipsComplex constructor.
- :param max_edge_length: Rips value.
- :type max_edge_length: float
-
:param points: A list of points in d-Dimension.
- :type points: list of list of float
+ :type points: List[List[float]]
Or
:param distance_matrix: A distance matrix (full square or lower
triangular).
- :type points: list of list of float
+ :type distance_matrix: List[List[float]]
And in both cases
+ :param max_edge_length: Rips value.
+ :type max_edge_length: float
:param sparse: If this is not None, it switches to building a sparse
Rips and represents the approximation parameter epsilon.
:type sparse: float
"""
# The real cython constructor
- def __cinit__(self, points=None, distance_matrix=None,
+ def __cinit__(self, *, points=None, distance_matrix=None,
max_edge_length=float('inf'), sparse=None):
if sparse is not None:
if distance_matrix is not None:
diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd
index 5642f82d..5309c6fa 100644
--- a/src/python/gudhi/simplex_tree.pxd
+++ b/src/python/gudhi/simplex_tree.pxd
@@ -56,6 +56,8 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi":
int upper_bound_dimension() nogil
bool find_simplex(vector[int] simplex) nogil
bool insert(vector[int] simplex, double filtration) nogil
+ void insert_matrix(double* filtrations, int n, int stride0, int stride1, double max_filtration) nogil except +
+ void insert_batch_vertices(vector[int] v, double f) nogil except +
vector[pair[vector[int], double]] get_star(vector[int] simplex) nogil
vector[pair[vector[int], double]] get_cofaces(vector[int] simplex, int dimension) nogil
void expansion(int max_dim) nogil except +
diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx
index 05bfe22e..4cf176f5 100644
--- a/src/python/gudhi/simplex_tree.pyx
+++ b/src/python/gudhi/simplex_tree.pyx
@@ -8,14 +8,24 @@
# - YYYY/MM Author: Description of the modification
from cython.operator import dereference, preincrement
-from libc.stdint cimport intptr_t
+from libc.stdint cimport intptr_t, int32_t, int64_t
import numpy as np
cimport gudhi.simplex_tree
+cimport cython
+from numpy.math cimport INFINITY
__author__ = "Vincent Rouvreau"
__copyright__ = "Copyright (C) 2016 Inria"
__license__ = "MIT"
+ctypedef fused some_int:
+ int32_t
+ int64_t
+
+ctypedef fused some_float:
+ float
+ double
+
cdef bool callback(vector[int] simplex, void *blocker_func):
return (<object>blocker_func)(simplex)
@@ -228,6 +238,91 @@ cdef class SimplexTree:
"""
return self.get_ptr().insert(simplex, <double>filtration)
+ @staticmethod
+ @cython.boundscheck(False)
+ def create_from_array(filtrations, double max_filtration=INFINITY):
+ """Creates a new, empty complex and inserts vertices and edges. The vertices are numbered from 0 to n-1, and
+ the filtration values are encoded in the array, with the diagonal representing the vertices. It is the
+ caller's responsibility to ensure that this defines a filtration, which can be achieved with either::
+
+ filtrations[np.diag_indices_from(filtrations)] = filtrations.min(axis=1)
+
+ or::
+
+ diag = filtrations.diagonal()
+ filtrations = np.fmax(np.fmax(filtrations, diag[:, None]), diag[None, :])
+
+ :param filtrations: the filtration values of the vertices and edges to insert. The matrix is assumed to be symmetric.
+ :type filtrations: numpy.ndarray of shape (n,n)
+ :param max_filtration: only insert vertices and edges with filtration values no larger than max_filtration
+ :type max_filtration: float
+ :returns: the new complex
+ :rtype: SimplexTree
+ """
+ # TODO: document which half of the matrix is actually read?
+ filtrations = np.asanyarray(filtrations, dtype=float)
+ cdef double[:,:] F = filtrations
+ ret = SimplexTree()
+ cdef int n = F.shape[0]
+ assert n == F.shape[1], 'create_from_array() expects a square array'
+ with nogil:
+ ret.get_ptr().insert_matrix(&F[0,0], n, F.strides[0], F.strides[1], max_filtration)
+ return ret
+
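A usage sketch for create_from_array, following the filtration fix suggested in its docstring (random values, illustrative only):

    import numpy as np
    from gudhi import SimplexTree

    rng = np.random.default_rng(0)
    d = rng.random((4, 4))
    filtrations = (d + d.T) / 2                    # symmetric edge filtrations
    # lower each vertex (diagonal) to the smallest incident value, as documented above
    filtrations[np.diag_indices_from(filtrations)] = filtrations.min(axis=1)
    st = SimplexTree.create_from_array(filtrations, max_filtration=0.8)
    print(st.num_simplices())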
+ def insert_edges_from_coo_matrix(self, edges):
+ """Inserts edges given by a sparse matrix in `COOrdinate format
+ <https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.coo_matrix.html>`_.
+ If an edge is repeated, the smallest filtration value is used. Missing entries are not inserted.
+ Diagonal entries are currently interpreted as vertices, although we do not guarantee this behavior
+ in the future. This is only useful if you want to insert a vertex with a smaller filtration value
+ than the smallest edge containing it, since vertices are implicitly inserted together with the edges.
+
+ :param edges: the edges to insert and their filtration values.
+ :type edges: scipy.sparse.coo_matrix of shape (n,n)
+
+ .. seealso:: :func:`insert_batch`
+ """
+ # Without this, it could be slow if we end up inserting vertices in a bad order (flat_map).
+ self.get_ptr().insert_batch_vertices(np.unique(np.stack((edges.row, edges.col))), INFINITY)
+ # TODO: optimize this?
+ for edge in zip(edges.row, edges.col, edges.data):
+ self.get_ptr().insert((edge[0], edge[1]), edge[2])
+
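A usage sketch for the COO insertion (three edges of a triangle; filtration values carried in data):

    import numpy as np
    from scipy.sparse import coo_matrix
    from gudhi import SimplexTree

    rows = np.array([0, 0, 1])
    cols = np.array([1, 2, 2])
    vals = np.array([0.1, 0.2, 0.3])               # filtration value of each edge
    st = SimplexTree()
    st.insert_edges_from_coo_matrix(coo_matrix((vals, (rows, cols)), shape=(3, 3)))
    print(list(st.get_filtration()))               # vertices appear with the edges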
+ @cython.boundscheck(False)
+ @cython.wraparound(False)
+ def insert_batch(self, some_int[:,:] vertex_array, some_float[:] filtrations):
+ """Inserts k-simplices given by a sparse array in a format similar
+ to `torch.sparse <https://pytorch.org/docs/stable/sparse.html>`_.
+ The n-th simplex has vertices `vertex_array[0,n]`, ...,
+ `vertex_array[k,n]` and filtration value `filtrations[n]`.
+ If a simplex is repeated, the smallest filtration value is used.
+ Simplices with a repeated vertex are currently interpreted as lower
+ dimensional simplices, but we do not guarantee this behavior in the
+ future. Any time a simplex is inserted, its faces are inserted as well
+ if needed to preserve a simplicial complex.
+
+ :param vertex_array: the k-simplices to insert.
+ :type vertex_array: numpy.array of shape (k+1,n)
+ :param filtrations: the filtration values.
+ :type filtrations: numpy.array of shape (n,)
+ """
+ cdef vector[int] vertices = np.unique(vertex_array)
+ cdef Py_ssize_t k = vertex_array.shape[0]
+ cdef Py_ssize_t n = vertex_array.shape[1]
+ assert filtrations.shape[0] == n, 'inconsistent sizes for vertex_array and filtrations'
+ cdef Py_ssize_t i
+ cdef Py_ssize_t j
+ cdef vector[int] v
+ with nogil:
+ # Without this, it could be slow if we end up inserting vertices in a bad order (flat_map).
+ # NaN currently does the wrong thing
+ self.get_ptr().insert_batch_vertices(vertices, INFINITY)
+ for i in range(n):
+ for j in range(k):
+ v.push_back(vertex_array[j, i])
+ self.get_ptr().insert(v, filtrations[i])
+ v.clear()
+
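A usage sketch for insert_batch (one column per simplex, one row per vertex):

    import numpy as np
    from gudhi import SimplexTree

    vertex_array = np.array([[0, 1],               # two triangles: (0,1,2) and (1,2,3)
                             [1, 2],
                             [2, 3]])
    filtrations = np.array([0.5, 0.7])
    st = SimplexTree()
    st.insert_batch(vertex_array, filtrations)
    print(st.num_simplices())                      # faces are inserted automatically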
def get_simplices(self):
"""This function returns a generator with simplices and their given
filtration values.
@@ -376,7 +471,7 @@ cdef class SimplexTree:
"""
return self.get_ptr().prune_above_filtration(filtration)
- def expansion(self, max_dim):
+ def expansion(self, max_dimension):
"""Expands the simplex tree containing only its one skeleton
until dimension max_dim.
@@ -390,10 +485,10 @@ cdef class SimplexTree:
The simplex tree must contain no simplex of dimension bigger than
1 when calling the method.
- :param max_dim: The maximal dimension.
- :type max_dim: int
+ :param max_dimension: The maximal dimension.
+ :type max_dimension: int
"""
- cdef int maxdim = max_dim
+ cdef int maxdim = max_dimension
with nogil:
self.get_ptr().expansion(maxdim)
diff --git a/src/python/gudhi/tensorflow/cubical_layer.py b/src/python/gudhi/tensorflow/cubical_layer.py
index 3304e719..5df2c370 100644
--- a/src/python/gudhi/tensorflow/cubical_layer.py
+++ b/src/python/gudhi/tensorflow/cubical_layer.py
@@ -18,7 +18,7 @@ def _Cubical(Xflat, Xdim, dimensions, homology_coeff_field):
cc = CubicalComplex(dimensions=Xdim[::-1], top_dimensional_cells=Xflat)
cc.compute_persistence(homology_coeff_field=homology_coeff_field)
- # Retrieve and ouput image indices/pixels corresponding to positive and negative simplices
+ # Retrieve and output image indices/pixels corresponding to positive and negative simplices
cof_pp = cc.cofaces_of_persistence_pairs()
L_cofs = []