From ce58cc97866605fe64df479e96d455e90f56f8e2 Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Sun, 8 Dec 2019 21:22:09 -0500 Subject: fixed useless coordinates in Landscape if min and max are computed from data --- src/python/doc/representations.rst | 25 ++++++++++++++++++++-- .../diagram_vectorizations_distances_kernels.py | 6 +++--- src/python/gudhi/representations/vector_methods.py | 12 ++++++++--- 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/src/python/doc/representations.rst b/src/python/doc/representations.rst index b3131a25..b338f7f0 100644 --- a/src/python/doc/representations.rst +++ b/src/python/doc/representations.rst @@ -8,9 +8,9 @@ Representations manual .. include:: representations_sum.inc -This module, originally named sklearn_tda, aims at bridging the gap between persistence diagrams and machine learning tools, in particular scikit-learn. It provides tools, using the scikit-learn standard interface, to compute distances and kernels on diagrams, and to convert diagrams into vectors. +This module, originally available at https://github.com/MathieuCarriere/sklearn-tda and named sklearn_tda, aims at bridging the gap between persistence diagrams and machine learning, by providing implementations of most of the vector representations for persistence diagrams in the literature, in a scikit-learn format. More specifically, it provides tools, using the scikit-learn standard interface, to compute distances and kernels on persistence diagrams, and to convert these diagrams into vectors in Euclidean space. -A diagram is represented as a numpy array of shape (n,2), as can be obtained from :func:`~gudhi.SimplexTree.persistence_intervals_in_dimension` for instance. Points at infinity are represented as a numpy array of shape (n,1), storing only the birth time. +A diagram is represented as a numpy array of shape (n,2), as can be obtained from `SimplexTree.persistence_intervals_in_dimension` for instance. Points at infinity are represented as a numpy array of shape (n,1), storing only the birth time. A small example is provided @@ -46,3 +46,24 @@ Metrics :members: :special-members: :show-inheritance: + +Basic example +------------- + +This example computes the first two Landscapes associated to a persistence diagram with four points. The landscapes are evaluated on ten samples, leading to two vectors with ten coordinates each, that are eventually concatenated in order to produce a single vector representation. + +.. testcode:: + + import numpy as np + from gudhi.representations import Landscape + # A single diagram with 4 points + D = np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.]]) + diags = [D] + l=Landscape(num_landscapes=2,resolution=10).fit_transform(diags) + print(l) + +The output is: + +.. testoutput:: + + [[0. 1.25707872 2.51415744 1.88561808 0.7856742 2.04275292 3.29983165 2.51415744 1.25707872 0. 0. 0. 0.31426968 0. 0.62853936 0. 0. 0.31426968 1.25707872 0. ]] diff --git a/src/python/example/diagram_vectorizations_distances_kernels.py b/src/python/example/diagram_vectorizations_distances_kernels.py index 119072eb..f777984c 100755 --- a/src/python/example/diagram_vectorizations_distances_kernels.py +++ b/src/python/example/diagram_vectorizations_distances_kernels.py @@ -26,9 +26,9 @@ plt.show() LS = Landscape(resolution=1000) L = LS.fit_transform(diags) -plt.plot(L[0][:1000]) -plt.plot(L[0][1000:2000]) -plt.plot(L[0][2000:3000]) +plt.plot(L[0][:999]) +plt.plot(L[0][999:2*999]) +plt.plot(L[0][2*999:3*999]) plt.title("Landscape") plt.show() diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 61c4fb84..083551a4 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -104,10 +104,11 @@ class Landscape(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ + self.nan_in_range = np.isnan(np.array(self.sample_range)) if np.isnan(np.array(self.sample_range)).any(): pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + self.sample_range = np.where(self.nan_in_range, np.array([mx, My]), np.array(self.sample_range)) return self def transform(self, X): @@ -121,7 +122,7 @@ class Landscape(BaseEstimator, TransformerMixin): numpy array with shape (number of diagrams) x (number of samples = **num_landscapes** x **resolution**): output persistence landscapes. """ num_diag, Xfit = len(X), [] - x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution) + x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution + self.nan_in_range.sum()) step_x = x_values[1] - x_values[0] for i in range(num_diag): @@ -157,7 +158,12 @@ class Landscape(BaseEstimator, TransformerMixin): for k in range( min(self.num_landscapes, len(events[j])) ): ls[k,j] = events[j][k] - Xfit.append(np.sqrt(2)*np.reshape(ls,[1,-1])) + if self.nan_in_range[0]: + ls = ls[:,1:] + if self.nan_in_range[1]: + ls = ls[:,:-1] + ls = np.sqrt(2)*np.reshape(ls,[1,-1]) + Xfit.append(ls) Xfit = np.concatenate(Xfit,0) -- cgit v1.2.3