author     Vincent Rouvreau <10407034+VincentRouvreau@users.noreply.github.com>  2019-12-12 10:17:24 +0100
committer  GitHub <noreply@github.com>  2019-12-12 10:17:24 +0100
commit     5a5a534b840216719fb3f9cb819d1306ca7ed196 (patch)
tree       db5e90a48b287e237cdf5c4aeb069c294e13436d
parent     3e829fd6f4a3a122da9df35a88e5c51122860bf6 (diff)
parent     7bd6907e577e22803fec179f652ecf0ec64dcb4a (diff)
Merge pull request #173 from MathieuCarriere/master
Example in doc + fix on Landscape Fix #166
-rw-r--r--  src/python/doc/representations.rst                   26
-rw-r--r--  src/python/gudhi/representations/vector_methods.py   29
2 files changed, 43 insertions(+), 12 deletions(-)
diff --git a/src/python/doc/representations.rst b/src/python/doc/representations.rst
index b3131a25..11dcbcf9 100644
--- a/src/python/doc/representations.rst
+++ b/src/python/doc/representations.rst
@@ -8,7 +8,7 @@ Representations manual
.. include:: representations_sum.inc
-This module, originally named sklearn_tda, aims at bridging the gap between persistence diagrams and machine learning tools, in particular scikit-learn. It provides tools, using the scikit-learn standard interface, to compute distances and kernels on diagrams, and to convert diagrams into vectors.
+This module, originally available at https://github.com/MathieuCarriere/sklearn-tda and named sklearn_tda, aims at bridging the gap between persistence diagrams and machine learning, by providing implementations of most of the vector representations for persistence diagrams in the literature, in a scikit-learn format. More specifically, it provides tools, using the scikit-learn standard interface, to compute distances and kernels on persistence diagrams, and to convert these diagrams into vectors in Euclidean space.
A diagram is represented as a numpy array of shape (n,2), as can be obtained from :func:`~gudhi.SimplexTree.persistence_intervals_in_dimension` for instance. Points at infinity are represented as a numpy array of shape (n,1), storing only the birth time.
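As context for the paragraph above, here is a minimal sketch of obtaining such an (n, 2) diagram array with GUDHI's SimplexTree API; the complex and filtration values are made up purely for illustration.

    import gudhi

    # Build a small filtered simplicial complex (illustrative values only).
    st = gudhi.SimplexTree()
    st.insert([0], filtration=0.0)
    st.insert([1], filtration=1.0)
    st.insert([0, 1], filtration=2.0)
    st.insert([2], filtration=3.0)
    st.insert([0, 2], filtration=4.0)
    st.insert([1, 2], filtration=5.0)

    # Persistence must be computed before intervals can be queried.
    st.persistence()

    # A numpy array of shape (n, 2), one (birth, death) pair per row,
    # as expected by the representations module.
    diag = st.persistence_intervals_in_dimension(0)
    print(diag.shape)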
@@ -46,3 +46,27 @@ Metrics
:members:
:special-members:
:show-inheritance:
+
+Basic example
+-------------
+
+This example computes the first two Landscapes associated to a persistence diagram with four points. The landscapes are evaluated on ten samples, leading to two vectors with ten coordinates each, that are eventually concatenated in order to produce a single vector representation.
+
+.. testcode::
+
+ import numpy as np
+ from gudhi.representations import Landscape
+ # A single diagram with 4 points
+ D = np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.]])
+ diags = [D]
+ l=Landscape(num_landscapes=2,resolution=10).fit_transform(diags)
+ print(l)
+
+The output is:
+
+.. testoutput::
+
+ [[1.02851895 2.05703791 2.57129739 1.54277843 0.89995409 1.92847304
+ 2.95699199 3.08555686 2.05703791 1.02851895 0. 0.64282435
+ 0. 0. 0.51425948 0. 0. 0.
+ 0.77138922 1.02851895]]
diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 61c4fb84..fe26dbe2 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -95,6 +95,8 @@ class Landscape(BaseEstimator, TransformerMixin):
sample_range ([double, double]): minimum and maximum of all piecewise-linear function domains, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
"""
self.num_landscapes, self.resolution, self.sample_range = num_landscapes, resolution, sample_range
+ self.nan_in_range = np.isnan(np.array(self.sample_range))
+ self.new_resolution = self.resolution + self.nan_in_range.sum()
def fit(self, X, y=None):
"""
@@ -104,10 +106,10 @@ class Landscape(BaseEstimator, TransformerMixin):
X (list of n x 2 numpy arrays): input persistence diagrams.
y (n x 1 array): persistence diagram labels (unused).
"""
- if np.isnan(np.array(self.sample_range)).any():
+ if self.nan_in_range.any():
pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
[mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
- self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range))
+ self.sample_range = np.where(self.nan_in_range, np.array([mx, My]), np.array(self.sample_range))
return self
def transform(self, X):
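In fit(), DiagramScaler with MinMaxScaler is only used to read off the minimum birth and maximum death over all diagrams. A hedged numpy-only sketch of the same [mx, My] computation, assuming diagrams with finite points:

    import numpy as np

    diags = [np.array([[0., 4.], [1., 2.], [3., 8.], [6., 8.]]),
             np.array([[0.5, 1.5]])]

    # Minimum over all birth values and maximum over all death values,
    # i.e. the [mx, My] pair used to fill the NaN entries of sample_range.
    births = np.concatenate([d[:, 0] for d in diags])
    deaths = np.concatenate([d[:, 1] for d in diags])
    sample_range = np.array([births.min(), deaths.max()])
    print(sample_range)  # [0. 8.]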
@@ -121,26 +123,26 @@ class Landscape(BaseEstimator, TransformerMixin):
numpy array with shape (number of diagrams) x (number of samples = **num_landscapes** x **resolution**): output persistence landscapes.
"""
num_diag, Xfit = len(X), []
- x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution)
+ x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution)
step_x = x_values[1] - x_values[0]
for i in range(num_diag):
diagram, num_pts_in_diag = X[i], X[i].shape[0]
- ls = np.zeros([self.num_landscapes, self.resolution])
+ ls = np.zeros([self.num_landscapes, self.new_resolution])
events = []
- for j in range(self.resolution):
+ for j in range(self.new_resolution):
events.append([])
for j in range(num_pts_in_diag):
[px,py] = diagram[j,:2]
- min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
- mid_idx = np.clip(np.ceil((0.5*(py+px) - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
- max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
+ min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.new_resolution)
+ mid_idx = np.clip(np.ceil((0.5*(py+px) - self.sample_range[0]) / step_x).astype(int), 0, self.new_resolution)
+ max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.new_resolution)
- if min_idx < self.resolution and max_idx > 0:
+ if min_idx < self.new_resolution and max_idx > 0:
landscape_value = self.sample_range[0] + min_idx * step_x - px
for k in range(min_idx, mid_idx):
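For each diagram point (px, py), the three clipped indices locate the birth, the peak at (px+py)/2 and the death on the sampling grid. A small sketch of that index computation for one point, with values chosen for illustration:

    import numpy as np

    sample_range = [0., 8.]
    new_resolution = 12
    x_values = np.linspace(sample_range[0], sample_range[1], new_resolution)
    step_x = x_values[1] - x_values[0]

    px, py = 3., 8.  # one (birth, death) point of the diagram

    # Indices of the first samples at or after the birth, the midpoint and the
    # death, clipped to the grid; the tent function of this point is non-zero
    # on [min_idx, max_idx) and peaks around mid_idx.
    min_idx = np.clip(np.ceil((px - sample_range[0]) / step_x).astype(int), 0, new_resolution)
    mid_idx = np.clip(np.ceil((0.5 * (py + px) - sample_range[0]) / step_x).astype(int), 0, new_resolution)
    max_idx = np.clip(np.ceil((py - sample_range[0]) / step_x).astype(int), 0, new_resolution)
    print(min_idx, mid_idx, max_idx)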
@@ -152,12 +154,17 @@ class Landscape(BaseEstimator, TransformerMixin):
events[k].append(landscape_value)
landscape_value -= step_x
- for j in range(self.resolution):
+ for j in range(self.new_resolution):
events[j].sort(reverse=True)
for k in range( min(self.num_landscapes, len(events[j])) ):
ls[k,j] = events[j][k]
- Xfit.append(np.sqrt(2)*np.reshape(ls,[1,-1]))
+ if self.nan_in_range[0]:
+ ls = ls[:,1:]
+ if self.nan_in_range[1]:
+ ls = ls[:,:-1]
+ ls = np.sqrt(2)*np.reshape(ls,[1,-1])
+ Xfit.append(ls)
Xfit = np.concatenate(Xfit,0)
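End to end, trimming the extra boundary columns means the fix does not change the output length. A quick hedged check, assuming the patched Landscape, that the flattened vector still has num_landscapes * resolution entries when sample_range is left at its NaN default:

    import numpy as np
    from gudhi.representations import Landscape

    D = np.array([[0., 4.], [1., 2.], [3., 8.], [6., 8.]])

    # Default sample_range=[nan, nan]: both bounds are inferred in fit(), the
    # grid is temporarily widened to new_resolution, and the extra columns are
    # dropped before the landscapes are flattened.
    L = Landscape(num_landscapes=2, resolution=10)
    out = L.fit_transform([D])
    print(out.shape)  # (number of diagrams, num_landscapes * resolution) = (1, 20)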