From f911ed3882c03b049a18f2a638758cb5ef994bcc Mon Sep 17 00:00:00 2001 From: wreise Date: Wed, 25 May 2022 11:59:37 +0200 Subject: Add tests for silhouettes --- src/python/test/test_representations.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'src') diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index d219ce7a..8be3a1db 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -168,3 +168,32 @@ def test_kernel_empty_diagrams(): # PersistenceFisherKernel(bandwidth_fisher=1., bandwidth=1.)(empty_diag, empty_diag) # PersistenceFisherKernel(bandwidth_fisher=1., bandwidth=1., kernel_approx=RBFSampler(gamma=1./2, n_components=100000).fit(np.ones([1,2])))(empty_diag, empty_diag) + +def test_silhouette_permutation_invariance(): + dgm = _n_diags(1)[0] + dgm_permuted = dgm[np.random.permutation(dgm.shape[0]).astype(int)] + random_resolution = random.randint(50, 100) * 10 + slt = Silhouette(resolution=random_resolution, weight=pow(2)) + + assert np.all(np.isclose(slt(dgm), slt(dgm_permuted))) + + +def test_silhouette_multiplication_invariance(): + dgm = _n_diags(1)[0] + n_repetitions = np.random.randint(2, high=10) + dgm_augmented = np.repeat(dgm, repeats=n_repetitions, axis=0) + + random_resolution = random.randint(50, 100) * 10 + slt = Silhouette(resolution=random_resolution, weight=pow(2)) + assert np.all(np.isclose(slt(dgm), slt(dgm_augmented))) + + +def test_silhouette_numeric(): + dgm = np.array([[2., 3.], [5., 6.]]) + slt = Silhouette(resolution=9, weight=pow(1)) + #slt.fit([dgm]) + # x_values = array([2., 2.5, 3., 3.5, 4., 4.5, 5., 5.5, 6.]) + + expected_silhouette = np.array([0., 0.5, 0., 0., 0., 0., 0., 0.5, 0.])/np.sqrt(2) + output_silhouette = slt(dgm) + assert np.all(np.isclose(output_silhouette, expected_silhouette)) -- cgit v1.2.3 From 3aa89676d1dc2cafcc692480bbf424a97dbbd501 Mon Sep 17 00:00:00 2001 From: wreise Date: Wed, 25 May 2022 14:20:12 +0200 Subject: Vectorize Silhouette implementation --- src/python/gudhi/representations/vector_methods.py | 48 ++++++---------------- 1 file changed, 13 insertions(+), 35 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index f8078d03..62b35389 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -85,7 +85,7 @@ class PersistenceImage(BaseEstimator, TransformerMixin): Xfit.append(image.flatten()[np.newaxis,:]) - Xfit = np.concatenate(Xfit,0) + Xfit = np.concatenate(Xfit, 0) return Xfit @@ -235,6 +235,7 @@ class Silhouette(BaseEstimator, TransformerMixin): sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. """ self.weight, self.resolution, self.sample_range = weight, resolution, sample_range + self.im_range = None def fit(self, X, y=None): """ @@ -245,6 +246,7 @@ class Silhouette(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) + self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution) return self def transform(self, X): @@ -257,44 +259,20 @@ class Silhouette(BaseEstimator, TransformerMixin): Returns: numpy array with shape (number of diagrams) x (**resolution**): output persistence silhouettes. """ - num_diag, Xfit = len(X), [] - x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution) - step_x = x_values[1] - x_values[0] + Xfit = [] + x_values = self.im_range - for i in range(num_diag): - - diagram, num_pts_in_diag = X[i], X[i].shape[0] - - sh, weights = np.zeros(self.resolution), np.zeros(num_pts_in_diag) - for j in range(num_pts_in_diag): - weights[j] = self.weight(diagram[j,:]) + for i, diag in enumerate(X): + midpoints, heights = (diag[:, 0] + diag[:, 1])/2., (diag[:, 1] - diag[:, 0])/2. + weights = np.array([self.weight(point) for point in diag]) total_weight = np.sum(weights) - for j in range(num_pts_in_diag): - - [px,py] = diagram[j,:2] - weight = weights[j] / total_weight - min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.resolution) - mid_idx = np.clip(np.ceil((0.5*(py+px) - self.sample_range[0]) / step_x).astype(int), 0, self.resolution) - max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution) - - if min_idx < self.resolution and max_idx > 0: - - silhouette_value = self.sample_range[0] + min_idx * step_x - px - for k in range(min_idx, mid_idx): - sh[k] += weight * silhouette_value - silhouette_value += step_x - - silhouette_value = py - self.sample_range[0] - mid_idx * step_x - for k in range(mid_idx, max_idx): - sh[k] += weight * silhouette_value - silhouette_value -= step_x - - Xfit.append(np.reshape(np.sqrt(2) * sh, [1,-1])) - - Xfit = np.concatenate(Xfit, 0) + tent_functions = heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]) + tent_functions[tent_functions < 0.] = 0. + silhouette = np.sum(weights[None, :]/total_weight * tent_functions, axis=1) + Xfit.append(silhouette * np.sqrt(2)) - return Xfit + return np.stack(Xfit, axis=0) def __call__(self, diag): """ -- cgit v1.2.3 From 1a76ecc3e7459e3461e1f182004362dcb663addd Mon Sep 17 00:00:00 2001 From: wreise Date: Wed, 25 May 2022 14:34:11 +0200 Subject: Compactify --- src/python/gudhi/representations/vector_methods.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 62b35389..e6289a37 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -267,8 +267,7 @@ class Silhouette(BaseEstimator, TransformerMixin): weights = np.array([self.weight(point) for point in diag]) total_weight = np.sum(weights) - tent_functions = heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]) - tent_functions[tent_functions < 0.] = 0. + tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) silhouette = np.sum(weights[None, :]/total_weight * tent_functions, axis=1) Xfit.append(silhouette * np.sqrt(2)) -- cgit v1.2.3 From e8d0cbc3311765900e098b472608dc40b84d07d8 Mon Sep 17 00:00:00 2001 From: wreise Date: Wed, 25 May 2022 15:14:15 +0200 Subject: Optimize using keops --- src/python/gudhi/representations/vector_methods.py | 27 +++++++++++++++------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index e6289a37..55dc2c5b 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -10,6 +10,7 @@ # - 2021/11 Vincent Rouvreau: factorize _automatic_sample_range import numpy as np +from pykeops.numpy import Genred from sklearn.base import BaseEstimator, TransformerMixin from sklearn.exceptions import NotFittedError from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler @@ -259,19 +260,29 @@ class Silhouette(BaseEstimator, TransformerMixin): Returns: numpy array with shape (number of diagrams) x (**resolution**): output persistence silhouettes. """ - Xfit = [] - x_values = self.im_range + silhouette_formula = "normalized_weights * ReLU(heights - Abs(x_values - midpoints))" + variables = [ + "normalized_weights = Vi(1)", + "heights = Vi(1)", + "midpoints = Vi(1)", + "x_values = Vj(1)", + ] + silhouette = Genred(silhouette_formula, variables, reduction_op="Sum", axis=0) + + silhouettes_list = [] + x_values = self.im_range for i, diag in enumerate(X): - midpoints, heights = (diag[:, 0] + diag[:, 1])/2., (diag[:, 1] - diag[:, 0])/2. + midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. weights = np.array([self.weight(point) for point in diag]) - total_weight = np.sum(weights) + weights /= np.sum(weights) - tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) - silhouette = np.sum(weights[None, :]/total_weight * tent_functions, axis=1) - Xfit.append(silhouette * np.sqrt(2)) + silhouettes_list.append( + np.sqrt(2) * silhouette(weights[:, None], heights[:, None], + midpoints[:, None], x_values[:, None])[:, 0] + ) - return np.stack(Xfit, axis=0) + return np.stack(silhouettes_list, axis=0) def __call__(self, diag): """ -- cgit v1.2.3 From 912156b36da1dce1f73f8d2a63cc18e67c173d54 Mon Sep 17 00:00:00 2001 From: wreise Date: Wed, 25 May 2022 16:43:15 +0200 Subject: Move the initialisation of the Genred method to the constructor of Silhouette --- src/python/gudhi/representations/vector_methods.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 55dc2c5b..c250c98c 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -238,6 +238,15 @@ class Silhouette(BaseEstimator, TransformerMixin): self.weight, self.resolution, self.sample_range = weight, resolution, sample_range self.im_range = None + silhouette_formula = "normalized_weights * ReLU(heights - Abs(x_values - midpoints))" + variables = [ + "normalized_weights = Vi(1)", + "heights = Vi(1)", + "midpoints = Vi(1)", + "x_values = Vj(1)", + ] + self.silhouette = Genred(silhouette_formula, variables, reduction_op="Sum", axis=0) + def fit(self, X, y=None): """ Fit the Silhouette class on a list of persistence diagrams: if any of the values in **sample_range** is numpy.nan, replace it with the corresponding value computed on the given list of persistence diagrams. @@ -260,16 +269,6 @@ class Silhouette(BaseEstimator, TransformerMixin): Returns: numpy array with shape (number of diagrams) x (**resolution**): output persistence silhouettes. """ - - silhouette_formula = "normalized_weights * ReLU(heights - Abs(x_values - midpoints))" - variables = [ - "normalized_weights = Vi(1)", - "heights = Vi(1)", - "midpoints = Vi(1)", - "x_values = Vj(1)", - ] - silhouette = Genred(silhouette_formula, variables, reduction_op="Sum", axis=0) - silhouettes_list = [] x_values = self.im_range for i, diag in enumerate(X): @@ -278,7 +277,7 @@ class Silhouette(BaseEstimator, TransformerMixin): weights /= np.sum(weights) silhouettes_list.append( - np.sqrt(2) * silhouette(weights[:, None], heights[:, None], + np.sqrt(2) * self.silhouette(weights[:, None], heights[:, None], midpoints[:, None], x_values[:, None])[:, 0] ) -- cgit v1.2.3 From 500de3b265c227eadd81fb860fade4f6c416cfbc Mon Sep 17 00:00:00 2001 From: wreise Date: Thu, 2 Jun 2022 10:49:07 +0200 Subject: Add tests for Landscapes --- src/python/test/test_representations.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'src') diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index 8be3a1db..0324b026 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -197,3 +197,29 @@ def test_silhouette_numeric(): expected_silhouette = np.array([0., 0.5, 0., 0., 0., 0., 0., 0.5, 0.])/np.sqrt(2) output_silhouette = slt(dgm) assert np.all(np.isclose(output_silhouette, expected_silhouette)) + + +def test_landscape_small_persistence_invariance(): + dgm = np.array([[2., 6.], [2., 5.], [3., 7.]]) + small_persistence_pts = np.random.rand(10, 2) + small_persistence_pts[:, 1] += small_persistence_pts[:, 0] + small_persistence_pts += np.min(dgm) + dgm_augmented = np.concatenate([dgm, small_persistence_pts], axis=0) + + lds = Landscape(num_landscapes=2, resolution=5) + lds_dgm, lds_dgm_augmented = lds(dgm), lds(dgm_augmented) + + assert np.all(np.isclose(lds_dgm, lds_dgm_augmented)) + + +def test_landscape_numeric(): + dgm = np.array([[2., 6.], [3., 5.]]) + lds_ref = np.array([ + 0., 0.5, 1., 1.5, 2., 1.5, 1., 0.5, 0., # tent of [2, 6] + 0., 0., 0., 0.5, 1., 0.5, 0., 0., 0. + ]) + lds_ref *= np.sqrt(2) + lds = Landscape(num_landscapes=2, resolution=9, sample_range=[2., 6.]) + lds_dgm = lds(dgm) + print(lds.sample_range, lds.new_resolution, lds_dgm.shape) + assert np.all(np.isclose(lds_dgm, lds_ref)) -- cgit v1.2.3 From 775cd158c869f75a4925ac6f7a94a1847f7d7788 Mon Sep 17 00:00:00 2001 From: wreise Date: Thu, 2 Jun 2022 15:38:20 +0200 Subject: Remove print from test --- src/python/test/test_representations.py | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index 0324b026..5f29740f 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -221,5 +221,4 @@ def test_landscape_numeric(): lds_ref *= np.sqrt(2) lds = Landscape(num_landscapes=2, resolution=9, sample_range=[2., 6.]) lds_dgm = lds(dgm) - print(lds.sample_range, lds.new_resolution, lds_dgm.shape) assert np.all(np.isclose(lds_dgm, lds_ref)) -- cgit v1.2.3 From a70ad064ac3e0aee5c9b8084d35b7ce329c6bddc Mon Sep 17 00:00:00 2001 From: wreise Date: Thu, 2 Jun 2022 15:38:38 +0200 Subject: Start landscape optimization --- src/python/gudhi/representations/vector_methods.py | 64 ++++++++-------------- 1 file changed, 22 insertions(+), 42 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index c250c98c..acc62943 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -141,6 +141,16 @@ class Landscape(BaseEstimator, TransformerMixin): self.nan_in_range = np.isnan(np.array(self.sample_range)) self.new_resolution = self.resolution + self.nan_in_range.sum() + landscape_formula = "(-1)*ReLU(heights - Abs(x_values - midpoints))" + variables = [ + "heights = Vi(1)", + "midpoints = Vi(1)", + "x_values = Vj(1)", + ] + from pykeops.numpy import Genred + self.landscape = Genred(landscape_formula, variables, reduction_op="KMin", + axis=0, opt_arg=self.num_landscapes) + def fit(self, X, y=None): """ Fit the Landscape class on a list of persistence diagrams: if any of the values in **sample_range** is numpy.nan, replace it with the corresponding value computed on the given list of persistence diagrams. @@ -162,53 +172,23 @@ class Landscape(BaseEstimator, TransformerMixin): Returns: numpy array with shape (number of diagrams) x (number of samples = **num_landscapes** x **resolution**): output persistence landscapes. """ - num_diag, Xfit = len(X), [] - x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) - step_x = x_values[1] - x_values[0] - - for i in range(num_diag): - - diagram, num_pts_in_diag = X[i], X[i].shape[0] - ls = np.zeros([self.num_landscapes, self.new_resolution]) - - events = [] - for j in range(self.new_resolution): - events.append([]) - - for j in range(num_pts_in_diag): - [px,py] = diagram[j,:2] - min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.new_resolution) - mid_idx = np.clip(np.ceil((0.5*(py+px) - self.sample_range[0]) / step_x).astype(int), 0, self.new_resolution) - max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.new_resolution) - - if min_idx < self.new_resolution and max_idx > 0: - - landscape_value = self.sample_range[0] + min_idx * step_x - px - for k in range(min_idx, mid_idx): - events[k].append(landscape_value) - landscape_value += step_x - - landscape_value = py - self.sample_range[0] - mid_idx * step_x - for k in range(mid_idx, max_idx): - events[k].append(landscape_value) - landscape_value -= step_x - - for j in range(self.new_resolution): - events[j].sort(reverse=True) - for k in range( min(self.num_landscapes, len(events[j])) ): - ls[k,j] = events[j][k] + Xfit = [] + x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + for i, diag in enumerate(X): + midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. + tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) + tent_functions.partition(diag.shape[0] - self.num_landscapes, axis=1) + landscapes = np.sort(tent_functions[-self.num_landscapes:, :])[::-1].T if self.nan_in_range[0]: - ls = ls[:,1:] + landscapes = landscapes[:,1:] if self.nan_in_range[1]: - ls = ls[:,:-1] - ls = np.sqrt(2)*np.reshape(ls,[1,-1]) - Xfit.append(ls) - - Xfit = np.concatenate(Xfit,0) + landscapes = landscapes[:,:-1] + landscapes = np.sqrt(2)*np.reshape(landscapes, [1, -1]) + Xfit.append(landscapes) - return Xfit + return np.stack(Xfit, axis=0) def __call__(self, diag): """ -- cgit v1.2.3 From 42b18e60e418f4078cd6406dcc202b696798c844 Mon Sep 17 00:00:00 2001 From: wreise Date: Sun, 5 Jun 2022 16:59:02 +0200 Subject: Import pykeops locally in Silhouette and Lanscapes --- src/python/gudhi/representations/vector_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index acc62943..b0843120 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -10,7 +10,6 @@ # - 2021/11 Vincent Rouvreau: factorize _automatic_sample_range import numpy as np -from pykeops.numpy import Genred from sklearn.base import BaseEstimator, TransformerMixin from sklearn.exceptions import NotFittedError from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler @@ -218,6 +217,7 @@ class Silhouette(BaseEstimator, TransformerMixin): self.weight, self.resolution, self.sample_range = weight, resolution, sample_range self.im_range = None + from pykeops.numpy import Genred silhouette_formula = "normalized_weights * ReLU(heights - Abs(x_values - midpoints))" variables = [ "normalized_weights = Vi(1)", -- cgit v1.2.3 From 60e57f9c86a7aae67c2931200066aba059ec2721 Mon Sep 17 00:00:00 2001 From: wreise Date: Fri, 5 Aug 2022 22:19:30 +0200 Subject: Test the numpy version --- src/python/gudhi/representations/vector_methods.py | 42 ++++++---------------- 1 file changed, 11 insertions(+), 31 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index b0843120..7f311b3b 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -140,16 +140,6 @@ class Landscape(BaseEstimator, TransformerMixin): self.nan_in_range = np.isnan(np.array(self.sample_range)) self.new_resolution = self.resolution + self.nan_in_range.sum() - landscape_formula = "(-1)*ReLU(heights - Abs(x_values - midpoints))" - variables = [ - "heights = Vi(1)", - "midpoints = Vi(1)", - "x_values = Vj(1)", - ] - from pykeops.numpy import Genred - self.landscape = Genred(landscape_formula, variables, reduction_op="KMin", - axis=0, opt_arg=self.num_landscapes) - def fit(self, X, y=None): """ Fit the Landscape class on a list of persistence diagrams: if any of the values in **sample_range** is numpy.nan, replace it with the corresponding value computed on the given list of persistence diagrams. @@ -178,13 +168,13 @@ class Landscape(BaseEstimator, TransformerMixin): midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) tent_functions.partition(diag.shape[0] - self.num_landscapes, axis=1) - landscapes = np.sort(tent_functions[-self.num_landscapes:, :])[::-1].T + landscapes = np.sort(tent_functions, axis=1)[:, -self.num_landscapes:][:, ::-1].T if self.nan_in_range[0]: - landscapes = landscapes[:,1:] + landscapes = landscapes[:, 1:] if self.nan_in_range[1]: - landscapes = landscapes[:,:-1] - landscapes = np.sqrt(2)*np.reshape(landscapes, [1, -1]) + landscapes = landscapes[:, :-1] + landscapes = np.sqrt(2) * np.ravel(landscapes) Xfit.append(landscapes) return np.stack(Xfit, axis=0) @@ -217,16 +207,6 @@ class Silhouette(BaseEstimator, TransformerMixin): self.weight, self.resolution, self.sample_range = weight, resolution, sample_range self.im_range = None - from pykeops.numpy import Genred - silhouette_formula = "normalized_weights * ReLU(heights - Abs(x_values - midpoints))" - variables = [ - "normalized_weights = Vi(1)", - "heights = Vi(1)", - "midpoints = Vi(1)", - "x_values = Vj(1)", - ] - self.silhouette = Genred(silhouette_formula, variables, reduction_op="Sum", axis=0) - def fit(self, X, y=None): """ Fit the Silhouette class on a list of persistence diagrams: if any of the values in **sample_range** is numpy.nan, replace it with the corresponding value computed on the given list of persistence diagrams. @@ -249,19 +229,19 @@ class Silhouette(BaseEstimator, TransformerMixin): Returns: numpy array with shape (number of diagrams) x (**resolution**): output persistence silhouettes. """ - silhouettes_list = [] + Xfit = [] x_values = self.im_range + for i, diag in enumerate(X): midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. weights = np.array([self.weight(point) for point in diag]) - weights /= np.sum(weights) + total_weight = np.sum(weights) - silhouettes_list.append( - np.sqrt(2) * self.silhouette(weights[:, None], heights[:, None], - midpoints[:, None], x_values[:, None])[:, 0] - ) + tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) + silhouette = np.sum(weights[None, :] / total_weight * tent_functions, axis=1) + Xfit.append(silhouette * np.sqrt(2)) - return np.stack(silhouettes_list, axis=0) + return np.stack(Xfit, axis=0) def __call__(self, diag): """ -- cgit v1.2.3 From 565cc1fb0c81b627fa6bec0e9ca76571663a7834 Mon Sep 17 00:00:00 2001 From: wreise Date: Fri, 7 Oct 2022 17:59:51 +0200 Subject: Test: resolution when nan in the range --- src/python/test/test_representations.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src') diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index 2ca72d07..823e8620 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -241,3 +241,11 @@ def test_landscape_numeric(): lds = Landscape(num_landscapes=2, resolution=9, sample_range=[2., 6.]) lds_dgm = lds(dgm) assert np.all(np.isclose(lds_dgm, lds_ref)) + + +def test_landscape_nan_range(): + dgm = np.array([[2., 6.], [3., 5.]]) + lds = Landscape(num_landscapes=2, resolution=9, sample_range=[np.nan, 6.]) + lds_dgm = lds(dgm) + assert (lds.sample_range[0] == 2) & (lds.sample_range[1] == 6) + assert lds.new_resolution == 10 -- cgit v1.2.3 From d9dfffdb580ab865a829fce851779f33fa47e4f7 Mon Sep 17 00:00:00 2001 From: wreise Date: Fri, 7 Oct 2022 18:13:26 +0200 Subject: Add triming of the range; Marcs' comments --- src/python/gudhi/representations/vector_methods.py | 31 ++++++++++++++-------- src/python/test/test_representations.py | 2 +- 2 files changed, 21 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 8c8b46db..8114885e 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -123,6 +123,15 @@ def _automatic_sample_range(sample_range, X, y): pass return sample_range + +def trim_on_edges(x, are_endpoints_nan): + if are_endpoints_nan[0]: + x = x[1:] + if are_endpoints_nan[1]: + x = x[:-1] + return x + + class Landscape(BaseEstimator, TransformerMixin): """ This is a class for computing persistence landscapes from a list of persistence diagrams. A persistence landscape is a collection of 1D piecewise-linear functions computed from the rank function associated to the persistence diagram. These piecewise-linear functions are then sampled evenly on a given range and the corresponding vectors of samples are concatenated and returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details. @@ -149,6 +158,8 @@ class Landscape(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) + self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + self.im_range = trim_on_edges(self.im_range, self.nan_in_range) return self def transform(self, X): @@ -163,17 +174,13 @@ class Landscape(BaseEstimator, TransformerMixin): """ Xfit = [] - x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + x_values = self.im_range for i, diag in enumerate(X): midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) tent_functions.partition(diag.shape[0] - self.num_landscapes, axis=1) - landscapes = np.sort(tent_functions, axis=1)[:, -self.num_landscapes:][:, ::-1].T + landscapes = tent_functions[:, -self.num_landscapes:][:, ::-1].T - if self.nan_in_range[0]: - landscapes = landscapes[:, 1:] - if self.nan_in_range[1]: - landscapes = landscapes[:, :-1] landscapes = np.sqrt(2) * np.ravel(landscapes) Xfit.append(landscapes) @@ -189,7 +196,7 @@ class Landscape(BaseEstimator, TransformerMixin): Returns: numpy array with shape (number of samples = **num_landscapes** x **resolution**): output persistence landscape. """ - return self.fit_transform([diag])[0,:] + return self.fit_transform([diag])[0, :] class Silhouette(BaseEstimator, TransformerMixin): """ @@ -205,7 +212,8 @@ class Silhouette(BaseEstimator, TransformerMixin): sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. """ self.weight, self.resolution, self.sample_range = weight, resolution, sample_range - self.im_range = None + self.nan_in_range = np.isnan(np.array(self.sample_range)) + self.new_resolution = self.resolution + self.nan_in_range.sum() def fit(self, X, y=None): """ @@ -216,7 +224,8 @@ class Silhouette(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution) + self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + self.im_range = trim_on_edges(self.im_range, self.nan_in_range) return self def transform(self, X): @@ -232,9 +241,9 @@ class Silhouette(BaseEstimator, TransformerMixin): Xfit = [] x_values = self.im_range - for i, diag in enumerate(X): + for diag in X: midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. - weights = np.array([self.weight(point) for point in diag]) + weights = np.array([self.weight(pt) for pt in diag]) total_weight = np.sum(weights) tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index 823e8620..948d7804 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -209,7 +209,7 @@ def test_silhouette_multiplication_invariance(): def test_silhouette_numeric(): dgm = np.array([[2., 3.], [5., 6.]]) - slt = Silhouette(resolution=9, weight=pow(1)) + slt = Silhouette(resolution=9, weight=pow(1), sample_range=[2., 6.]) #slt.fit([dgm]) # x_values = array([2., 2.5, 3., 3.5, 4., 4.5, 5., 5.5, 6.]) -- cgit v1.2.3 From 059ff0c42a069c744ed121c948bc3d39b5cc7f10 Mon Sep 17 00:00:00 2001 From: wreise Date: Fri, 7 Oct 2022 18:24:32 +0200 Subject: Remove enumerate --- src/python/gudhi/representations/vector_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 8114885e..5ea4ea48 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -175,7 +175,7 @@ class Landscape(BaseEstimator, TransformerMixin): Xfit = [] x_values = self.im_range - for i, diag in enumerate(X): + for diag in X: midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) tent_functions.partition(diag.shape[0] - self.num_landscapes, axis=1) -- cgit v1.2.3 From 4aac9e03c400bd43f237504cf4ff9d25f041e473 Mon Sep 17 00:00:00 2001 From: wreise Date: Wed, 12 Oct 2022 15:55:22 +0200 Subject: Clean argpartition --- src/python/gudhi/representations/vector_methods.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 5ea4ea48..3a91eccd 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -124,7 +124,7 @@ def _automatic_sample_range(sample_range, X, y): return sample_range -def trim_on_edges(x, are_endpoints_nan): +def _trim_on_edges(x, are_endpoints_nan): if are_endpoints_nan[0]: x = x[1:] if are_endpoints_nan[1]: @@ -159,7 +159,7 @@ class Landscape(BaseEstimator, TransformerMixin): """ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) - self.im_range = trim_on_edges(self.im_range, self.nan_in_range) + self.im_range = _trim_on_edges(self.im_range, self.nan_in_range) return self def transform(self, X): @@ -178,9 +178,17 @@ class Landscape(BaseEstimator, TransformerMixin): for diag in X: midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) - tent_functions.partition(diag.shape[0] - self.num_landscapes, axis=1) - landscapes = tent_functions[:, -self.num_landscapes:][:, ::-1].T - + n_points = diag.shape[0] + # Get indices of largest elements: can't take more than n_points - 1 (the last ones are in the right position) + argpartition = np.argpartition(-tent_functions, min(self.num_landscapes, n_points-1), axis=1) + landscapes = np.take_along_axis(tent_functions, argpartition, axis=1) + landscapes = landscapes[:, :min(self.num_landscapes, n_points)].T + + # Complete the array with zeros to get the right number of landscapes + if self.num_landscapes > n_points: + landscapes = np.concatenate([ + landscapes, np.zeros((self.num_landscapes-n_points, *landscapes.shape[1:])) + ], axis=0) landscapes = np.sqrt(2) * np.ravel(landscapes) Xfit.append(landscapes) @@ -225,7 +233,7 @@ class Silhouette(BaseEstimator, TransformerMixin): """ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) - self.im_range = trim_on_edges(self.im_range, self.nan_in_range) + self.im_range = _trim_on_edges(self.im_range, self.nan_in_range) return self def transform(self, X): -- cgit v1.2.3 From 80edbbaccc344abfa18ebc48a8493f747a775476 Mon Sep 17 00:00:00 2001 From: wreise Date: Sat, 15 Oct 2022 16:54:04 +0200 Subject: Simplify sorting in landscapes --- src/python/test/test_representations.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index 948d7804..58caab21 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -235,10 +235,12 @@ def test_landscape_numeric(): dgm = np.array([[2., 6.], [3., 5.]]) lds_ref = np.array([ 0., 0.5, 1., 1.5, 2., 1.5, 1., 0.5, 0., # tent of [2, 6] - 0., 0., 0., 0.5, 1., 0.5, 0., 0., 0. + 0., 0., 0., 0.5, 1., 0.5, 0., 0., 0., + 0., 0., 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 0., 0., 0., 0., ]) lds_ref *= np.sqrt(2) - lds = Landscape(num_landscapes=2, resolution=9, sample_range=[2., 6.]) + lds = Landscape(num_landscapes=4, resolution=9, sample_range=[2., 6.]) lds_dgm = lds(dgm) assert np.all(np.isclose(lds_dgm, lds_ref)) -- cgit v1.2.3 From 74617f0673aa13bce47833c366321a8838a7d123 Mon Sep 17 00:00:00 2001 From: wreise Date: Sat, 15 Oct 2022 17:00:25 +0200 Subject: Remove old lines --- src/python/gudhi/representations/vector_methods.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 3a91eccd..6267e077 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -179,10 +179,8 @@ class Landscape(BaseEstimator, TransformerMixin): midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) n_points = diag.shape[0] - # Get indices of largest elements: can't take more than n_points - 1 (the last ones are in the right position) - argpartition = np.argpartition(-tent_functions, min(self.num_landscapes, n_points-1), axis=1) - landscapes = np.take_along_axis(tent_functions, argpartition, axis=1) - landscapes = landscapes[:, :min(self.num_landscapes, n_points)].T + tent_functions.partition(n_points-self.num_landscapes, axis=1) + landscapes = np.sort(tent_functions[:, -self.num_landscapes:], axis=1)[:, ::-1].T # Complete the array with zeros to get the right number of landscapes if self.num_landscapes > n_points: -- cgit v1.2.3 From cd7dea8627f4b1c624e88d5ff28b32d1602f5e39 Mon Sep 17 00:00:00 2001 From: wreise Date: Sat, 15 Oct 2022 18:45:42 +0200 Subject: Treat the case when there are less points than landscape layers --- src/python/gudhi/representations/vector_methods.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 6267e077..a169aee8 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -179,14 +179,15 @@ class Landscape(BaseEstimator, TransformerMixin): midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) n_points = diag.shape[0] - tent_functions.partition(n_points-self.num_landscapes, axis=1) - landscapes = np.sort(tent_functions[:, -self.num_landscapes:], axis=1)[:, ::-1].T - # Complete the array with zeros to get the right number of landscapes if self.num_landscapes > n_points: - landscapes = np.concatenate([ - landscapes, np.zeros((self.num_landscapes-n_points, *landscapes.shape[1:])) - ], axis=0) + tent_functions = np.concatenate( + [tent_functions, np.zeros((tent_functions.shape[0], self.num_landscapes-n_points))], + axis=1 + ) + tent_functions.partition(tent_functions.shape[1]-self.num_landscapes, axis=1) + landscapes = np.sort(tent_functions[:, -self.num_landscapes:], axis=1)[:, ::-1].T + landscapes = np.sqrt(2) * np.ravel(landscapes) Xfit.append(landscapes) -- cgit v1.2.3