From 0afc650917ddf9fc4cf95fd86e0b6408f64a465d Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Mon, 11 Jan 2021 11:29:20 +0100 Subject: Remove sphinx doc test for atol as points order can be inverted and add it in a UT but sorted --- src/python/gudhi/representations/vector_methods.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index cdcb1fde..d4449e7d 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -606,16 +606,16 @@ class Atol(BaseEstimator, TransformerMixin): >>> c = np.array([[3, 2, -1], [1, 2, -1]]) >>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006)) >>> atol_vectoriser.fit(X=[a, b, c]).centers - array([[ 2. , 0.66666667, 3.33333333], - [ 2.6 , 2.8 , -0.4 ]]) + >>> # array([[ 2. , 0.66666667, 3.33333333], + >>> # [ 2.6 , 2.8 , -0.4 ]]) >>> atol_vectoriser(a) - array([1.18168665, 0.42375966]) + >>> # array([1.18168665, 0.42375966]) >>> atol_vectoriser(c) - array([0.02062512, 1.25157463]) + >>> # array([0.02062512, 1.25157463]) >>> atol_vectoriser.transform(X=[a, b, c]) - array([[1.18168665, 0.42375966], - [0.29861028, 1.06330156], - [0.02062512, 1.25157463]]) + >>> # array([[1.18168665, 0.42375966], + >>> # [0.29861028, 1.06330156], + >>> # [0.02062512, 1.25157463]]) """ def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"): """ -- cgit v1.2.3 From 2a29df7cd54e9689e93bab90e3f64c84e8e8790f Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Mon, 11 Jan 2021 13:59:01 +0100 Subject: skip doctest (but run them) --- src/python/gudhi/representations/vector_methods.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index d4449e7d..5ec2abd0 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -605,14 +605,14 @@ class Atol(BaseEstimator, TransformerMixin): >>> b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]]) >>> c = np.array([[3, 2, -1], [1, 2, -1]]) >>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006)) - >>> atol_vectoriser.fit(X=[a, b, c]).centers + >>> atol_vectoriser.fit(X=[a, b, c]).centers #doctest: +SKIP >>> # array([[ 2. , 0.66666667, 3.33333333], >>> # [ 2.6 , 2.8 , -0.4 ]]) >>> atol_vectoriser(a) - >>> # array([1.18168665, 0.42375966]) + >>> # array([1.18168665, 0.42375966]) #doctest: +SKIP >>> atol_vectoriser(c) - >>> # array([0.02062512, 1.25157463]) - >>> atol_vectoriser.transform(X=[a, b, c]) + >>> # array([0.02062512, 1.25157463]) #doctest: +SKIP + >>> atol_vectoriser.transform(X=[a, b, c]) #doctest: +SKIP >>> # array([[1.18168665, 0.42375966], >>> # [0.29861028, 1.06330156], >>> # [0.02062512, 1.25157463]]) -- cgit v1.2.3 From 60907b0104a2807667f175d9a8a328fd3f7f4ec8 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Mon, 11 Jan 2021 16:25:18 +0100 Subject: Ignore doctest for atol doc. Rewrite unitary test for atol doc. To be synchronized --- src/python/gudhi/representations/vector_methods.py | 9 ++++---- src/python/test/test_representations.py | 26 ++++++++++++++-------- 2 files changed, 22 insertions(+), 13 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 5ec2abd0..84bc99a2 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -605,18 +605,19 @@ class Atol(BaseEstimator, TransformerMixin): >>> b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]]) >>> c = np.array([[3, 2, -1], [1, 2, -1]]) >>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006)) - >>> atol_vectoriser.fit(X=[a, b, c]).centers #doctest: +SKIP + >>> atol_vectoriser.fit(X=[a, b, c]).centers # doctest: +SKIP >>> # array([[ 2. , 0.66666667, 3.33333333], >>> # [ 2.6 , 2.8 , -0.4 ]]) >>> atol_vectoriser(a) - >>> # array([1.18168665, 0.42375966]) #doctest: +SKIP + >>> # array([1.18168665, 0.42375966]) # doctest: +SKIP >>> atol_vectoriser(c) - >>> # array([0.02062512, 1.25157463]) #doctest: +SKIP - >>> atol_vectoriser.transform(X=[a, b, c]) #doctest: +SKIP + >>> # array([0.02062512, 1.25157463]) # doctest: +SKIP + >>> atol_vectoriser.transform(X=[a, b, c]) # doctest: +SKIP >>> # array([[1.18168665, 0.42375966], >>> # [0.29861028, 1.06330156], >>> # [0.02062512, 1.25157463]]) """ + # Note the example above must be up to date with the one in tests called test_atol_doc def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"): """ Constructor for the Atol measure vectorisation class. diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index 1c8f8cdb..cda1a15b 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -47,21 +47,29 @@ def test_multiple(): # Test sorted values as points order can be inverted, and sorted test is not documentation-friendly +# Note the test below must be up to date with the Atol class documentation def test_atol_doc(): a = np.array([[1, 2, 4], [1, 4, 0], [1, 0, 4]]) b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]]) c = np.array([[3, 2, -1], [1, 2, -1]]) atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006)) - assert np.sort(atol_vectoriser.fit(X=[a, b, c]).centers, axis=0) == \ - pytest.approx(np.array([[2. , 0.66666667, -0.4], \ - [2.6, 2.8 , 3.33333333]])) - assert np.sort(atol_vectoriser(a)) == pytest.approx(np.array([0.42375966, 1.18168665])) - assert np.sort(atol_vectoriser(c)) == pytest.approx(np.array([0.02062512, 1.25157463])) - assert np.sort(atol_vectoriser.transform(X=[a, b, c]), axis=0) == \ - pytest.approx(np.array([[0.02062512, 0.42375966], \ - [0.29861028, 1.06330156], \ - [1.18168665, 1.25157463]])) + # Atol will do + # X = np.concatenate([a,b,c]) + # kmeans = KMeans(n_clusters=2, random_state=202006).fit(X) + # kmeans.labels_ will be : array([1, 0, 1, 0, 0, 1, 0, 0]) + first_cluster = np.asarray([a[0], a[2], b[2]]) + second_cluster = np.asarray([a[1], b[0], b[2], c[0], c[1]]) + + # Check the center of the first_cluster and second_cluster are in Atol centers + centers = atol_vectoriser.fit(X=[a, b, c]).centers + np.isclose(centers, first_cluster.mean(axis=0)).all(1).any() + np.isclose(centers, second_cluster.mean(axis=0)).all(1).any() + + vectorization = atol_vectoriser.transform(X=[a, b, c]) + assert np.allclose(vectorization[0], atol_vectoriser(a)) + assert np.allclose(vectorization[1], atol_vectoriser(b)) + assert np.allclose(vectorization[2], atol_vectoriser(c)) def test_dummy_atol(): -- cgit v1.2.3 From ec06a9b9ae0a9ff1897249dcbc2b497764f54aaf Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau Date: Mon, 18 Oct 2021 17:01:02 +0200 Subject: First part of the fix --- src/python/gudhi/cubical_complex.pyx | 7 ++- src/python/gudhi/periodic_cubical_complex.pyx | 7 ++- src/python/gudhi/representations/vector_methods.py | 60 ++++++++++++++-------- src/python/gudhi/simplex_tree.pyx | 26 ++++++---- src/python/test/test_cubical_complex.py | 25 +++++++++ src/python/test/test_representations.py | 37 +++++++++++++ 6 files changed, 129 insertions(+), 33 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index 97c69a2d..04569bd8 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -281,4 +281,9 @@ cdef class CubicalComplex: launched first. """ assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()" - return np.array(self.pcohptr.intervals_in_dimension(dimension)) + piid = np.array(self.pcohptr.intervals_in_dimension(dimension)) + # Workaround https://github.com/GUDHI/gudhi-devel/issues/507 + if piid.shape[0] == 0: + return np.empty(shape = [0, 2]) + else: + return piid diff --git a/src/python/gudhi/periodic_cubical_complex.pyx b/src/python/gudhi/periodic_cubical_complex.pyx index ef1d3080..bd91ccde 100644 --- a/src/python/gudhi/periodic_cubical_complex.pyx +++ b/src/python/gudhi/periodic_cubical_complex.pyx @@ -280,4 +280,9 @@ cdef class PeriodicCubicalComplex: launched first. """ assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()" - return np.array(self.pcohptr.intervals_in_dimension(dimension)) + piid = np.array(self.pcohptr.intervals_in_dimension(dimension)) + # Workaround https://github.com/GUDHI/gudhi-devel/issues/507 + if piid.shape[0] == 0: + return np.empty(shape = [0, 2]) + else: + return piid diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 84bc99a2..711c32a7 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -44,11 +44,15 @@ class PersistenceImage(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - if np.isnan(np.array(self.im_range)).any(): - new_X = BirthPersistenceTransform().fit_transform(X) - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(new_X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.im_range = np.where(np.isnan(np.array(self.im_range)), np.array([mx, Mx, my, My]), np.array(self.im_range)) + try: + if np.isnan(np.array(self.im_range)).any(): + new_X = BirthPersistenceTransform().fit_transform(X) + pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(new_X,y) + [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] + self.im_range = np.where(np.isnan(np.array(self.im_range)), np.array([mx, Mx, my, My]), np.array(self.im_range)) + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + pass return self def transform(self, X): @@ -120,9 +124,13 @@ class Landscape(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ if self.nan_in_range.any(): - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(self.nan_in_range, np.array([mx, My]), np.array(self.sample_range)) + try: + pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) + [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] + self.sample_range = np.where(self.nan_in_range, np.array([mx, My]), np.array(self.sample_range)) + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + pass return self def transform(self, X): @@ -218,10 +226,14 @@ class Silhouette(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - if np.isnan(np.array(self.sample_range)).any(): - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + try: + if np.isnan(np.array(self.sample_range)).any(): + pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) + [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] + self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + pass return self def transform(self, X): @@ -307,10 +319,14 @@ class BettiCurve(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - if np.isnan(np.array(self.sample_range)).any(): - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + try: + if np.isnan(np.array(self.sample_range)).any(): + pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) + [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] + self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + pass return self def transform(self, X): @@ -374,10 +390,14 @@ class Entropy(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - if np.isnan(np.array(self.sample_range)).any(): - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + try: + if np.isnan(np.array(self.sample_range)).any(): + pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) + [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] + self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + pass return self def transform(self, X): diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 9c51cb46..e9bac036 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -9,8 +9,7 @@ from cython.operator import dereference, preincrement from libc.stdint cimport intptr_t -import numpy -from numpy import array as np_array +import numpy as np cimport gudhi.simplex_tree __author__ = "Vincent Rouvreau" @@ -542,7 +541,12 @@ cdef class SimplexTree: function to be launched first. """ assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()" - return np_array(self.pcohptr.intervals_in_dimension(dimension)) + piid = np.array(self.pcohptr.intervals_in_dimension(dimension)) + # Workaround https://github.com/GUDHI/gudhi-devel/issues/507 + if piid.shape[0] == 0: + return np.empty(shape = [0, 2]) + else: + return piid def persistence_pairs(self): """This function returns a list of persistence birth and death simplices pairs. @@ -583,8 +587,8 @@ cdef class SimplexTree: """ assert self.pcohptr != NULL, "lower_star_persistence_generators() requires that persistence() be called first." gen = self.pcohptr.lower_star_generators() - normal = [np_array(d).reshape(-1,2) for d in gen.first] - infinite = [np_array(d) for d in gen.second] + normal = [np.array(d).reshape(-1,2) for d in gen.first] + infinite = [np.array(d) for d in gen.second] return (normal, infinite) def flag_persistence_generators(self): @@ -602,19 +606,19 @@ cdef class SimplexTree: assert self.pcohptr != NULL, "flag_persistence_generators() requires that persistence() be called first." gen = self.pcohptr.flag_generators() if len(gen.first) == 0: - normal0 = numpy.empty((0,3)) + normal0 = np.empty((0,3)) normals = [] else: l = iter(gen.first) - normal0 = np_array(next(l)).reshape(-1,3) - normals = [np_array(d).reshape(-1,4) for d in l] + normal0 = np.array(next(l)).reshape(-1,3) + normals = [np.array(d).reshape(-1,4) for d in l] if len(gen.second) == 0: - infinite0 = numpy.empty(0) + infinite0 = np.empty(0) infinites = [] else: l = iter(gen.second) - infinite0 = np_array(next(l)) - infinites = [np_array(d).reshape(-1,2) for d in l] + infinite0 = np.array(next(l)) + infinites = [np.array(d).reshape(-1,2) for d in l] return (normal0, normals, infinite0, infinites) def collapse_edges(self, nb_iterations = 1): diff --git a/src/python/test/test_cubical_complex.py b/src/python/test/test_cubical_complex.py index d0e4e9e8..29d559b3 100755 --- a/src/python/test/test_cubical_complex.py +++ b/src/python/test/test_cubical_complex.py @@ -174,3 +174,28 @@ def test_periodic_cofaces_of_persistence_pairs_when_pd_has_no_paired_birth_and_d assert np.array_equal(pairs[1][0], np.array([0])) assert np.array_equal(pairs[1][1], np.array([0, 1])) assert np.array_equal(pairs[1][2], np.array([1])) + +def test_cubical_persistence_intervals_in_dimension(): + cub = CubicalComplex( + dimensions=[3, 3], + top_dimensional_cells=[1, 2, 3, 4, 5, 6, 7, 8, 9], + ) + cub.compute_persistence() + H0 = cub.persistence_intervals_in_dimension(0) + assert np.array_equal(H0, np.array([[ 1., float("inf")]])) + assert cub.persistence_intervals_in_dimension(1).shape == (0, 2) + +def test_periodic_cubical_persistence_intervals_in_dimension(): + cub = PeriodicCubicalComplex( + dimensions=[3, 3], + top_dimensional_cells=[1, 2, 3, 4, 5, 6, 7, 8, 9], + periodic_dimensions = [True, True] + ) + cub.compute_persistence() + H0 = cub.persistence_intervals_in_dimension(0) + assert np.array_equal(H0, np.array([[ 1., float("inf")]])) + H1 = cub.persistence_intervals_in_dimension(1) + assert np.array_equal(H1, np.array([[ 3., float("inf")], [ 7., float("inf")]])) + H2 = cub.persistence_intervals_in_dimension(2) + assert np.array_equal(H2, np.array([[ 9., float("inf")]])) + assert cub.persistence_intervals_in_dimension(3).shape == (0, 2) diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index cda1a15b..c1f4df12 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -6,6 +6,12 @@ import pytest from sklearn.cluster import KMeans +from gudhi.representations import (DiagramSelector, Clamping, Landscape, Silhouette, BettiCurve, ComplexPolynomial,\ + TopologicalVector, DiagramScaler, BirthPersistenceTransform,\ + PersistenceImage, PersistenceWeightedGaussianKernel, Entropy, \ + PersistenceScaleSpaceKernel, SlicedWassersteinDistance,\ + SlicedWassersteinKernel, PersistenceFisherKernel, WassersteinDistance) + def test_representations_examples(): # Disable graphics for testing purposes @@ -98,3 +104,34 @@ def test_infinity(): assert c[1] == 0 assert c[7] == 3 assert c[9] == 2 + +def pow(n): + return lambda x: np.power(x[1]-x[0],n) + +def test_vectorization_empty_diagrams(): + empty_diag = np.empty(shape = [0, 2]) + Landscape(resolution=1000)(empty_diag) + Silhouette(resolution=1000, weight=pow(2))(empty_diag) + BettiCurve(resolution=1000)(empty_diag) + ComplexPolynomial(threshold=-1, polynomial_type="T")(empty_diag) + TopologicalVector(threshold=-1)(empty_diag) + PersistenceImage(bandwidth=.1, weight=lambda x: x[1], im_range=[0,1,0,1], resolution=[100,100])(empty_diag) + #Entropy(mode="scalar")(empty_diag) + #Entropy(mode="vector", normalized=False)(empty_diag) + +#def arctan(C,p): +# return lambda x: C*np.arctan(np.power(x[1], p)) +# +#def test_kernel_empty_diagrams(): +# empty_diag = np.empty(shape = [0, 2]) +# PersistenceWeightedGaussianKernel(bandwidth=1., kernel_approx=None, weight=arctan(1.,1.))(empty_diag, empty_diag) +# PersistenceWeightedGaussianKernel(kernel_approx=RBFSampler(gamma=1./2, n_components=100000).fit(np.ones([1,2])), weight=arctan(1.,1.))(empty_diag, empty_diag) +# PersistenceScaleSpaceKernel(bandwidth=1.)(empty_diag, empty_diag) +# PersistenceScaleSpaceKernel(kernel_approx=RBFSampler(gamma=1./2, n_components=100000).fit(np.ones([1,2])))(empty_diag, empty_diag) +# SlicedWassersteinDistance(num_directions=100)(empty_diag, empty_diag) +# SlicedWassersteinKernel(num_directions=100, bandwidth=1.)(empty_diag, empty_diag) +# WassersteinDistance(order=2, internal_p=2, mode="pot")(empty_diag, empty_diag) +# WassersteinDistance(order=2, internal_p=2, mode="hera", delta=0.0001)(empty_diag, empty_diag) +# BottleneckDistance(epsilon=.001)(empty_diag, empty_diag) +# PersistenceFisherKernel(bandwidth_fisher=1., bandwidth=1.)(empty_diag, empty_diag) +# PersistenceFisherKernel(bandwidth_fisher=1., bandwidth=1., kernel_approx=RBFSampler(gamma=1./2, n_components=100000).fit(np.ones([1,2])))(empty_diag, empty_diag) -- cgit v1.2.3 From e4122147ee4643dbca6c65efebf83eb2adad6aec Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau Date: Wed, 20 Oct 2021 11:31:00 +0200 Subject: Make Entropy work and also fix a bug in the loop --- src/python/gudhi/representations/vector_methods.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 711c32a7..47c5224c 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -416,9 +416,12 @@ class Entropy(BaseEstimator, TransformerMixin): new_X = BirthPersistenceTransform().fit_transform(X) for i in range(num_diag): - orig_diagram, diagram, num_pts_in_diag = X[i], new_X[i], X[i].shape[0] - new_diagram = DiagramScaler(use=True, scalers=[([1], MaxAbsScaler())]).fit_transform([diagram])[0] + try: + new_diagram = DiagramScaler(use=True, scalers=[([1], MaxAbsScaler())]).fit_transform([diagram])[0] + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + new_diagram = np.empty(shape = [0, 2]) if self.mode == "scalar": ent = - np.sum( np.multiply(new_diagram[:,1], np.log(new_diagram[:,1])) ) @@ -432,12 +435,11 @@ class Entropy(BaseEstimator, TransformerMixin): max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution) for k in range(min_idx, max_idx): ent[k] += (-1) * new_diagram[j,1] * np.log(new_diagram[j,1]) - if self.normalized: - ent = ent / np.linalg.norm(ent, ord=1) - Xfit.append(np.reshape(ent,[1,-1])) - - Xfit = np.concatenate(Xfit, 0) + if self.normalized: + ent = ent / np.linalg.norm(ent, ord=1) + Xfit.append(np.reshape(ent,[1,-1])) + Xfit = np.concatenate(Xfit, axis=0) return Xfit def __call__(self, diag): -- cgit v1.2.3 From 4a0bc0fe1d6424da9bf979cfc322067a62f41cc9 Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau Date: Fri, 22 Oct 2021 12:44:07 +0200 Subject: Fix exception management when sklearn version < 1.0 --- src/python/gudhi/representations/vector_methods.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 47c5224c..b83c2a87 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -500,7 +500,11 @@ class TopologicalVector(BaseEstimator, TransformerMixin): diagram, num_pts_in_diag = X[i], X[i].shape[0] pers = 0.5 * (diagram[:,1]-diagram[:,0]) min_pers = np.minimum(pers,np.transpose(pers)) - distances = DistanceMetric.get_metric("chebyshev").pairwise(diagram) + # Works fine with sklearn 1.0, but an ValueError exception is thrown on past versions + try: + distances = DistanceMetric.get_metric("chebyshev").pairwise(diagram) + except ValueError: + distances = np.empty(shape = [0, 0]) vect = np.flip(np.sort(np.triu(np.minimum(distances, min_pers)), axis=None), 0) dim = min(len(vect), thresh) Xfit[i, :dim] = vect[:dim] -- cgit v1.2.3 From a2761c01ceb26a057b94be1d45433335704c1581 Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau Date: Thu, 4 Nov 2021 17:24:15 +0100 Subject: code review: try-except inside the if --- src/python/gudhi/representations/vector_methods.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index b83c2a87..e7ee57a4 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -44,15 +44,15 @@ class PersistenceImage(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - try: - if np.isnan(np.array(self.im_range)).any(): + if np.isnan(np.array(self.im_range)).any(): + try: new_X = BirthPersistenceTransform().fit_transform(X) pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(new_X,y) [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] self.im_range = np.where(np.isnan(np.array(self.im_range)), np.array([mx, Mx, my, My]), np.array(self.im_range)) - except ValueError: - # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 - pass + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + pass return self def transform(self, X): -- cgit v1.2.3 From 3094e1fe51acc49e4ea7e4f38648bb25d96784a4 Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau Date: Fri, 5 Nov 2021 10:27:46 +0100 Subject: code review: factorize sample range computation --- src/python/gudhi/representations/vector_methods.py | 46 ++++++++++++---------- 1 file changed, 26 insertions(+), 20 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index e7ee57a4..140162af 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -6,6 +6,7 @@ # # Modification(s): # - 2020/06 Martin: ATOL integration +# - 2021/11 Vincent Rouvreau: factorize _automatic_sample_range import numpy as np from sklearn.base import BaseEstimator, TransformerMixin @@ -98,6 +99,23 @@ class PersistenceImage(BaseEstimator, TransformerMixin): """ return self.fit_transform([diag])[0,:] +def _automatic_sample_range(sample_range, X, y): + """ + Compute and returns sample range from the persistence diagrams if one of the sample_range values is numpy.nan. + + Parameters: + sample_range (a numpy array of 2 float): minimum and maximum of all piecewise-linear function domains, of + the form [x_min, x_max]. + X (list of n x 2 numpy arrays): input persistence diagrams. + y (n x 1 array): persistence diagram labels (unused). + """ + nan_in_range = np.isnan(sample_range) + if nan_in_range.any(): + pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) + [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] + return np.where(nan_in_range, np.array([mx, My]), sample_range) + return sample_range + class Landscape(BaseEstimator, TransformerMixin): """ This is a class for computing persistence landscapes from a list of persistence diagrams. A persistence landscape is a collection of 1D piecewise-linear functions computed from the rank function associated to the persistence diagram. These piecewise-linear functions are then sampled evenly on a given range and the corresponding vectors of samples are concatenated and returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details. @@ -123,14 +141,11 @@ class Landscape(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - if self.nan_in_range.any(): - try: - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(self.nan_in_range, np.array([mx, My]), np.array(self.sample_range)) - except ValueError: - # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 - pass + try: + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + pass return self def transform(self, X): @@ -227,10 +242,7 @@ class Silhouette(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ try: - if np.isnan(np.array(self.sample_range)).any(): - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) except ValueError: # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 pass @@ -320,10 +332,7 @@ class BettiCurve(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ try: - if np.isnan(np.array(self.sample_range)).any(): - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) except ValueError: # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 pass @@ -391,10 +400,7 @@ class Entropy(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ try: - if np.isnan(np.array(self.sample_range)).any(): - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range)) + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) except ValueError: # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 pass -- cgit v1.2.3 From 37d7743a91f7fb970425a06798ac6cb61b0be109 Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau Date: Fri, 5 Nov 2021 12:05:45 +0100 Subject: code review: try/except in function and assert on length of diagrams for error menagement --- src/python/gudhi/representations/vector_methods.py | 38 +++++++++------------- 1 file changed, 15 insertions(+), 23 deletions(-) (limited to 'src/python/gudhi/representations/vector_methods.py') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 140162af..e883b5dd 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -111,9 +111,14 @@ def _automatic_sample_range(sample_range, X, y): """ nan_in_range = np.isnan(sample_range) if nan_in_range.any(): - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) - [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] - return np.where(nan_in_range, np.array([mx, My]), sample_range) + try: + pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) + [mx,my] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]] + [Mx,My] = [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] + return np.where(nan_in_range, np.array([mx, My]), sample_range) + except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + pass return sample_range class Landscape(BaseEstimator, TransformerMixin): @@ -141,11 +146,7 @@ class Landscape(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - try: - self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - except ValueError: - # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 - pass + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) return self def transform(self, X): @@ -241,11 +242,7 @@ class Silhouette(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - try: - self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - except ValueError: - # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 - pass + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) return self def transform(self, X): @@ -331,11 +328,7 @@ class BettiCurve(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - try: - self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - except ValueError: - # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 - pass + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) return self def transform(self, X): @@ -399,11 +392,7 @@ class Entropy(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - try: - self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - except ValueError: - # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 - pass + self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) return self def transform(self, X): @@ -427,6 +416,7 @@ class Entropy(BaseEstimator, TransformerMixin): new_diagram = DiagramScaler(use=True, scalers=[([1], MaxAbsScaler())]).fit_transform([diagram])[0] except ValueError: # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + assert len(diagram) == 0 new_diagram = np.empty(shape = [0, 2]) if self.mode == "scalar": @@ -510,6 +500,8 @@ class TopologicalVector(BaseEstimator, TransformerMixin): try: distances = DistanceMetric.get_metric("chebyshev").pairwise(diagram) except ValueError: + # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 + assert len(diagram) == 0 distances = np.empty(shape = [0, 0]) vect = np.flip(np.sort(np.triu(np.minimum(distances, min_pers)), axis=None), 0) dim = min(len(vect), thresh) -- cgit v1.2.3