From 34207229b3ab2936aecd953997286a0daab88a83 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 29 May 2020 17:56:45 +0200 Subject: convert matrix to SimplexTree --- src/python/test/test_simplex_tree.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/python/test') diff --git a/src/python/test/test_simplex_tree.py b/src/python/test/test_simplex_tree.py index 2137d822..f75be58d 100755 --- a/src/python/test/test_simplex_tree.py +++ b/src/python/test/test_simplex_tree.py @@ -10,6 +10,7 @@ from gudhi import SimplexTree import pytest +import numpy as np __author__ = "Vincent Rouvreau" __copyright__ = "Copyright (C) 2016 Inria" @@ -340,3 +341,9 @@ def test_simplices_iterator(): assert st.find(simplex[0]) == True print("filtration is: ", simplex[1]) assert st.filtration(simplex[0]) == simplex[1] + +def test_insert_array(): + st = SimplexTree() + a = np.array([[1, 4, 13, 6], [4, 3, 11, 5], [13, 11, 10, 12], [6, 5, 12, 2]]) + st.insert_array(a, max_filtration=5) + assert list(st.get_filtration()) == [([0], 1.0), ([3], 2.0), ([1], 3.0), ([0, 1], 4.0), ([1, 3], 5.0)] -- cgit v1.2.3 From 75c94c33897342392e88d18e7dc0c4dbd3cade8b Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 8 Jul 2020 21:55:45 +0200 Subject: Rename insert_array -> insert_edges_from_array --- src/python/gudhi/simplex_tree.pyx | 4 ++-- src/python/test/test_simplex_tree.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/python/test') diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 557a80a9..d2d3f459 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -195,7 +195,7 @@ cdef class SimplexTree: """ return self.get_ptr().insert(simplex, filtration) - def insert_array(self, filtrations, double max_filtration=numpy.inf): + def insert_edges_from_array(self, filtrations, double max_filtration=numpy.inf): """Inserts edges in an empty complex. The vertices are numbered from 0 to n-1, and the filtration values are encoded in the array, with the diagonal representing the vertices. It is the caller's responsibility to ensure that this defines a filtration, which can be achieved with either:: @@ -214,7 +214,7 @@ cdef class SimplexTree: """ filtrations = numpy.asanyarray(filtrations, dtype=float) cdef double[:,:] F = filtrations - assert self.num_vertices() == 0, "insert_array requires an empty SimplexTree" + assert self.num_vertices() == 0, "insert_edges_from_array requires an empty SimplexTree" cdef int n = F.shape[0] assert n == F.shape[1] with nogil: diff --git a/src/python/test/test_simplex_tree.py b/src/python/test/test_simplex_tree.py index f75be58d..02ad63cc 100755 --- a/src/python/test/test_simplex_tree.py +++ b/src/python/test/test_simplex_tree.py @@ -342,8 +342,8 @@ def test_simplices_iterator(): print("filtration is: ", simplex[1]) assert st.filtration(simplex[0]) == simplex[1] -def test_insert_array(): +def test_insert_edges_from_array(): st = SimplexTree() a = np.array([[1, 4, 13, 6], [4, 3, 11, 5], [13, 11, 10, 12], [6, 5, 12, 2]]) - st.insert_array(a, max_filtration=5) + st.insert_edges_from_array(a, max_filtration=5) assert list(st.get_filtration()) == [([0], 1.0), ([3], 2.0), ([1], 3.0), ([0, 1], 4.0), ([1, 3], 5.0)] -- cgit v1.2.3 From 83acc2e74cd8a34f34d0082c85ea85b3260d2458 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 8 Jul 2020 23:35:42 +0200 Subject: insert edges from sparse matrix --- src/python/gudhi/simplex_tree.pyx | 20 +++- src/python/test/test_simplex_tree.py | 176 ++++++++++++++++++++++------------- 2 files changed, 128 insertions(+), 68 deletions(-) (limited to 'src/python/test') diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index d2d3f459..9d2c30a9 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -208,10 +208,11 @@ cdef class SimplexTree: filtrations = np.fmax(np.fmax(filtrations, diag[:, None]), diag[None, :]) :param filtrations: the filtration values of the vertices and edges to insert. The matrix is assumed to be symmetric. - :type filtrations: symmetric numpy.ndarray of shape (n,n) + :type filtrations: numpy.ndarray of shape (n,n) :param max_filtration: only insert vertices and edges with filtration values no larger than max_filtration :type max_filtration: float """ + # TODO: document which half of the matrix is actually read? filtrations = numpy.asanyarray(filtrations, dtype=float) cdef double[:,:] F = filtrations assert self.num_vertices() == 0, "insert_edges_from_array requires an empty SimplexTree" @@ -220,6 +221,23 @@ cdef class SimplexTree: with nogil: self.get_ptr().insert_matrix(&F[0,0], n, F.strides[0], F.strides[1], max_filtration) + def insert_edges_from_coo_matrix(self, edges): + """Inserts edges given by a sparse matrix in `COOrdinate format + `_. + Duplicate entries are not allowed. Missing entries are not inserted. Diagonal entries are interpreted as + vertices, although this is only useful if you want to insert vertices with a smaller filtration value than + the smallest edge containing it, since vertices are implicitly inserted together with the edges. + + :param edges: the edges to insert and their filtration values. + :type edges: scipy.sparse.coo_matrix of shape (n,n) + """ + # TODO: optimize this + for edge in zip(edges.row, edges.col, edges.data): + if edge[0] == edge[1]: + self.get_ptr().insert((edge[0],), edge[2]) + else: + self.get_ptr().insert((edge[0], edge[1]), edge[2]) + def get_simplices(self): """This function returns a generator with simplices and their given filtration values. diff --git a/src/python/test/test_simplex_tree.py b/src/python/test/test_simplex_tree.py index 02ad63cc..c6c5dc0e 100755 --- a/src/python/test/test_simplex_tree.py +++ b/src/python/test/test_simplex_tree.py @@ -249,6 +249,7 @@ def test_make_filtration_non_decreasing(): assert st.filtration([3, 4]) == 2.0 assert st.filtration([4, 5]) == 2.0 + def test_extend_filtration(): # Inserted simplex: @@ -257,82 +258,83 @@ def test_extend_filtration(): # / \ / # o o # /2\ /3 - # o o - # 1 0 - - st = SimplexTree() - st.insert([0,2]) - st.insert([1,2]) - st.insert([0,3]) - st.insert([2,5]) - st.insert([3,4]) - st.insert([3,5]) - st.assign_filtration([0], 1.) - st.assign_filtration([1], 2.) - st.assign_filtration([2], 3.) - st.assign_filtration([3], 4.) - st.assign_filtration([4], 5.) - st.assign_filtration([5], 6.) - - assert list(st.get_filtration()) == [ - ([0, 2], 0.0), - ([1, 2], 0.0), - ([0, 3], 0.0), - ([3, 4], 0.0), - ([2, 5], 0.0), - ([3, 5], 0.0), - ([0], 1.0), - ([1], 2.0), - ([2], 3.0), - ([3], 4.0), - ([4], 5.0), - ([5], 6.0) + # o o + # 1 0 + + st = SimplexTree() + st.insert([0, 2]) + st.insert([1, 2]) + st.insert([0, 3]) + st.insert([2, 5]) + st.insert([3, 4]) + st.insert([3, 5]) + st.assign_filtration([0], 1.0) + st.assign_filtration([1], 2.0) + st.assign_filtration([2], 3.0) + st.assign_filtration([3], 4.0) + st.assign_filtration([4], 5.0) + st.assign_filtration([5], 6.0) + + assert list(st.get_filtration()) == [ + ([0, 2], 0.0), + ([1, 2], 0.0), + ([0, 3], 0.0), + ([3, 4], 0.0), + ([2, 5], 0.0), + ([3, 5], 0.0), + ([0], 1.0), + ([1], 2.0), + ([2], 3.0), + ([3], 4.0), + ([4], 5.0), + ([5], 6.0), ] - + st.extend_filtration() - - assert list(st.get_filtration()) == [ - ([6], -3.0), - ([0], -2.0), - ([1], -1.8), - ([2], -1.6), - ([0, 2], -1.6), - ([1, 2], -1.6), - ([3], -1.4), - ([0, 3], -1.4), - ([4], -1.2), - ([3, 4], -1.2), - ([5], -1.0), - ([2, 5], -1.0), - ([3, 5], -1.0), - ([5, 6], 1.0), - ([4, 6], 1.2), - ([3, 6], 1.4), + + assert list(st.get_filtration()) == [ + ([6], -3.0), + ([0], -2.0), + ([1], -1.8), + ([2], -1.6), + ([0, 2], -1.6), + ([1, 2], -1.6), + ([3], -1.4), + ([0, 3], -1.4), + ([4], -1.2), + ([3, 4], -1.2), + ([5], -1.0), + ([2, 5], -1.0), + ([3, 5], -1.0), + ([5, 6], 1.0), + ([4, 6], 1.2), + ([3, 6], 1.4), ([3, 4, 6], 1.4), - ([3, 5, 6], 1.4), - ([2, 6], 1.6), - ([2, 5, 6], 1.6), - ([1, 6], 1.8), - ([1, 2, 6], 1.8), - ([0, 6], 2.0), - ([0, 2, 6], 2.0), - ([0, 3, 6], 2.0) + ([3, 5, 6], 1.4), + ([2, 6], 1.6), + ([2, 5, 6], 1.6), + ([1, 6], 1.8), + ([1, 2, 6], 1.8), + ([0, 6], 2.0), + ([0, 2, 6], 2.0), + ([0, 3, 6], 2.0), ] - dgms = st.extended_persistence(min_persistence=-1.) + dgms = st.extended_persistence(min_persistence=-1.0) + + assert dgms[0][0][1][0] == pytest.approx(2.0) + assert dgms[0][0][1][1] == pytest.approx(3.0) + assert dgms[1][0][1][0] == pytest.approx(5.0) + assert dgms[1][0][1][1] == pytest.approx(4.0) + assert dgms[2][0][1][0] == pytest.approx(1.0) + assert dgms[2][0][1][1] == pytest.approx(6.0) + assert dgms[3][0][1][0] == pytest.approx(6.0) + assert dgms[3][0][1][1] == pytest.approx(1.0) - assert dgms[0][0][1][0] == pytest.approx(2.) - assert dgms[0][0][1][1] == pytest.approx(3.) - assert dgms[1][0][1][0] == pytest.approx(5.) - assert dgms[1][0][1][1] == pytest.approx(4.) - assert dgms[2][0][1][0] == pytest.approx(1.) - assert dgms[2][0][1][1] == pytest.approx(6.) - assert dgms[3][0][1][0] == pytest.approx(6.) - assert dgms[3][0][1][1] == pytest.approx(1.) def test_simplices_iterator(): st = SimplexTree() - + assert st.insert([0, 1, 2], filtration=4.0) == True assert st.insert([2, 3, 4], filtration=2.0) == True @@ -342,8 +344,48 @@ def test_simplices_iterator(): print("filtration is: ", simplex[1]) assert st.filtration(simplex[0]) == simplex[1] + def test_insert_edges_from_array(): st = SimplexTree() a = np.array([[1, 4, 13, 6], [4, 3, 11, 5], [13, 11, 10, 12], [6, 5, 12, 2]]) st.insert_edges_from_array(a, max_filtration=5) assert list(st.get_filtration()) == [([0], 1.0), ([3], 2.0), ([1], 3.0), ([0, 1], 4.0), ([1, 3], 5.0)] + + +def test_insert_edges_from_coo_matrix(): + try: + from scipy.sparse import coo_matrix + from scipy.spatial import cKDTree + except ImportError: + print("Skipping, no SciPy") + return + + st = SimplexTree() + st.insert([1, 2, 7], 7) + row = np.array([2, 5, 3]) + col = np.array([1, 4, 6]) + dat = np.array([1, 2, 3]) + edges = coo_matrix((dat, (row, col))) + st.insert_edges_from_coo_matrix(edges) + assert list(st.get_filtration()) == [ + ([1], 1.0), + ([2], 1.0), + ([1, 2], 1.0), + ([4], 2.0), + ([5], 2.0), + ([4, 5], 2.0), + ([3], 3.0), + ([6], 3.0), + ([3, 6], 3.0), + ([7], 7.0), + ([1, 7], 7.0), + ([2, 7], 7.0), + ([1, 2, 7], 7.0), + ] + + pts = np.random.rand(100, 2) + tree = cKDTree(pts) + edges = tree.sparse_distance_matrix(tree, max_distance=0.15, output_type="coo_matrix") + st = SimplexTree() + st.insert_edges_from_coo_matrix(edges) + assert 100 < st.num_simplices() < 1000 -- cgit v1.2.3 From 2830010c74cc74d29691faeeb7bb3a31cc53d87d Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 31 Jul 2020 11:50:11 +0200 Subject: static construction from array --- src/python/gudhi/simplex_tree.pyx | 16 ++++++++++------ src/python/test/test_simplex_tree.py | 5 ++--- 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'src/python/test') diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 1df2420e..42f1e635 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -195,10 +195,11 @@ cdef class SimplexTree: """ return self.get_ptr().insert(simplex, filtration) - def insert_edges_from_array(self, filtrations, double max_filtration=numpy.inf): - """Inserts edges in an empty complex. The vertices are numbered from 0 to n-1, and the filtration values are - encoded in the array, with the diagonal representing the vertices. It is the caller's responsibility to - ensure that this defines a filtration, which can be achieved with either:: + @staticmethod + def create_from_array(filtrations, double max_filtration=numpy.inf): + """Creates a new, empty complex and inserts vertices and edges. The vertices are numbered from 0 to n-1, and + the filtration values are encoded in the array, with the diagonal representing the vertices. It is the + caller's responsibility to ensure that this defines a filtration, which can be achieved with either:: filtrations[np.diag_indices_from(filtrations)] = filtrations.min(1) @@ -211,15 +212,18 @@ cdef class SimplexTree: :type filtrations: numpy.ndarray of shape (n,n) :param max_filtration: only insert vertices and edges with filtration values no larger than max_filtration :type max_filtration: float + :returns: the new complex + :rtype: SimplexTree """ # TODO: document which half of the matrix is actually read? filtrations = numpy.asanyarray(filtrations, dtype=float) cdef double[:,:] F = filtrations - assert self.num_vertices() == 0, "insert_edges_from_array requires an empty SimplexTree" + ret = SimplexTree() cdef int n = F.shape[0] assert n == F.shape[1] with nogil: - self.get_ptr().insert_matrix(&F[0,0], n, F.strides[0], F.strides[1], max_filtration) + ret.get_ptr().insert_matrix(&F[0,0], n, F.strides[0], F.strides[1], max_filtration) + return ret def insert_edges_from_coo_matrix(self, edges): """Inserts edges given by a sparse matrix in `COOrdinate format diff --git a/src/python/test/test_simplex_tree.py b/src/python/test/test_simplex_tree.py index c6c5dc0e..34173e78 100755 --- a/src/python/test/test_simplex_tree.py +++ b/src/python/test/test_simplex_tree.py @@ -345,10 +345,9 @@ def test_simplices_iterator(): assert st.filtration(simplex[0]) == simplex[1] -def test_insert_edges_from_array(): - st = SimplexTree() +def test_create_from_array(): a = np.array([[1, 4, 13, 6], [4, 3, 11, 5], [13, 11, 10, 12], [6, 5, 12, 2]]) - st.insert_edges_from_array(a, max_filtration=5) + st = SimplexTree.create_from_array(a, max_filtration=5) assert list(st.get_filtration()) == [([0], 1.0), ([3], 2.0), ([1], 3.0), ([0, 1], 4.0), ([1, 3], 5.0)] -- cgit v1.2.3 From 74948a7debebdce1ddb7afca169e2c9dc6456fa1 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 1 Apr 2022 22:31:43 +0200 Subject: SimplexTree.insert_batch for multiple simplices of the same dimension --- src/python/gudhi/simplex_tree.pyx | 42 +++++++++++- src/python/test/test_simplex_tree.py | 127 +++++++++++++++++++++++------------ 2 files changed, 123 insertions(+), 46 deletions(-) (limited to 'src/python/test') diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 3eebfff7..3646d659 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -8,14 +8,23 @@ # - YYYY/MM Author: Description of the modification from cython.operator import dereference, preincrement -from libc.stdint cimport intptr_t +from libc.stdint cimport intptr_t, int32_t, int64_t import numpy as np cimport gudhi.simplex_tree +cimport cython __author__ = "Vincent Rouvreau" __copyright__ = "Copyright (C) 2016 Inria" __license__ = "MIT" +ctypedef fused some_int: + int32_t + int64_t + +ctypedef fused some_float: + float + double + # SimplexTree python interface cdef class SimplexTree: """The simplex tree is an efficient and flexible data structure for @@ -226,6 +235,7 @@ cdef class SimplexTree: return self.get_ptr().insert(simplex, filtration) @staticmethod + @cython.boundscheck(False) def create_from_array(filtrations, double max_filtration=np.inf): """Creates a new, empty complex and inserts vertices and edges. The vertices are numbered from 0 to n-1, and the filtration values are encoded in the array, with the diagonal representing the vertices. It is the @@ -265,10 +275,38 @@ cdef class SimplexTree: :param edges: the edges to insert and their filtration values. :type edges: scipy.sparse.coo_matrix of shape (n,n) """ - # TODO: optimize this + # TODO: optimize this? for edge in zip(edges.row, edges.col, edges.data): self.get_ptr().insert((edge[0], edge[1]), edge[2]) + @cython.boundscheck(False) + @cython.wraparound(False) + def insert_batch(self, some_int[:,:] vertex_array, some_float[:] filtrations): + """Inserts k-simplices given by a sparse array in a format similar + to `torch.sparse `_. + Duplicate entries are not allowed. Missing entries are not inserted. + Simplices with a repeated vertex are currently interpreted as lower + dimensional simplices, but we do not guarantee this behavior in the + future. Any time a simplex is inserted, its faces are inserted as well + if needed to preserve a simplicial complex. + + :param vertex_array: the k-simplices to insert. + :type vertex_array: numpy.array of shape (k+1,n) + :param filtrations: the filtration values. + :type filtrations: numpy.array of shape (n,) + """ + cdef Py_ssize_t k = vertex_array.shape[0] + cdef Py_ssize_t n = vertex_array.shape[1] + assert filtrations.shape[0] == n + cdef Py_ssize_t i + cdef Py_ssize_t j + cdef vector[int] v + for i in range(n): + for j in range(k): + v.push_back(vertex_array[j, i]) + self.get_ptr().insert(v, filtrations[i]) + v.clear() + def get_simplices(self): """This function returns a generator with simplices and their given filtration values. diff --git a/src/python/test/test_simplex_tree.py b/src/python/test/test_simplex_tree.py index 0436d891..a5b8ffe0 100755 --- a/src/python/test/test_simplex_tree.py +++ b/src/python/test/test_simplex_tree.py @@ -345,9 +345,10 @@ def test_simplices_iterator(): print("filtration is: ", simplex[1]) assert st.filtration(simplex[0]) == simplex[1] + def test_collapse_edges(): st = SimplexTree() - + assert st.insert([0, 1], filtration=1.0) == True assert st.insert([1, 2], filtration=1.0) == True assert st.insert([2, 3], filtration=1.0) == True @@ -362,33 +363,35 @@ def test_collapse_edges(): assert st.num_simplices() == 9 assert st.find([1, 3]) == False for simplex in st.get_skeleton(0): - assert simplex[1] == 1. + assert simplex[1] == 1.0 else: # If no Eigen3, collapse_edges throws an exception with pytest.raises(RuntimeError): st.collapse_edges() + def test_reset_filtration(): st = SimplexTree() - - assert st.insert([0, 1, 2], 3.) == True - assert st.insert([0, 3], 2.) == True - assert st.insert([3, 4, 5], 3.) == True - assert st.insert([0, 1, 6, 7], 4.) == True + + assert st.insert([0, 1, 2], 3.0) == True + assert st.insert([0, 3], 2.0) == True + assert st.insert([3, 4, 5], 3.0) == True + assert st.insert([0, 1, 6, 7], 4.0) == True # Guaranteed by construction for simplex in st.get_simplices(): - assert st.filtration(simplex[0]) >= 2. - + assert st.filtration(simplex[0]) >= 2.0 + # dimension until 5 even if simplex tree is of dimension 3 to test the limits for dimension in range(5, -1, -1): - st.reset_filtration(0., dimension) + st.reset_filtration(0.0, dimension) for simplex in st.get_skeleton(3): print(simplex) if len(simplex[0]) < (dimension) + 1: - assert st.filtration(simplex[0]) >= 2. + assert st.filtration(simplex[0]) >= 2.0 else: - assert st.filtration(simplex[0]) == 0. + assert st.filtration(simplex[0]) == 0.0 + def test_boundaries_iterator(): st = SimplexTree() @@ -404,16 +407,17 @@ def test_boundaries_iterator(): list(st.get_boundaries([])) with pytest.raises(RuntimeError): - list(st.get_boundaries([0, 4])) # (0, 4) does not exist + list(st.get_boundaries([0, 4])) # (0, 4) does not exist with pytest.raises(RuntimeError): - list(st.get_boundaries([6])) # (6) does not exist + list(st.get_boundaries([6])) # (6) does not exist + def test_persistence_intervals_in_dimension(): # Here is our triangulation of a 2-torus - taken from https://dioscuri-tda.org/Paris_TDA_Tutorial_2021.html # 0-----3-----4-----0 # | \ | \ | \ | \ | - # | \ | \ | \| \ | + # | \ | \ | \| \ | # 1-----8-----7-----1 # | \ | \ | \ | \ | # | \ | \ | \ | \ | @@ -422,50 +426,52 @@ def test_persistence_intervals_in_dimension(): # | \ | \ | \ | \ | # 0-----3-----4-----0 st = SimplexTree() - st.insert([0,1,8]) - st.insert([0,3,8]) - st.insert([3,7,8]) - st.insert([3,4,7]) - st.insert([1,4,7]) - st.insert([0,1,4]) - st.insert([1,2,5]) - st.insert([1,5,8]) - st.insert([5,6,8]) - st.insert([6,7,8]) - st.insert([2,6,7]) - st.insert([1,2,7]) - st.insert([0,2,3]) - st.insert([2,3,5]) - st.insert([3,4,5]) - st.insert([4,5,6]) - st.insert([0,4,6]) - st.insert([0,2,6]) + st.insert([0, 1, 8]) + st.insert([0, 3, 8]) + st.insert([3, 7, 8]) + st.insert([3, 4, 7]) + st.insert([1, 4, 7]) + st.insert([0, 1, 4]) + st.insert([1, 2, 5]) + st.insert([1, 5, 8]) + st.insert([5, 6, 8]) + st.insert([6, 7, 8]) + st.insert([2, 6, 7]) + st.insert([1, 2, 7]) + st.insert([0, 2, 3]) + st.insert([2, 3, 5]) + st.insert([3, 4, 5]) + st.insert([4, 5, 6]) + st.insert([0, 4, 6]) + st.insert([0, 2, 6]) st.compute_persistence(persistence_dim_max=True) - + H0 = st.persistence_intervals_in_dimension(0) - assert np.array_equal(H0, np.array([[ 0., float("inf")]])) + assert np.array_equal(H0, np.array([[0.0, float("inf")]])) H1 = st.persistence_intervals_in_dimension(1) - assert np.array_equal(H1, np.array([[ 0., float("inf")], [ 0., float("inf")]])) + assert np.array_equal(H1, np.array([[0.0, float("inf")], [0.0, float("inf")]])) H2 = st.persistence_intervals_in_dimension(2) - assert np.array_equal(H2, np.array([[ 0., float("inf")]])) + assert np.array_equal(H2, np.array([[0.0, float("inf")]])) # Test empty case assert st.persistence_intervals_in_dimension(3).shape == (0, 2) + def test_equality_operator(): st1 = SimplexTree() st2 = SimplexTree() assert st1 == st2 - st1.insert([1,2,3], 4.) + st1.insert([1, 2, 3], 4.0) assert st1 != st2 - st2.insert([1,2,3], 4.) + st2.insert([1, 2, 3], 4.0) assert st1 == st2 + def test_simplex_tree_deep_copy(): st = SimplexTree() - st.insert([1, 2, 3], 0.) + st.insert([1, 2, 3], 0.0) # compute persistence only on the original st.compute_persistence() @@ -484,14 +490,15 @@ def test_simplex_tree_deep_copy(): for a_splx in a_filt_list: assert a_splx in st_filt_list - + # test double free del st del st_copy + def test_simplex_tree_deep_copy_constructor(): st = SimplexTree() - st.insert([1, 2, 3], 0.) + st.insert([1, 2, 3], 0.0) # compute persistence only on the original st.compute_persistence() @@ -510,20 +517,23 @@ def test_simplex_tree_deep_copy_constructor(): for a_splx in a_filt_list: assert a_splx in st_filt_list - + # test double free del st del st_copy + def test_simplex_tree_constructor_exception(): with pytest.raises(TypeError): - st = SimplexTree(other = "Construction from a string shall raise an exception") + st = SimplexTree(other="Construction from a string shall raise an exception") + def test_create_from_array(): a = np.array([[1, 4, 13, 6], [4, 3, 11, 5], [13, 11, 10, 12], [6, 5, 12, 2]]) st = SimplexTree.create_from_array(a, max_filtration=5) assert list(st.get_filtration()) == [([0], 1.0), ([3], 2.0), ([1], 3.0), ([0, 1], 4.0), ([1, 3], 5.0)] + def test_insert_edges_from_coo_matrix(): try: from scipy.sparse import coo_matrix @@ -561,3 +571,32 @@ def test_insert_edges_from_coo_matrix(): st = SimplexTree() st.insert_edges_from_coo_matrix(edges) assert 100 < st.num_simplices() < 1000 + + +def test_insert_batch(): + st = SimplexTree() + # vertices + st.insert_batch(np.array([[6, 1, 5]]), np.array([-5.0, 2.0, -3.0])) + # triangles + st.insert_batch(np.array([[2, 10], [5, 0], [6, 11]]), np.array([4.0, 0.0])) + # edges + st.insert_batch(np.array([[1, 5], [2, 5]]), np.array([1.0, 3.0])) + + assert list(st.get_filtration()) == [ + ([6], -5.0), + ([5], -3.0), + ([0], 0.0), + ([10], 0.0), + ([0, 10], 0.0), + ([11], 0.0), + ([0, 11], 0.0), + ([10, 11], 0.0), + ([0, 10, 11], 0.0), + ([1], 1.0), + ([2], 1.0), + ([1, 2], 1.0), + ([2, 5], 4.0), + ([2, 6], 4.0), + ([5, 6], 4.0), + ([2, 5, 6], 4.0), + ] -- cgit v1.2.3 From f911ed3882c03b049a18f2a638758cb5ef994bcc Mon Sep 17 00:00:00 2001 From: wreise Date: Wed, 25 May 2022 11:59:37 +0200 Subject: Add tests for silhouettes --- src/python/test/test_representations.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'src/python/test') diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index d219ce7a..8be3a1db 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -168,3 +168,32 @@ def test_kernel_empty_diagrams(): # PersistenceFisherKernel(bandwidth_fisher=1., bandwidth=1.)(empty_diag, empty_diag) # PersistenceFisherKernel(bandwidth_fisher=1., bandwidth=1., kernel_approx=RBFSampler(gamma=1./2, n_components=100000).fit(np.ones([1,2])))(empty_diag, empty_diag) + +def test_silhouette_permutation_invariance(): + dgm = _n_diags(1)[0] + dgm_permuted = dgm[np.random.permutation(dgm.shape[0]).astype(int)] + random_resolution = random.randint(50, 100) * 10 + slt = Silhouette(resolution=random_resolution, weight=pow(2)) + + assert np.all(np.isclose(slt(dgm), slt(dgm_permuted))) + + +def test_silhouette_multiplication_invariance(): + dgm = _n_diags(1)[0] + n_repetitions = np.random.randint(2, high=10) + dgm_augmented = np.repeat(dgm, repeats=n_repetitions, axis=0) + + random_resolution = random.randint(50, 100) * 10 + slt = Silhouette(resolution=random_resolution, weight=pow(2)) + assert np.all(np.isclose(slt(dgm), slt(dgm_augmented))) + + +def test_silhouette_numeric(): + dgm = np.array([[2., 3.], [5., 6.]]) + slt = Silhouette(resolution=9, weight=pow(1)) + #slt.fit([dgm]) + # x_values = array([2., 2.5, 3., 3.5, 4., 4.5, 5., 5.5, 6.]) + + expected_silhouette = np.array([0., 0.5, 0., 0., 0., 0., 0., 0.5, 0.])/np.sqrt(2) + output_silhouette = slt(dgm) + assert np.all(np.isclose(output_silhouette, expected_silhouette)) -- cgit v1.2.3 From 500de3b265c227eadd81fb860fade4f6c416cfbc Mon Sep 17 00:00:00 2001 From: wreise Date: Thu, 2 Jun 2022 10:49:07 +0200 Subject: Add tests for Landscapes --- src/python/test/test_representations.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'src/python/test') diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index 8be3a1db..0324b026 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -197,3 +197,29 @@ def test_silhouette_numeric(): expected_silhouette = np.array([0., 0.5, 0., 0., 0., 0., 0., 0.5, 0.])/np.sqrt(2) output_silhouette = slt(dgm) assert np.all(np.isclose(output_silhouette, expected_silhouette)) + + +def test_landscape_small_persistence_invariance(): + dgm = np.array([[2., 6.], [2., 5.], [3., 7.]]) + small_persistence_pts = np.random.rand(10, 2) + small_persistence_pts[:, 1] += small_persistence_pts[:, 0] + small_persistence_pts += np.min(dgm) + dgm_augmented = np.concatenate([dgm, small_persistence_pts], axis=0) + + lds = Landscape(num_landscapes=2, resolution=5) + lds_dgm, lds_dgm_augmented = lds(dgm), lds(dgm_augmented) + + assert np.all(np.isclose(lds_dgm, lds_dgm_augmented)) + + +def test_landscape_numeric(): + dgm = np.array([[2., 6.], [3., 5.]]) + lds_ref = np.array([ + 0., 0.5, 1., 1.5, 2., 1.5, 1., 0.5, 0., # tent of [2, 6] + 0., 0., 0., 0.5, 1., 0.5, 0., 0., 0. + ]) + lds_ref *= np.sqrt(2) + lds = Landscape(num_landscapes=2, resolution=9, sample_range=[2., 6.]) + lds_dgm = lds(dgm) + print(lds.sample_range, lds.new_resolution, lds_dgm.shape) + assert np.all(np.isclose(lds_dgm, lds_ref)) -- cgit v1.2.3 From 775cd158c869f75a4925ac6f7a94a1847f7d7788 Mon Sep 17 00:00:00 2001 From: wreise Date: Thu, 2 Jun 2022 15:38:20 +0200 Subject: Remove print from test --- src/python/test/test_representations.py | 1 - 1 file changed, 1 deletion(-) (limited to 'src/python/test') diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index 0324b026..5f29740f 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -221,5 +221,4 @@ def test_landscape_numeric(): lds_ref *= np.sqrt(2) lds = Landscape(num_landscapes=2, resolution=9, sample_range=[2., 6.]) lds_dgm = lds(dgm) - print(lds.sample_range, lds.new_resolution, lds_dgm.shape) assert np.all(np.isclose(lds_dgm, lds_ref)) -- cgit v1.2.3 From 565cc1fb0c81b627fa6bec0e9ca76571663a7834 Mon Sep 17 00:00:00 2001 From: wreise Date: Fri, 7 Oct 2022 17:59:51 +0200 Subject: Test: resolution when nan in the range --- src/python/test/test_representations.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/python/test') diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index 2ca72d07..823e8620 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -241,3 +241,11 @@ def test_landscape_numeric(): lds = Landscape(num_landscapes=2, resolution=9, sample_range=[2., 6.]) lds_dgm = lds(dgm) assert np.all(np.isclose(lds_dgm, lds_ref)) + + +def test_landscape_nan_range(): + dgm = np.array([[2., 6.], [3., 5.]]) + lds = Landscape(num_landscapes=2, resolution=9, sample_range=[np.nan, 6.]) + lds_dgm = lds(dgm) + assert (lds.sample_range[0] == 2) & (lds.sample_range[1] == 6) + assert lds.new_resolution == 10 -- cgit v1.2.3 From d9dfffdb580ab865a829fce851779f33fa47e4f7 Mon Sep 17 00:00:00 2001 From: wreise Date: Fri, 7 Oct 2022 18:13:26 +0200 Subject: Add triming of the range; Marcs' comments --- src/python/gudhi/representations/vector_methods.py | 31 ++++++++++++++-------- src/python/test/test_representations.py | 2 +- 2 files changed, 21 insertions(+), 12 deletions(-) (limited to 'src/python/test') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 8c8b46db..8114885e 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -123,6 +123,15 @@ def _automatic_sample_range(sample_range, X, y): pass return sample_range + +def trim_on_edges(x, are_endpoints_nan): + if are_endpoints_nan[0]: + x = x[1:] + if are_endpoints_nan[1]: + x = x[:-1] + return x + + class Landscape(BaseEstimator, TransformerMixin): """ This is a class for computing persistence landscapes from a list of persistence diagrams. A persistence landscape is a collection of 1D piecewise-linear functions computed from the rank function associated to the persistence diagram. These piecewise-linear functions are then sampled evenly on a given range and the corresponding vectors of samples are concatenated and returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details. @@ -149,6 +158,8 @@ class Landscape(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) + self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + self.im_range = trim_on_edges(self.im_range, self.nan_in_range) return self def transform(self, X): @@ -163,17 +174,13 @@ class Landscape(BaseEstimator, TransformerMixin): """ Xfit = [] - x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + x_values = self.im_range for i, diag in enumerate(X): midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) tent_functions.partition(diag.shape[0] - self.num_landscapes, axis=1) - landscapes = np.sort(tent_functions, axis=1)[:, -self.num_landscapes:][:, ::-1].T + landscapes = tent_functions[:, -self.num_landscapes:][:, ::-1].T - if self.nan_in_range[0]: - landscapes = landscapes[:, 1:] - if self.nan_in_range[1]: - landscapes = landscapes[:, :-1] landscapes = np.sqrt(2) * np.ravel(landscapes) Xfit.append(landscapes) @@ -189,7 +196,7 @@ class Landscape(BaseEstimator, TransformerMixin): Returns: numpy array with shape (number of samples = **num_landscapes** x **resolution**): output persistence landscape. """ - return self.fit_transform([diag])[0,:] + return self.fit_transform([diag])[0, :] class Silhouette(BaseEstimator, TransformerMixin): """ @@ -205,7 +212,8 @@ class Silhouette(BaseEstimator, TransformerMixin): sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. """ self.weight, self.resolution, self.sample_range = weight, resolution, sample_range - self.im_range = None + self.nan_in_range = np.isnan(np.array(self.sample_range)) + self.new_resolution = self.resolution + self.nan_in_range.sum() def fit(self, X, y=None): """ @@ -216,7 +224,8 @@ class Silhouette(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution) + self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + self.im_range = trim_on_edges(self.im_range, self.nan_in_range) return self def transform(self, X): @@ -232,9 +241,9 @@ class Silhouette(BaseEstimator, TransformerMixin): Xfit = [] x_values = self.im_range - for i, diag in enumerate(X): + for diag in X: midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. - weights = np.array([self.weight(point) for point in diag]) + weights = np.array([self.weight(pt) for pt in diag]) total_weight = np.sum(weights) tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index 823e8620..948d7804 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -209,7 +209,7 @@ def test_silhouette_multiplication_invariance(): def test_silhouette_numeric(): dgm = np.array([[2., 3.], [5., 6.]]) - slt = Silhouette(resolution=9, weight=pow(1)) + slt = Silhouette(resolution=9, weight=pow(1), sample_range=[2., 6.]) #slt.fit([dgm]) # x_values = array([2., 2.5, 3., 3.5, 4., 4.5, 5., 5.5, 6.]) -- cgit v1.2.3 From 80edbbaccc344abfa18ebc48a8493f747a775476 Mon Sep 17 00:00:00 2001 From: wreise Date: Sat, 15 Oct 2022 16:54:04 +0200 Subject: Simplify sorting in landscapes --- src/python/test/test_representations.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/python/test') diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index 948d7804..58caab21 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -235,10 +235,12 @@ def test_landscape_numeric(): dgm = np.array([[2., 6.], [3., 5.]]) lds_ref = np.array([ 0., 0.5, 1., 1.5, 2., 1.5, 1., 0.5, 0., # tent of [2, 6] - 0., 0., 0., 0.5, 1., 0.5, 0., 0., 0. + 0., 0., 0., 0.5, 1., 0.5, 0., 0., 0., + 0., 0., 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 0., 0., 0., 0., ]) lds_ref *= np.sqrt(2) - lds = Landscape(num_landscapes=2, resolution=9, sample_range=[2., 6.]) + lds = Landscape(num_landscapes=4, resolution=9, sample_range=[2., 6.]) lds_dgm = lds(dgm) assert np.all(np.isclose(lds_dgm, lds_ref)) -- cgit v1.2.3 From 2ebdeb905d3ca90e2ba2d24e6d3aac52240f6c86 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 4 Nov 2022 14:05:42 +0100 Subject: More consistent choice of a grid for diagram representations --- src/python/gudhi/representations/vector_methods.py | 46 +++++++++++++++------- src/python/test/test_representations.py | 12 ++++++ 2 files changed, 44 insertions(+), 14 deletions(-) (limited to 'src/python/test') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index a169aee8..212fa9f5 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -124,7 +124,7 @@ def _automatic_sample_range(sample_range, X, y): return sample_range -def _trim_on_edges(x, are_endpoints_nan): +def _trim_endpoints(x, are_endpoints_nan): if are_endpoints_nan[0]: x = x[1:] if are_endpoints_nan[1]: @@ -136,7 +136,7 @@ class Landscape(BaseEstimator, TransformerMixin): """ This is a class for computing persistence landscapes from a list of persistence diagrams. A persistence landscape is a collection of 1D piecewise-linear functions computed from the rank function associated to the persistence diagram. These piecewise-linear functions are then sampled evenly on a given range and the corresponding vectors of samples are concatenated and returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details. """ - def __init__(self, num_landscapes=5, resolution=100, sample_range=[np.nan, np.nan]): + def __init__(self, num_landscapes=5, resolution=100, sample_range=[np.nan, np.nan], *, keep_endpoints=False): """ Constructor for the Landscape class. @@ -144,10 +144,14 @@ class Landscape(BaseEstimator, TransformerMixin): num_landscapes (int): number of piecewise-linear functions to output (default 5). resolution (int): number of sample for all piecewise-linear functions (default 100). sample_range ([double, double]): minimum and maximum of all piecewise-linear function domains, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. + keep_endpoints (bool): when guessing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. """ self.num_landscapes, self.resolution, self.sample_range = num_landscapes, resolution, sample_range self.nan_in_range = np.isnan(np.array(self.sample_range)) - self.new_resolution = self.resolution + self.nan_in_range.sum() + self.new_resolution = self.resolution + if not keep_endpoints: + self.new_resolution += self.nan_in_range.sum() + self.keep_endpoints = keep_endpoints def fit(self, X, y=None): """ @@ -158,8 +162,9 @@ class Landscape(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) - self.im_range = _trim_on_edges(self.im_range, self.nan_in_range) + self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + if not self.keep_endpoints: + self.grid_ = _trim_endpoints(self.grid_, self.nan_in_range) return self def transform(self, X): @@ -174,7 +179,7 @@ class Landscape(BaseEstimator, TransformerMixin): """ Xfit = [] - x_values = self.im_range + x_values = self.grid_ for diag in X: midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) @@ -209,7 +214,7 @@ class Silhouette(BaseEstimator, TransformerMixin): """ This is a class for computing persistence silhouettes from a list of persistence diagrams. A persistence silhouette is computed by taking a weighted average of the collection of 1D piecewise-linear functions given by the persistence landscapes, and then by evenly sampling this average on a given range. Finally, the corresponding vector of samples is returned. See https://arxiv.org/abs/1312.0308 for more details. """ - def __init__(self, weight=lambda x: 1, resolution=100, sample_range=[np.nan, np.nan]): + def __init__(self, weight=lambda x: 1, resolution=100, sample_range=[np.nan, np.nan], *, keep_endpoints=False): """ Constructor for the Silhouette class. @@ -217,10 +222,14 @@ class Silhouette(BaseEstimator, TransformerMixin): weight (function): weight function for the persistence diagram points (default constant function, ie lambda x: 1). This function must be defined on 2D points, ie on lists or numpy arrays of the form [p_x,p_y]. resolution (int): number of samples for the weighted average (default 100). sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. + keep_endpoints (bool): when guessing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. """ self.weight, self.resolution, self.sample_range = weight, resolution, sample_range self.nan_in_range = np.isnan(np.array(self.sample_range)) - self.new_resolution = self.resolution + self.nan_in_range.sum() + self.new_resolution = self.resolution + if not keep_endpoints: + self.new_resolution += self.nan_in_range.sum() + self.keep_endpoints = keep_endpoints def fit(self, X, y=None): """ @@ -231,8 +240,9 @@ class Silhouette(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) - self.im_range = _trim_on_edges(self.im_range, self.nan_in_range) + self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + if not self.keep_endpoints: + self.grid_ = _trim_endpoints(self.grid_, self.nan_in_range) return self def transform(self, X): @@ -246,7 +256,7 @@ class Silhouette(BaseEstimator, TransformerMixin): numpy array with shape (number of diagrams) x (**resolution**): output persistence silhouettes. """ Xfit = [] - x_values = self.im_range + x_values = self.grid_ for diag in X: midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. @@ -277,14 +287,15 @@ class BettiCurve(BaseEstimator, TransformerMixin): Compute Betti curves from persistence diagrams. There are several modes of operation: with a given resolution (with or without a sample_range), with a predefined grid, and with none of the previous. With a predefined grid, the class computes the Betti numbers at those grid points. Without a predefined grid, if the resolution is set to None, it can be fit to a list of persistence diagrams and produce a grid that consists of (at least) the filtration values at which at least one of those persistence diagrams changes Betti numbers, and then compute the Betti numbers at those grid points. In the latter mode, the exact Betti curve is computed for the entire real line. Otherwise, if the resolution is given, the Betti curve is obtained by sampling evenly using either the given sample_range or based on the persistence diagrams. """ - def __init__(self, resolution=100, sample_range=[np.nan, np.nan], predefined_grid=None): + def __init__(self, resolution=100, sample_range=[np.nan, np.nan], predefined_grid=None, *, keep_endpoints=False): """ Constructor for the BettiCurve class. Parameters: - resolution (int): number of sample for the piecewise-constant function (default 100). + resolution (int): number of samples for the piecewise-constant function (default 100), or None for the exact curve. sample_range ([double, double]): minimum and maximum of the piecewise-constant function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. predefined_grid (1d array or None, default=None): Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are ok. If None (default), and resolution is given, the grid will be uniform from x_min to x_max in 'resolution' steps, otherwise a grid will be computed that captures all changes in Betti numbers in the provided data. + keep_endpoints (bool): when guessing `sample_range` (fixed `resolution`, no `predefined_grid`), use the exact extremities. This is mostly useful for plotting, the default is to use a slightly smaller range. Attributes: grid_ (1d array): The grid on which the Betti numbers are computed. If predefined_grid was specified, `grid_` will always be that grid, independently of data. If not, the grid is fitted to capture all filtration values at which the Betti numbers change. @@ -313,6 +324,7 @@ class BettiCurve(BaseEstimator, TransformerMixin): self.predefined_grid = predefined_grid self.resolution = resolution self.sample_range = sample_range + self.keep_endpoints = keep_endpoints def is_fitted(self): return hasattr(self, "grid_") @@ -331,8 +343,14 @@ class BettiCurve(BaseEstimator, TransformerMixin): events = np.unique(np.concatenate([pd.flatten() for pd in X] + [[-np.inf]], axis=0)) self.grid_ = np.array(events) else: + self.nan_in_range = np.isnan(np.array(self.sample_range)) + self.new_resolution = self.resolution + if not self.keep_endpoints: + self.new_resolution += self.nan_in_range.sum() self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution) + self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + if not self.keep_endpoints: + self.grid_ = _trim_endpoints(self.grid_, self.nan_in_range) else: self.grid_ = self.predefined_grid # Get the predefined grid from user diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index 58caab21..9e94feeb 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -251,3 +251,15 @@ def test_landscape_nan_range(): lds_dgm = lds(dgm) assert (lds.sample_range[0] == 2) & (lds.sample_range[1] == 6) assert lds.new_resolution == 10 + +def test_endpoints(): + diags = [ np.array([[2., 3.]]) ] + for vec in [ Landscape(), Silhouette(), BettiCurve() ]: + vec.fit(diags) + assert vec.grid_[0] > 2 and vec.grid_[-1] < 3 + for vec in [ Landscape(keep_endpoints=True), Silhouette(keep_endpoints=True), BettiCurve(keep_endpoints=True) ]: + vec.fit(diags) + assert vec.grid_[0] == 2 and vec.grid_[-1] == 3 + vec = BettiCurve(resolution=None) + vec.fit(diags) + assert np.equal(vec.grid_, [-np.inf, 2., 3.]).all() -- cgit v1.2.3 From 7c17408897a95a1f74626e8ff0ec8101ac4f92fd Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 8 Nov 2022 22:36:16 +0100 Subject: Reject positional arguments in RipsComplex.__init__ --- .github/next_release.md | 3 +++ src/python/gudhi/rips_complex.pyx | 4 ++-- src/python/test/test_simplex_generators.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'src/python/test') diff --git a/.github/next_release.md b/.github/next_release.md index 81599b2c..d5fcef1c 100644 --- a/.github/next_release.md +++ b/.github/next_release.md @@ -9,6 +9,9 @@ Below is a list of changes made since GUDHI 3.6.0: - [Module](link) - ... +- [Rips complex](https://gudhi.inria.fr/python/latest/rips_complex_user.html) + - Construction now rejects positional arguments, you need to specify `points=X`. + - Installation - c++17 is the new minimal standard to compile the library. This implies Visual Studio minimal version is now 2017. diff --git a/src/python/gudhi/rips_complex.pyx b/src/python/gudhi/rips_complex.pyx index a0924cd6..d748f91e 100644 --- a/src/python/gudhi/rips_complex.pyx +++ b/src/python/gudhi/rips_complex.pyx @@ -41,7 +41,7 @@ cdef class RipsComplex: cdef Rips_complex_interface thisref # Fake constructor that does nothing but documenting the constructor - def __init__(self, points=None, distance_matrix=None, + def __init__(self, *, points=None, distance_matrix=None, max_edge_length=float('inf'), sparse=None): """RipsComplex constructor. @@ -64,7 +64,7 @@ cdef class RipsComplex: """ # The real cython constructor - def __cinit__(self, points=None, distance_matrix=None, + def __cinit__(self, *, points=None, distance_matrix=None, max_edge_length=float('inf'), sparse=None): if sparse is not None: if distance_matrix is not None: diff --git a/src/python/test/test_simplex_generators.py b/src/python/test/test_simplex_generators.py index 8a9b4844..c567d4c1 100755 --- a/src/python/test/test_simplex_generators.py +++ b/src/python/test/test_simplex_generators.py @@ -14,7 +14,7 @@ import numpy as np def test_flag_generators(): pts = np.array([[0, 0], [0, 1.01], [1, 0], [1.02, 1.03], [100, 0], [100, 3.01], [103, 0], [103.02, 3.03]]) - r = gudhi.RipsComplex(pts, max_edge_length=4) + r = gudhi.RipsComplex(points=pts, max_edge_length=4) st = r.create_simplex_tree(max_dimension=50) st.persistence() g = st.flag_persistence_generators() -- cgit v1.2.3 From ad1123c3c7cfddc1c15e9933b96af08ef3398b3c Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 9 Nov 2022 17:46:39 +0100 Subject: New write_points_to_off_file --- src/python/CMakeLists.txt | 4 +- src/python/doc/installation.rst | 4 +- src/python/doc/point_cloud.rst | 5 ++ src/python/gudhi/off_reader.pyx | 41 -------------- src/python/gudhi/off_utils.pyx | 57 ++++++++++++++++++++ src/python/test/test_subsampling.py | 103 ++++++++---------------------------- 6 files changed, 88 insertions(+), 126 deletions(-) delete mode 100644 src/python/gudhi/off_reader.pyx create mode 100644 src/python/gudhi/off_utils.pyx (limited to 'src/python/test') diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 8f8df138..35ddb778 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -53,7 +53,7 @@ if(PYTHONINTERP_FOUND) set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'datasets', ") # Cython modules - set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'off_reader', ") + set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'off_utils', ") set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'simplex_tree', ") set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'rips_complex', ") set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'cubical_complex', ") @@ -152,7 +152,7 @@ if(PYTHONINTERP_FOUND) set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DCGAL_EIGEN3_ENABLED', ") endif (EIGEN3_FOUND) - set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'off_reader', ") + set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'off_utils', ") set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'simplex_tree', ") set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'rips_complex', ") set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'cubical_complex', ") diff --git a/src/python/doc/installation.rst b/src/python/doc/installation.rst index 276ac4e2..5491542f 100644 --- a/src/python/doc/installation.rst +++ b/src/python/doc/installation.rst @@ -150,7 +150,7 @@ You shall have something like: Cython version 0.29.25 Numpy version 1.21.4 Boost version 1.77.0 - + Installed modules are: off_reader;simplex_tree;rips_complex;cubical_complex;periodic_cubical_complex; + + Installed modules are: off_utils;simplex_tree;rips_complex;cubical_complex;periodic_cubical_complex; persistence_graphical_tools;reader_utils;witness_complex;strong_witness_complex; + Missing modules are: bottleneck;nerve_gic;subsampling;tangential_complex;alpha_complex;euclidean_witness_complex; euclidean_strong_witness_complex; @@ -188,7 +188,7 @@ A complete configuration would be : GMPXX_LIBRARIES = /usr/lib/x86_64-linux-gnu/libgmpxx.so MPFR_LIBRARIES = /usr/lib/x86_64-linux-gnu/libmpfr.so TBB version 9107 found and used - + Installed modules are: bottleneck;off_reader;simplex_tree;rips_complex;cubical_complex;periodic_cubical_complex; + + Installed modules are: bottleneck;off_utils;simplex_tree;rips_complex;cubical_complex;periodic_cubical_complex; persistence_graphical_tools;reader_utils;witness_complex;strong_witness_complex;nerve_gic;subsampling; tangential_complex;alpha_complex;euclidean_witness_complex;euclidean_strong_witness_complex; + Missing modules are: diff --git a/src/python/doc/point_cloud.rst b/src/python/doc/point_cloud.rst index ffd8f85b..473b303f 100644 --- a/src/python/doc/point_cloud.rst +++ b/src/python/doc/point_cloud.rst @@ -13,6 +13,11 @@ File Readers .. autofunction:: gudhi.read_lower_triangular_matrix_from_csv_file +File Writers +------------ + +.. autofunction:: gudhi.write_points_to_off_file + Subsampling ----------- diff --git a/src/python/gudhi/off_reader.pyx b/src/python/gudhi/off_reader.pyx deleted file mode 100644 index a3200704..00000000 --- a/src/python/gudhi/off_reader.pyx +++ /dev/null @@ -1,41 +0,0 @@ -# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - -# which is released under MIT. -# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full -# license details. -# Author(s): Vincent Rouvreau -# -# Copyright (C) 2016 Inria -# -# Modification(s): -# - YYYY/MM Author: Description of the modification - -from __future__ import print_function -from cython cimport numeric -from libcpp.vector cimport vector -from libcpp.string cimport string -import errno -import os - -__author__ = "Vincent Rouvreau" -__copyright__ = "Copyright (C) 2016 Inria" -__license__ = "MIT" - -cdef extern from "Off_reader_interface.h" namespace "Gudhi": - vector[vector[double]] read_points_from_OFF_file(string off_file) - -def read_points_from_off_file(off_file=''): - """Read points from OFF file. - - :param off_file: An OFF file style name. - :type off_file: string - - :returns: The point set. - :rtype: List[List[float]] - """ - if off_file: - if os.path.isfile(off_file): - return read_points_from_OFF_file(off_file.encode('utf-8')) - else: - raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), - off_file) - diff --git a/src/python/gudhi/off_utils.pyx b/src/python/gudhi/off_utils.pyx new file mode 100644 index 00000000..155575d5 --- /dev/null +++ b/src/python/gudhi/off_utils.pyx @@ -0,0 +1,57 @@ +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - +# which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full +# license details. +# Author(s): Vincent Rouvreau +# +# Copyright (C) 2016 Inria +# +# Modification(s): +# - YYYY/MM Author: Description of the modification + +from __future__ import print_function +from cython cimport numeric +from libcpp.vector cimport vector +from libcpp.string cimport string +cimport cython +import errno +import os +import numpy as np + +__author__ = "Vincent Rouvreau" +__copyright__ = "Copyright (C) 2016 Inria" +__license__ = "MIT" + +cdef extern from "Off_reader_interface.h" namespace "Gudhi": + vector[vector[double]] read_points_from_OFF_file(string off_file) + +def read_points_from_off_file(off_file=''): + """Read points from OFF file. + + :param off_file: An OFF file style name. + :type off_file: string + + :returns: The point set. + :rtype: List[List[float]] + """ + if off_file: + if os.path.isfile(off_file): + return read_points_from_OFF_file(off_file.encode('utf-8')) + else: + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), + off_file) + +@cython.embedsignature(True) +def write_points_to_off_file(fname, points): + """Write points to an OFF file. + + A simple wrapper for `numpy.savetxt`. + + :param fname: Name of the OFF file. + :type fname: str or file handle + :param points: Point coordinates. + :type points: numpy array of shape (n, dim) + """ + points = np.array(points, copy=False) + assert len(points.shape) == 2 + np.savetxt(fname, points, header='nOFF\n{} {} 0 0'.format(points.shape[1], points.shape[0]), comments='') diff --git a/src/python/test/test_subsampling.py b/src/python/test/test_subsampling.py index 3431f372..c1cb4e3f 100755 --- a/src/python/test/test_subsampling.py +++ b/src/python/test/test_subsampling.py @@ -16,17 +16,9 @@ __license__ = "MIT" def test_write_off_file_for_tests(): - file = open("subsample.off", "w") - file.write("nOFF\n") - file.write("2 7 0 0\n") - file.write("1.0 1.0\n") - file.write("7.0 0.0\n") - file.write("4.0 6.0\n") - file.write("9.0 6.0\n") - file.write("0.0 14.0\n") - file.write("2.0 19.0\n") - file.write("9.0 17.0\n") - file.close() + gudhi.write_points_to_off_file( + "subsample.off", [[1.0, 1.0], [7.0, 0.0], [4.0, 6.0], [9.0, 6.0], [0.0, 14.0], [2.0, 19.0], [9.0, 17.0]] + ) def test_simple_choose_n_farthest_points_with_a_starting_point(): @@ -34,54 +26,29 @@ def test_simple_choose_n_farthest_points_with_a_starting_point(): i = 0 for point in point_set: # The iteration starts with the given starting point - sub_set = gudhi.choose_n_farthest_points( - points=point_set, nb_points=1, starting_point=i - ) + sub_set = gudhi.choose_n_farthest_points(points=point_set, nb_points=1, starting_point=i) assert sub_set[0] == point_set[i] i = i + 1 # The iteration finds then the farthest - sub_set = gudhi.choose_n_farthest_points( - points=point_set, nb_points=2, starting_point=1 - ) + sub_set = gudhi.choose_n_farthest_points(points=point_set, nb_points=2, starting_point=1) assert sub_set[1] == point_set[3] - sub_set = gudhi.choose_n_farthest_points( - points=point_set, nb_points=2, starting_point=3 - ) + sub_set = gudhi.choose_n_farthest_points(points=point_set, nb_points=2, starting_point=3) assert sub_set[1] == point_set[1] - sub_set = gudhi.choose_n_farthest_points( - points=point_set, nb_points=2, starting_point=0 - ) + sub_set = gudhi.choose_n_farthest_points(points=point_set, nb_points=2, starting_point=0) assert sub_set[1] == point_set[2] - sub_set = gudhi.choose_n_farthest_points( - points=point_set, nb_points=2, starting_point=2 - ) + sub_set = gudhi.choose_n_farthest_points(points=point_set, nb_points=2, starting_point=2) assert sub_set[1] == point_set[0] # Test the limits - assert ( - gudhi.choose_n_farthest_points(points=[], nb_points=0, starting_point=0) == [] - ) - assert ( - gudhi.choose_n_farthest_points(points=[], nb_points=1, starting_point=0) == [] - ) - assert ( - gudhi.choose_n_farthest_points(points=[], nb_points=0, starting_point=1) == [] - ) - assert ( - gudhi.choose_n_farthest_points(points=[], nb_points=1, starting_point=1) == [] - ) + assert gudhi.choose_n_farthest_points(points=[], nb_points=0, starting_point=0) == [] + assert gudhi.choose_n_farthest_points(points=[], nb_points=1, starting_point=0) == [] + assert gudhi.choose_n_farthest_points(points=[], nb_points=0, starting_point=1) == [] + assert gudhi.choose_n_farthest_points(points=[], nb_points=1, starting_point=1) == [] # From off file test for i in range(0, 7): - assert ( - len( - gudhi.choose_n_farthest_points( - off_file="subsample.off", nb_points=i, starting_point=i - ) - ) - == i - ) + assert len(gudhi.choose_n_farthest_points(off_file="subsample.off", nb_points=i, starting_point=i)) == i def test_simple_choose_n_farthest_points_randomed(): @@ -104,10 +71,7 @@ def test_simple_choose_n_farthest_points_randomed(): # From off file test for i in range(0, 7): - assert ( - len(gudhi.choose_n_farthest_points(off_file="subsample.off", nb_points=i)) - == i - ) + assert len(gudhi.choose_n_farthest_points(off_file="subsample.off", nb_points=i)) == i def test_simple_pick_n_random_points(): @@ -130,9 +94,7 @@ def test_simple_pick_n_random_points(): # From off file test for i in range(0, 7): - assert ( - len(gudhi.pick_n_random_points(off_file="subsample.off", nb_points=i)) == i - ) + assert len(gudhi.pick_n_random_points(off_file="subsample.off", nb_points=i)) == i def test_simple_sparsify_points(): @@ -152,31 +114,10 @@ def test_simple_sparsify_points(): ] assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=2.001) == [[0, 1]] - assert ( - len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=0.0)) - == 7 - ) - assert ( - len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=30.0)) - == 5 - ) - assert ( - len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=40.1)) - == 4 - ) - assert ( - len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=89.9)) - == 3 - ) - assert ( - len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=100.0)) - == 2 - ) - assert ( - len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=324.9)) - == 2 - ) - assert ( - len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=325.01)) - == 1 - ) + assert len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=0.0)) == 7 + assert len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=30.0)) == 5 + assert len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=40.1)) == 4 + assert len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=89.9)) == 3 + assert len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=100.0)) == 2 + assert len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=324.9)) == 2 + assert len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=325.01)) == 1 -- cgit v1.2.3 From 2d5039b7eeb16116ab859076aa0a93f092250d88 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 11 Nov 2022 18:59:42 +0100 Subject: Special case for writing 3d OFF --- src/python/CMakeLists.txt | 1 + src/python/gudhi/off_utils.pyx | 7 ++++++- src/python/test/test_off.py | 21 +++++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 src/python/test/test_off.py (limited to 'src/python/test') diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 35ddb778..32ec13bd 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -546,6 +546,7 @@ if(PYTHONINTERP_FOUND) # Reader utils add_gudhi_py_test(test_reader_utils) + add_gudhi_py_test(test_off) # Wasserstein if(OT_FOUND) diff --git a/src/python/gudhi/off_utils.pyx b/src/python/gudhi/off_utils.pyx index a8142791..9276c7b0 100644 --- a/src/python/gudhi/off_utils.pyx +++ b/src/python/gudhi/off_utils.pyx @@ -54,4 +54,9 @@ def write_points_to_off_file(fname, points): """ points = np.array(points, copy=False) assert len(points.shape) == 2 - np.savetxt(fname, points, header='nOFF\n{} {} 0 0'.format(points.shape[1], points.shape[0]), comments='') + dim = points.shape[1] + if dim == 3: + head = 'OFF\n{} 0 0'.format(points.shape[0]) + else: + head = 'nOFF\n{} {} 0 0'.format(dim, points.shape[0]) + np.savetxt(fname, points, header=head, comments='') diff --git a/src/python/test/test_off.py b/src/python/test/test_off.py new file mode 100644 index 00000000..69bfa1f9 --- /dev/null +++ b/src/python/test/test_off.py @@ -0,0 +1,21 @@ +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. + Author(s): Marc Glisse + + Copyright (C) 2022 Inria + + Modification(s): + - YYYY/MM Author: Description of the modification +""" + +import gudhi as gd +import numpy as np +import pytest + + +def test_off_rw(): + for dim in range(2, 6): + X = np.random.rand(123, dim) + gd.write_points_to_off_file('rand.off', X) + Y = gd.read_points_from_off_file('rand.off') + assert Y == pytest.approx(X) -- cgit v1.2.3 From f4f930169fbf2db2707d69b334cfe5b941c64350 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 11 Nov 2022 19:03:54 +0100 Subject: black --- src/python/test/test_off.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/python/test') diff --git a/src/python/test/test_off.py b/src/python/test/test_off.py index 69bfa1f9..aea1941b 100644 --- a/src/python/test/test_off.py +++ b/src/python/test/test_off.py @@ -16,6 +16,6 @@ import pytest def test_off_rw(): for dim in range(2, 6): X = np.random.rand(123, dim) - gd.write_points_to_off_file('rand.off', X) - Y = gd.read_points_from_off_file('rand.off') + gd.write_points_to_off_file("rand.off", X) + Y = gd.read_points_from_off_file("rand.off") assert Y == pytest.approx(X) -- cgit v1.2.3 From 2f0db9e495afe774409f4b0acb823e1b984aeb71 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 14 Nov 2022 16:24:30 +0100 Subject: endpoints for Entropy, idempotent fit(), refactor grid_ --- src/python/gudhi/representations/vector_methods.py | 65 ++++++++++------------ src/python/test/test_representations.py | 8 +-- 2 files changed, 33 insertions(+), 40 deletions(-) (limited to 'src/python/test') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 212fa9f5..f0bc9f95 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -101,7 +101,7 @@ class PersistenceImage(BaseEstimator, TransformerMixin): """ return self.fit_transform([diag])[0,:] -def _automatic_sample_range(sample_range, X, y): +def _automatic_sample_range(sample_range, X): """ Compute and returns sample range from the persistence diagrams if one of the sample_range values is numpy.nan. @@ -114,7 +114,7 @@ def _automatic_sample_range(sample_range, X, y): nan_in_range = np.isnan(sample_range) if nan_in_range.any(): try: - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) + pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X) [mx,my] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]] [Mx,My] = [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] return np.where(nan_in_range, np.array([mx, My]), sample_range) @@ -132,6 +132,18 @@ def _trim_endpoints(x, are_endpoints_nan): return x +def _grid_from_sample_range(self, X): + sample_range = np.array(self.sample_range_init) + self.nan_in_range = np.isnan(sample_range) + self.new_resolution = self.resolution + if not self.keep_endpoints: + self.new_resolution += self.nan_in_range.sum() + self.sample_range = _automatic_sample_range(sample_range, X) + self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + if not self.keep_endpoints: + self.grid_ = _trim_endpoints(self.grid_, self.nan_in_range) + + class Landscape(BaseEstimator, TransformerMixin): """ This is a class for computing persistence landscapes from a list of persistence diagrams. A persistence landscape is a collection of 1D piecewise-linear functions computed from the rank function associated to the persistence diagram. These piecewise-linear functions are then sampled evenly on a given range and the corresponding vectors of samples are concatenated and returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details. @@ -146,11 +158,7 @@ class Landscape(BaseEstimator, TransformerMixin): sample_range ([double, double]): minimum and maximum of all piecewise-linear function domains, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. keep_endpoints (bool): when guessing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. """ - self.num_landscapes, self.resolution, self.sample_range = num_landscapes, resolution, sample_range - self.nan_in_range = np.isnan(np.array(self.sample_range)) - self.new_resolution = self.resolution - if not keep_endpoints: - self.new_resolution += self.nan_in_range.sum() + self.num_landscapes, self.resolution, self.sample_range_init = num_landscapes, resolution, sample_range self.keep_endpoints = keep_endpoints def fit(self, X, y=None): @@ -161,10 +169,7 @@ class Landscape(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) - if not self.keep_endpoints: - self.grid_ = _trim_endpoints(self.grid_, self.nan_in_range) + _grid_from_sample_range(self, X) return self def transform(self, X): @@ -224,11 +229,7 @@ class Silhouette(BaseEstimator, TransformerMixin): sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. keep_endpoints (bool): when guessing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. """ - self.weight, self.resolution, self.sample_range = weight, resolution, sample_range - self.nan_in_range = np.isnan(np.array(self.sample_range)) - self.new_resolution = self.resolution - if not keep_endpoints: - self.new_resolution += self.nan_in_range.sum() + self.weight, self.resolution, self.sample_range_init = weight, resolution, sample_range self.keep_endpoints = keep_endpoints def fit(self, X, y=None): @@ -239,10 +240,7 @@ class Silhouette(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) - if not self.keep_endpoints: - self.grid_ = _trim_endpoints(self.grid_, self.nan_in_range) + _grid_from_sample_range(self, X) return self def transform(self, X): @@ -323,7 +321,7 @@ class BettiCurve(BaseEstimator, TransformerMixin): self.predefined_grid = predefined_grid self.resolution = resolution - self.sample_range = sample_range + self.sample_range_init = sample_range self.keep_endpoints = keep_endpoints def is_fitted(self): @@ -343,14 +341,7 @@ class BettiCurve(BaseEstimator, TransformerMixin): events = np.unique(np.concatenate([pd.flatten() for pd in X] + [[-np.inf]], axis=0)) self.grid_ = np.array(events) else: - self.nan_in_range = np.isnan(np.array(self.sample_range)) - self.new_resolution = self.resolution - if not self.keep_endpoints: - self.new_resolution += self.nan_in_range.sum() - self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) - if not self.keep_endpoints: - self.grid_ = _trim_endpoints(self.grid_, self.nan_in_range) + _grid_from_sample_range(self, X) else: self.grid_ = self.predefined_grid # Get the predefined grid from user @@ -450,7 +441,7 @@ class Entropy(BaseEstimator, TransformerMixin): """ This is a class for computing persistence entropy. Persistence entropy is a statistic for persistence diagrams inspired from Shannon entropy. This statistic can also be used to compute a feature vector, called the entropy summary function. See https://arxiv.org/pdf/1803.08304.pdf for more details. Note that a previous implementation was contributed by Manuel Soriano-Trigueros. """ - def __init__(self, mode="scalar", normalized=True, resolution=100, sample_range=[np.nan, np.nan]): + def __init__(self, mode="scalar", normalized=True, resolution=100, sample_range=[np.nan, np.nan], *, keep_endpoints=False): """ Constructor for the Entropy class. @@ -459,8 +450,10 @@ class Entropy(BaseEstimator, TransformerMixin): normalized (bool): whether to normalize the entropy summary function (default True). Used only if **mode** = "vector". resolution (int): number of sample for the entropy summary function (default 100). Used only if **mode** = "vector". sample_range ([double, double]): minimum and maximum of the entropy summary function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. Used only if **mode** = "vector". + keep_endpoints (bool): when guessing `sample_range`, use the exact extremities. This is mostly useful for plotting, the default is to use a slightly smaller range. """ - self.mode, self.normalized, self.resolution, self.sample_range = mode, normalized, resolution, sample_range + self.mode, self.normalized, self.resolution, self.sample_range_init = mode, normalized, resolution, sample_range + self.keep_endpoints = keep_endpoints def fit(self, X, y=None): """ @@ -470,7 +463,9 @@ class Entropy(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) + if self.mode == "vector": + _grid_from_sample_range(self, X) + self.step_ = self.grid_[1] - self.grid_[0] return self def transform(self, X): @@ -484,8 +479,6 @@ class Entropy(BaseEstimator, TransformerMixin): numpy array with shape (number of diagrams) x (1 if **mode** = "scalar" else **resolution**): output entropy. """ num_diag, Xfit = len(X), [] - x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution) - step_x = x_values[1] - x_values[0] new_X = BirthPersistenceTransform().fit_transform(X) for i in range(num_diag): @@ -500,8 +493,8 @@ class Entropy(BaseEstimator, TransformerMixin): ent = np.zeros(self.resolution) for j in range(num_pts_in_diag): [px,py] = orig_diagram[j,:2] - min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.resolution) - max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution) + min_idx = np.clip(np.ceil((px - self.sample_range[0]) / self.step_).astype(int), 0, self.resolution) + max_idx = np.clip(np.ceil((py - self.sample_range[0]) / self.step_).astype(int), 0, self.resolution) ent[min_idx:max_idx]-=p[j]*np.log(p[j]) if self.normalized: ent = ent / np.linalg.norm(ent, ord=1) diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index 9e94feeb..ae0362f8 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -161,7 +161,7 @@ def test_entropy_miscalculation(): return -np.dot(l, np.log(l)) sce = Entropy(mode="scalar") assert [[pe(diag_ex)]] == sce.fit_transform([diag_ex]) - sce = Entropy(mode="vector", resolution=4, normalized=False) + sce = Entropy(mode="vector", resolution=4, normalized=False, keep_endpoints=True) pef = [-1/4*np.log(1/4)-1/4*np.log(1/4)-1/2*np.log(1/2), -1/4*np.log(1/4)-1/4*np.log(1/4)-1/2*np.log(1/2), -1/2*np.log(1/2), @@ -170,7 +170,7 @@ def test_entropy_miscalculation(): sce = Entropy(mode="vector", resolution=4, normalized=True) pefN = (sce.fit_transform([diag_ex]))[0] area = np.linalg.norm(pefN, ord=1) - assert area==1 + assert area==pytest.approx(1) def test_kernel_empty_diagrams(): empty_diag = np.empty(shape = [0, 2]) @@ -254,10 +254,10 @@ def test_landscape_nan_range(): def test_endpoints(): diags = [ np.array([[2., 3.]]) ] - for vec in [ Landscape(), Silhouette(), BettiCurve() ]: + for vec in [ Landscape(), Silhouette(), BettiCurve(), Entropy(mode="vector") ]: vec.fit(diags) assert vec.grid_[0] > 2 and vec.grid_[-1] < 3 - for vec in [ Landscape(keep_endpoints=True), Silhouette(keep_endpoints=True), BettiCurve(keep_endpoints=True) ]: + for vec in [ Landscape(keep_endpoints=True), Silhouette(keep_endpoints=True), BettiCurve(keep_endpoints=True), Entropy(mode="vector", keep_endpoints=True)]: vec.fit(diags) assert vec.grid_[0] == 2 and vec.grid_[-1] == 3 vec = BettiCurve(resolution=None) -- cgit v1.2.3 From 02b59e350fac3688b7dee24abe3c058739508aa5 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 16 Nov 2022 00:59:11 +0100 Subject: Use float for max_filtration in test Co-authored-by: Vincent Rouvreau <10407034+VincentRouvreau@users.noreply.github.com> --- src/python/test/test_simplex_tree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/python/test') diff --git a/src/python/test/test_simplex_tree.py b/src/python/test/test_simplex_tree.py index 59fd889a..2ccbfbf5 100755 --- a/src/python/test/test_simplex_tree.py +++ b/src/python/test/test_simplex_tree.py @@ -528,7 +528,7 @@ def test_simplex_tree_constructor_exception(): def test_create_from_array(): a = np.array([[1, 4, 13, 6], [4, 3, 11, 5], [13, 11, 10, 12], [6, 5, 12, 2]]) - st = SimplexTree.create_from_array(a, max_filtration=5) + st = SimplexTree.create_from_array(a, max_filtration=5.0) assert list(st.get_filtration()) == [([0], 1.0), ([3], 2.0), ([1], 3.0), ([0, 1], 4.0), ([1, 3], 5.0)] -- cgit v1.2.3 From ed6decde6114988606f53d2a69e48afb5316cd77 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 18 Nov 2022 16:08:30 +0100 Subject: pytest.warns(None) deprecated The warnings points to https://docs.pytest.org/en/latest/how-to/capture-warnings.html#additional-use-cases-of-warnings-in-tests --- src/python/test/test_persistence_graphical_tools.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/python/test') diff --git a/src/python/test/test_persistence_graphical_tools.py b/src/python/test/test_persistence_graphical_tools.py index c19836b7..0e2ac3f8 100644 --- a/src/python/test/test_persistence_graphical_tools.py +++ b/src/python/test/test_persistence_graphical_tools.py @@ -12,6 +12,7 @@ import gudhi as gd import numpy as np import matplotlib as plt import pytest +import warnings def test_array_handler(): @@ -71,13 +72,13 @@ def test_limit_to_max_intervals(): (0, (0.0, 0.106382)), ] # check no warnings if max_intervals equals to the diagrams number - with pytest.warns(None) as record: + with warnings.catch_warnings(): + warnings.simplefilter("error") truncated_diags = gd.persistence_graphical_tools._limit_to_max_intervals( diags, 10, key=lambda life_time: life_time[1][1] - life_time[1][0] ) # check diagrams are not sorted assert truncated_diags == diags - assert len(record) == 0 # check warning if max_intervals lower than the diagrams number with pytest.warns(UserWarning) as record: -- cgit v1.2.3 From 1d90a301af29608da143be70b75d5a4fddc88e57 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 18 Nov 2022 16:19:29 +0100 Subject: Catch warnings in wasserstein test --- src/python/test/test_wasserstein_distance.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/python/test') diff --git a/src/python/test/test_wasserstein_distance.py b/src/python/test/test_wasserstein_distance.py index 3a004d77..a76b6ce7 100755 --- a/src/python/test/test_wasserstein_distance.py +++ b/src/python/test/test_wasserstein_distance.py @@ -90,10 +90,11 @@ def test_get_essential_parts(): def test_warn_infty(): - assert _warn_infty(matching=False)==np.inf - c, m = _warn_infty(matching=True) - assert (c == np.inf) - assert (m is None) + with pytest.warns(UserWarning): + assert _warn_infty(matching=False)==np.inf + c, m = _warn_infty(matching=True) + assert (c == np.inf) + assert (m is None) def _basic_wasserstein(wasserstein_distance, delta, test_infinity=True, test_matching=True): -- cgit v1.2.3 From 86689f89bf896e41683fd7b1a4568f2b34ea505d Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 3 Jan 2023 21:23:49 +0100 Subject: fix get_params --- src/python/gudhi/representations/vector_methods.py | 18 +++++++++--------- src/python/test/test_representations.py | 6 +++++- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'src/python/test') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 745fe1e5..ce74aee5 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -138,13 +138,13 @@ def _trim_endpoints(x, are_endpoints_nan): def _grid_from_sample_range(self, X): - sample_range = np.array(self.sample_range_init) + sample_range = np.array(self.sample_range) self.nan_in_range = np.isnan(sample_range) self.new_resolution = self.resolution if not self.keep_endpoints: self.new_resolution += self.nan_in_range.sum() - self.sample_range = _automatic_sample_range(sample_range, X) - self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + self.sample_range_fixed = _automatic_sample_range(sample_range, X) + self.grid_ = np.linspace(self.sample_range_fixed[0], self.sample_range_fixed[1], self.new_resolution) if not self.keep_endpoints: self.grid_ = _trim_endpoints(self.grid_, self.nan_in_range) @@ -166,7 +166,7 @@ class Landscape(BaseEstimator, TransformerMixin): sample_range ([double, double]): minimum and maximum of all piecewise-linear function domains, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. keep_endpoints (bool): when computing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. """ - self.num_landscapes, self.resolution, self.sample_range_init = num_landscapes, resolution, sample_range + self.num_landscapes, self.resolution, self.sample_range = num_landscapes, resolution, sample_range self.keep_endpoints = keep_endpoints def fit(self, X, y=None): @@ -240,7 +240,7 @@ class Silhouette(BaseEstimator, TransformerMixin): sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. keep_endpoints (bool): when computing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. """ - self.weight, self.resolution, self.sample_range_init = weight, resolution, sample_range + self.weight, self.resolution, self.sample_range = weight, resolution, sample_range self.keep_endpoints = keep_endpoints def fit(self, X, y=None): @@ -334,7 +334,7 @@ class BettiCurve(BaseEstimator, TransformerMixin): self.predefined_grid = predefined_grid self.resolution = resolution - self.sample_range_init = sample_range + self.sample_range = sample_range self.keep_endpoints = keep_endpoints def is_fitted(self): @@ -468,7 +468,7 @@ class Entropy(BaseEstimator, TransformerMixin): sample_range ([double, double]): minimum and maximum of the entropy summary function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. Used only if **mode** = "vector". keep_endpoints (bool): when computing `sample_range`, use the exact extremities. This is mostly useful for plotting, the default is to use a slightly smaller range. """ - self.mode, self.normalized, self.resolution, self.sample_range_init = mode, normalized, resolution, sample_range + self.mode, self.normalized, self.resolution, self.sample_range = mode, normalized, resolution, sample_range self.keep_endpoints = keep_endpoints def fit(self, X, y=None): @@ -509,8 +509,8 @@ class Entropy(BaseEstimator, TransformerMixin): ent = np.zeros(self.resolution) for j in range(num_pts_in_diag): [px,py] = orig_diagram[j,:2] - min_idx = np.clip(np.ceil((px - self.sample_range[0]) / self.step_).astype(int), 0, self.resolution) - max_idx = np.clip(np.ceil((py - self.sample_range[0]) / self.step_).astype(int), 0, self.resolution) + min_idx = np.clip(np.ceil((px - self.sample_range_fixed[0]) / self.step_).astype(int), 0, self.resolution) + max_idx = np.clip(np.ceil((py - self.sample_range_fixed[0]) / self.step_).astype(int), 0, self.resolution) ent[min_idx:max_idx]-=p[j]*np.log(p[j]) if self.normalized: ent = ent / np.linalg.norm(ent, ord=1) diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index ae0362f8..f4ffbdc1 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -249,7 +249,7 @@ def test_landscape_nan_range(): dgm = np.array([[2., 6.], [3., 5.]]) lds = Landscape(num_landscapes=2, resolution=9, sample_range=[np.nan, 6.]) lds_dgm = lds(dgm) - assert (lds.sample_range[0] == 2) & (lds.sample_range[1] == 6) + assert (lds.sample_range_fixed[0] == 2) & (lds.sample_range_fixed[1] == 6) assert lds.new_resolution == 10 def test_endpoints(): @@ -263,3 +263,7 @@ def test_endpoints(): vec = BettiCurve(resolution=None) vec.fit(diags) assert np.equal(vec.grid_, [-np.inf, 2., 3.]).all() + +def test_get_params(): + for vec in [ Landscape(), Silhouette(), BettiCurve(), Entropy(mode="vector") ]: + vec.get_params() -- cgit v1.2.3