From 34207229b3ab2936aecd953997286a0daab88a83 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 29 May 2020 17:56:45 +0200 Subject: convert matrix to SimplexTree --- src/python/gudhi/simplex_tree.pxd | 1 + src/python/gudhi/simplex_tree.pyx | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd index e748ac40..3bd9b080 100644 --- a/src/python/gudhi/simplex_tree.pxd +++ b/src/python/gudhi/simplex_tree.pxd @@ -49,6 +49,7 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi": int upper_bound_dimension() nogil bool find_simplex(vector[int] simplex) nogil bool insert(vector[int] simplex, double filtration) nogil + void insert_matrix(double* filtrations, int n, int stride0, int stride1, double max_filtration) nogil vector[pair[vector[int], double]] get_star(vector[int] simplex) nogil vector[pair[vector[int], double]] get_cofaces(vector[int] simplex, int dimension) nogil void expansion(int max_dim) nogil except + diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 9cb24221..261f7e1b 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -195,6 +195,31 @@ cdef class SimplexTree: """ return self.get_ptr().insert(simplex, filtration) + def insert_array(self, filtrations, double max_filtration=numpy.inf): + """Inserts edges in an empty complex. The vertices are numbered from 0 to n, and the filtration values are + encoded in the array, with the diagonal representing the vertices. It is the caller's responsibility to + ensure that this defines a filtration, which can be achieved with either:: + + filtrations[np.diag_indices_from(filtrations)] = filtrations.min(1) + + or:: + + diag = filtrations.diagonal() + filtrations = np.fmax(np.fmax(filtrations, diag[:, None]), diag[None, :]) + + :param filtrations: the filtration values of the vertices and edges to insert. The matrix is assumed to be symmetric. + :type filtrations: numpy.ndarray of shape (n,n) + :param max_filtration: only insert vertices and edges with filtration values no larger than max_filtration + :type max_filtration: float + """ + filtrations = numpy.asanyarray(filtrations, dtype=float) + cdef double[:,:] F = filtrations + assert self.num_vertices() == 0, "insert_array requires an empty SimplexTree" + cdef int n = F.shape[0] + assert n == F.shape[1] + with nogil: + self.get_ptr().insert_matrix(&F[0,0], n, F.strides[0], F.strides[1], max_filtration) + def get_simplices(self): """This function returns a generator with simplices and their given filtration values. -- cgit v1.2.3 From 598a6bb607edd079c3c6c49dc631f987621b12cb Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 8 Jul 2020 21:31:10 +0200 Subject: Minor doc tweak --- src/python/gudhi/simplex_tree.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 5e032e2f..557a80a9 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -196,7 +196,7 @@ cdef class SimplexTree: return self.get_ptr().insert(simplex, filtration) def insert_array(self, filtrations, double max_filtration=numpy.inf): - """Inserts edges in an empty complex. The vertices are numbered from 0 to n, and the filtration values are + """Inserts edges in an empty complex. 
The vertices are numbered from 0 to n, and the filtration values are
+        """Inserts edges in an empty complex. The vertices are numbered from 0 to n-1, and the filtration values are
         encoded in the array, with the diagonal representing the vertices. It is the caller's responsibility to
         ensure that this defines a filtration, which can be achieved with either::
@@ -208,7 +208,7 @@ cdef class SimplexTree:
             filtrations = np.fmax(np.fmax(filtrations, diag[:, None]), diag[None, :])

         :param filtrations: the filtration values of the vertices and edges to insert. The matrix is assumed to be symmetric.
-        :type filtrations: numpy.ndarray of shape (n,n)
+        :type filtrations: symmetric numpy.ndarray of shape (n,n)
         :param max_filtration: only insert vertices and edges with filtration values no larger than max_filtration
         :type max_filtration: float
         """
-- cgit v1.2.3


From 75c94c33897342392e88d18e7dc0c4dbd3cade8b Mon Sep 17 00:00:00 2001
From: Marc Glisse
Date: Wed, 8 Jul 2020 21:55:45 +0200
Subject: Rename insert_array -> insert_edges_from_array

---
 src/python/gudhi/simplex_tree.pyx    | 4 ++--
 src/python/test/test_simplex_tree.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/python/gudhi')

diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx
index 557a80a9..d2d3f459 100644
--- a/src/python/gudhi/simplex_tree.pyx
+++ b/src/python/gudhi/simplex_tree.pyx
@@ -195,7 +195,7 @@ cdef class SimplexTree:
         """
         return self.get_ptr().insert(simplex, filtration)

-    def insert_array(self, filtrations, double max_filtration=numpy.inf):
+    def insert_edges_from_array(self, filtrations, double max_filtration=numpy.inf):
         """Inserts edges in an empty complex. The vertices are numbered from 0 to n-1, and the filtration values are
         encoded in the array, with the diagonal representing the vertices. It is the caller's responsibility to
         ensure that this defines a filtration, which can be achieved with either::
@@ -214,7 +214,7 @@ cdef class SimplexTree:
         """
         filtrations = numpy.asanyarray(filtrations, dtype=float)
         cdef double[:,:] F = filtrations
-        assert self.num_vertices() == 0, "insert_array requires an empty SimplexTree"
+        assert self.num_vertices() == 0, "insert_edges_from_array requires an empty SimplexTree"
         cdef int n = F.shape[0]
         assert n == F.shape[1]
         with nogil:
             self.get_ptr().insert_matrix(&F[0,0], n, F.strides[0], F.strides[1], max_filtration)
diff --git a/src/python/test/test_simplex_tree.py b/src/python/test/test_simplex_tree.py
index f75be58d..02ad63cc 100755
--- a/src/python/test/test_simplex_tree.py
+++ b/src/python/test/test_simplex_tree.py
@@ -342,8 +342,8 @@ def test_simplices_iterator():
         print("filtration is: ", simplex[1])
         assert st.filtration(simplex[0]) == simplex[1]

-def test_insert_array():
+def test_insert_edges_from_array():
     st = SimplexTree()
     a = np.array([[1, 4, 13, 6], [4, 3, 11, 5], [13, 11, 10, 12], [6, 5, 12, 2]])
-    st.insert_array(a, max_filtration=5)
+    st.insert_edges_from_array(a, max_filtration=5)
     assert list(st.get_filtration()) == [([0], 1.0), ([3], 2.0), ([1], 3.0), ([0, 1], 4.0), ([1, 3], 5.0)]
-- cgit v1.2.3


From 83acc2e74cd8a34f34d0082c85ea85b3260d2458 Mon Sep 17 00:00:00 2001
From: Marc Glisse
Date: Wed, 8 Jul 2020 23:35:42 +0200
Subject: insert edges from sparse matrix

---
 src/python/gudhi/simplex_tree.pyx    |  20 +++-
 src/python/test/test_simplex_tree.py | 176 ++++++++++++++++++++++-------------
 2 files changed, 128 insertions(+), 68 deletions(-)

(limited to 'src/python/gudhi')

diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx
index d2d3f459..9d2c30a9 100644
--- a/src/python/gudhi/simplex_tree.pyx
+++ b/src/python/gudhi/simplex_tree.pyx
@@ -208,10 +208,11 @@ cdef class SimplexTree:
             filtrations = 
np.fmax(np.fmax(filtrations, diag[:, None]), diag[None, :]) :param filtrations: the filtration values of the vertices and edges to insert. The matrix is assumed to be symmetric. - :type filtrations: symmetric numpy.ndarray of shape (n,n) + :type filtrations: numpy.ndarray of shape (n,n) :param max_filtration: only insert vertices and edges with filtration values no larger than max_filtration :type max_filtration: float """ + # TODO: document which half of the matrix is actually read? filtrations = numpy.asanyarray(filtrations, dtype=float) cdef double[:,:] F = filtrations assert self.num_vertices() == 0, "insert_edges_from_array requires an empty SimplexTree" @@ -220,6 +221,23 @@ cdef class SimplexTree: with nogil: self.get_ptr().insert_matrix(&F[0,0], n, F.strides[0], F.strides[1], max_filtration) + def insert_edges_from_coo_matrix(self, edges): + """Inserts edges given by a sparse matrix in `COOrdinate format + `_. + Duplicate entries are not allowed. Missing entries are not inserted. Diagonal entries are interpreted as + vertices, although this is only useful if you want to insert vertices with a smaller filtration value than + the smallest edge containing it, since vertices are implicitly inserted together with the edges. + + :param edges: the edges to insert and their filtration values. + :type edges: scipy.sparse.coo_matrix of shape (n,n) + """ + # TODO: optimize this + for edge in zip(edges.row, edges.col, edges.data): + if edge[0] == edge[1]: + self.get_ptr().insert((edge[0],), edge[2]) + else: + self.get_ptr().insert((edge[0], edge[1]), edge[2]) + def get_simplices(self): """This function returns a generator with simplices and their given filtration values. diff --git a/src/python/test/test_simplex_tree.py b/src/python/test/test_simplex_tree.py index 02ad63cc..c6c5dc0e 100755 --- a/src/python/test/test_simplex_tree.py +++ b/src/python/test/test_simplex_tree.py @@ -249,6 +249,7 @@ def test_make_filtration_non_decreasing(): assert st.filtration([3, 4]) == 2.0 assert st.filtration([4, 5]) == 2.0 + def test_extend_filtration(): # Inserted simplex: @@ -257,82 +258,83 @@ def test_extend_filtration(): # / \ / # o o # /2\ /3 - # o o - # 1 0 - - st = SimplexTree() - st.insert([0,2]) - st.insert([1,2]) - st.insert([0,3]) - st.insert([2,5]) - st.insert([3,4]) - st.insert([3,5]) - st.assign_filtration([0], 1.) - st.assign_filtration([1], 2.) - st.assign_filtration([2], 3.) - st.assign_filtration([3], 4.) - st.assign_filtration([4], 5.) - st.assign_filtration([5], 6.) 
- - assert list(st.get_filtration()) == [ - ([0, 2], 0.0), - ([1, 2], 0.0), - ([0, 3], 0.0), - ([3, 4], 0.0), - ([2, 5], 0.0), - ([3, 5], 0.0), - ([0], 1.0), - ([1], 2.0), - ([2], 3.0), - ([3], 4.0), - ([4], 5.0), - ([5], 6.0) + # o o + # 1 0 + + st = SimplexTree() + st.insert([0, 2]) + st.insert([1, 2]) + st.insert([0, 3]) + st.insert([2, 5]) + st.insert([3, 4]) + st.insert([3, 5]) + st.assign_filtration([0], 1.0) + st.assign_filtration([1], 2.0) + st.assign_filtration([2], 3.0) + st.assign_filtration([3], 4.0) + st.assign_filtration([4], 5.0) + st.assign_filtration([5], 6.0) + + assert list(st.get_filtration()) == [ + ([0, 2], 0.0), + ([1, 2], 0.0), + ([0, 3], 0.0), + ([3, 4], 0.0), + ([2, 5], 0.0), + ([3, 5], 0.0), + ([0], 1.0), + ([1], 2.0), + ([2], 3.0), + ([3], 4.0), + ([4], 5.0), + ([5], 6.0), ] - + st.extend_filtration() - - assert list(st.get_filtration()) == [ - ([6], -3.0), - ([0], -2.0), - ([1], -1.8), - ([2], -1.6), - ([0, 2], -1.6), - ([1, 2], -1.6), - ([3], -1.4), - ([0, 3], -1.4), - ([4], -1.2), - ([3, 4], -1.2), - ([5], -1.0), - ([2, 5], -1.0), - ([3, 5], -1.0), - ([5, 6], 1.0), - ([4, 6], 1.2), - ([3, 6], 1.4), + + assert list(st.get_filtration()) == [ + ([6], -3.0), + ([0], -2.0), + ([1], -1.8), + ([2], -1.6), + ([0, 2], -1.6), + ([1, 2], -1.6), + ([3], -1.4), + ([0, 3], -1.4), + ([4], -1.2), + ([3, 4], -1.2), + ([5], -1.0), + ([2, 5], -1.0), + ([3, 5], -1.0), + ([5, 6], 1.0), + ([4, 6], 1.2), + ([3, 6], 1.4), ([3, 4, 6], 1.4), - ([3, 5, 6], 1.4), - ([2, 6], 1.6), - ([2, 5, 6], 1.6), - ([1, 6], 1.8), - ([1, 2, 6], 1.8), - ([0, 6], 2.0), - ([0, 2, 6], 2.0), - ([0, 3, 6], 2.0) + ([3, 5, 6], 1.4), + ([2, 6], 1.6), + ([2, 5, 6], 1.6), + ([1, 6], 1.8), + ([1, 2, 6], 1.8), + ([0, 6], 2.0), + ([0, 2, 6], 2.0), + ([0, 3, 6], 2.0), ] - dgms = st.extended_persistence(min_persistence=-1.) + dgms = st.extended_persistence(min_persistence=-1.0) + + assert dgms[0][0][1][0] == pytest.approx(2.0) + assert dgms[0][0][1][1] == pytest.approx(3.0) + assert dgms[1][0][1][0] == pytest.approx(5.0) + assert dgms[1][0][1][1] == pytest.approx(4.0) + assert dgms[2][0][1][0] == pytest.approx(1.0) + assert dgms[2][0][1][1] == pytest.approx(6.0) + assert dgms[3][0][1][0] == pytest.approx(6.0) + assert dgms[3][0][1][1] == pytest.approx(1.0) - assert dgms[0][0][1][0] == pytest.approx(2.) - assert dgms[0][0][1][1] == pytest.approx(3.) - assert dgms[1][0][1][0] == pytest.approx(5.) - assert dgms[1][0][1][1] == pytest.approx(4.) - assert dgms[2][0][1][0] == pytest.approx(1.) - assert dgms[2][0][1][1] == pytest.approx(6.) - assert dgms[3][0][1][0] == pytest.approx(6.) - assert dgms[3][0][1][1] == pytest.approx(1.) 
 def test_simplices_iterator():
     st = SimplexTree()
-    
+
     assert st.insert([0, 1, 2], filtration=4.0) == True
     assert st.insert([2, 3, 4], filtration=2.0) == True

@@ -342,8 +344,48 @@ def test_simplices_iterator():
         print("filtration is: ", simplex[1])
         assert st.filtration(simplex[0]) == simplex[1]

+
 def test_insert_edges_from_array():
     st = SimplexTree()
     a = np.array([[1, 4, 13, 6], [4, 3, 11, 5], [13, 11, 10, 12], [6, 5, 12, 2]])
     st.insert_edges_from_array(a, max_filtration=5)
     assert list(st.get_filtration()) == [([0], 1.0), ([3], 2.0), ([1], 3.0), ([0, 1], 4.0), ([1, 3], 5.0)]
+
+
+def test_insert_edges_from_coo_matrix():
+    try:
+        from scipy.sparse import coo_matrix
+        from scipy.spatial import cKDTree
+    except ImportError:
+        print("Skipping, no SciPy")
+        return
+
+    st = SimplexTree()
+    st.insert([1, 2, 7], 7)
+    row = np.array([2, 5, 3])
+    col = np.array([1, 4, 6])
+    dat = np.array([1, 2, 3])
+    edges = coo_matrix((dat, (row, col)))
+    st.insert_edges_from_coo_matrix(edges)
+    assert list(st.get_filtration()) == [
+        ([1], 1.0),
+        ([2], 1.0),
+        ([1, 2], 1.0),
+        ([4], 2.0),
+        ([5], 2.0),
+        ([4, 5], 2.0),
+        ([3], 3.0),
+        ([6], 3.0),
+        ([3, 6], 3.0),
+        ([7], 7.0),
+        ([1, 7], 7.0),
+        ([2, 7], 7.0),
+        ([1, 2, 7], 7.0),
+    ]
+
+    pts = np.random.rand(100, 2)
+    tree = cKDTree(pts)
+    edges = tree.sparse_distance_matrix(tree, max_distance=0.15, output_type="coo_matrix")
+    st = SimplexTree()
+    st.insert_edges_from_coo_matrix(edges)
+    assert 100 < st.num_simplices() < 1000
-- cgit v1.2.3


From 31acee0a124afc628b906349e468a9aa9fac4a2a Mon Sep 17 00:00:00 2001
From: Marc Glisse
Date: Thu, 30 Jul 2020 19:45:36 +0200
Subject: Review comments

---
 src/python/gudhi/simplex_tree.pyx           | 5 +----
 src/python/include/Simplex_tree_interface.h | 6 +++---
 2 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'src/python/gudhi')

diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx
index 9d2c30a9..1df2420e 100644
--- a/src/python/gudhi/simplex_tree.pyx
+++ b/src/python/gudhi/simplex_tree.pyx
@@ -233,10 +233,7 @@ cdef class SimplexTree:
         """
         # TODO: optimize this
         for edge in zip(edges.row, edges.col, edges.data):
-            if edge[0] == edge[1]:
-                self.get_ptr().insert((edge[0],), edge[2])
-            else:
-                self.get_ptr().insert((edge[0], edge[1]), edge[2])
+            self.get_ptr().insert((edge[0], edge[1]), edge[2])

     def get_simplices(self):
         """This function returns a generator with simplices and their given
diff --git a/src/python/include/Simplex_tree_interface.h b/src/python/include/Simplex_tree_interface.h
index f570c5fc..3061884f 100644
--- a/src/python/include/Simplex_tree_interface.h
+++ b/src/python/include/Simplex_tree_interface.h
@@ -64,14 +64,14 @@ class Simplex_tree_interface : public Simplex_tree<SimplexTreeOptions> {
     // and this is a bit more efficient.
     auto& rm = this->root()->members_;
     for(int i=0; i<n; ++i) {
-      char* p = (char*)filtrations + i * stride0;
-      double fv = *(double*)(p + i * stride1);
+      char* p = reinterpret_cast<char*>(filtrations) + i * stride0;
+      double fv = *reinterpret_cast<double*>(p + i * stride1);
       if(fv > max_filtration) continue;
       auto sh = rm.emplace_hint(rm.end(), i, Node(this->root(), fv));
       Siblings* children = nullptr;
       // Should we make a first pass to count the number of edges so we can reserve the right space?
       for(int j=i+1; j<n; ++j) {
-        double fe = *(double*)(p + j * stride1);
+        double fe = *reinterpret_cast<double*>(p + j * stride1);
         if(fe > max_filtration) continue;
         if(!children) {
           children = new Siblings(this->root(), i);
-- cgit v1.2.3


From 2830010c74cc74d29691faeeb7bb3a31cc53d87d Mon Sep 17 00:00:00 2001
From: Marc Glisse
Date: Fri, 31 Jul 2020 11:50:11 +0200
Subject: static construction from array

---
 src/python/gudhi/simplex_tree.pyx    | 16 ++++++++++------
 src/python/test/test_simplex_tree.py |  5 ++---
 2 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'src/python/gudhi')

diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx
index 1df2420e..42f1e635 100644
--- a/src/python/gudhi/simplex_tree.pyx
+++ b/src/python/gudhi/simplex_tree.pyx
@@ -195,10 +195,11 @@ cdef class SimplexTree:
         """
         return self.get_ptr().insert(simplex, filtration)

-    def insert_edges_from_array(self, filtrations, double max_filtration=numpy.inf):
-        """Inserts edges in an empty complex. The vertices are numbered from 0 to n-1, and
+    @staticmethod
+    def create_from_array(filtrations, double max_filtration=numpy.inf):
+        """Creates a new, empty complex and inserts vertices and edges. The vertices are numbered from 0 to n-1, and
         the filtration values are encoded in the array, with the diagonal representing the vertices. It is the
         caller's responsibility to ensure that this defines a filtration, which can be achieved with either::

             filtrations[np.diag_indices_from(filtrations)] = filtrations.min(1)

         or::

             diag = filtrations.diagonal()
             filtrations = np.fmax(np.fmax(filtrations, diag[:, None]), diag[None, :])

         :param filtrations: the filtration values of the vertices and edges to insert. The matrix is assumed to be symmetric.
         :type filtrations: numpy.ndarray of shape (n,n)
         :param max_filtration: only insert vertices and edges with filtration values no larger than max_filtration
         :type max_filtration: float
+        :returns: the new complex
+        :rtype: SimplexTree
         """
         # TODO: document which half of the matrix is actually read?
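
[Editor's illustration — not part of the patch series. A minimal usage sketch of the two insertion routes available at this point, with the expected dense-route output taken from test_create_from_array below; create_from_array only exists from this commit on:]

    import numpy as np
    from scipy.sparse import coo_matrix
    from gudhi import SimplexTree

    # Dense route: the diagonal holds vertex filtrations, off-diagonal entries are edges.
    a = np.array([[1., 4., 13., 6.],
                  [4., 3., 11., 5.],
                  [13., 11., 10., 12.],
                  [6., 5., 12., 2.]])
    st = SimplexTree.create_from_array(a, max_filtration=5)
    # list(st.get_filtration()) == [([0], 1.0), ([3], 2.0), ([1], 3.0), ([0, 1], 4.0), ([1, 3], 5.0)]

    # Sparse route: only the stored entries are inserted; vertices come along with their edges.
    edges = coo_matrix((np.array([1., 2., 3.]), (np.array([2, 5, 3]), np.array([1, 4, 6]))))
    st2 = SimplexTree()
    st2.insert_edges_from_coo_matrix(edges)
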
filtrations = numpy.asanyarray(filtrations, dtype=float) cdef double[:,:] F = filtrations - assert self.num_vertices() == 0, "insert_edges_from_array requires an empty SimplexTree" + ret = SimplexTree() cdef int n = F.shape[0] assert n == F.shape[1] with nogil: - self.get_ptr().insert_matrix(&F[0,0], n, F.strides[0], F.strides[1], max_filtration) + ret.get_ptr().insert_matrix(&F[0,0], n, F.strides[0], F.strides[1], max_filtration) + return ret def insert_edges_from_coo_matrix(self, edges): """Inserts edges given by a sparse matrix in `COOrdinate format diff --git a/src/python/test/test_simplex_tree.py b/src/python/test/test_simplex_tree.py index c6c5dc0e..34173e78 100755 --- a/src/python/test/test_simplex_tree.py +++ b/src/python/test/test_simplex_tree.py @@ -345,10 +345,9 @@ def test_simplices_iterator(): assert st.filtration(simplex[0]) == simplex[1] -def test_insert_edges_from_array(): - st = SimplexTree() +def test_create_from_array(): a = np.array([[1, 4, 13, 6], [4, 3, 11, 5], [13, 11, 10, 12], [6, 5, 12, 2]]) - st.insert_edges_from_array(a, max_filtration=5) + st = SimplexTree.create_from_array(a, max_filtration=5) assert list(st.get_filtration()) == [([0], 1.0), ([3], 2.0), ([1], 3.0), ([0, 1], 4.0), ([1, 3], 5.0)] -- cgit v1.2.3 From 74948a7debebdce1ddb7afca169e2c9dc6456fa1 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 1 Apr 2022 22:31:43 +0200 Subject: SimplexTree.insert_batch for multiple simplices of the same dimension --- src/python/gudhi/simplex_tree.pyx | 42 +++++++++++- src/python/test/test_simplex_tree.py | 127 +++++++++++++++++++++++------------ 2 files changed, 123 insertions(+), 46 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 3eebfff7..3646d659 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -8,14 +8,23 @@ # - YYYY/MM Author: Description of the modification from cython.operator import dereference, preincrement -from libc.stdint cimport intptr_t +from libc.stdint cimport intptr_t, int32_t, int64_t import numpy as np cimport gudhi.simplex_tree +cimport cython __author__ = "Vincent Rouvreau" __copyright__ = "Copyright (C) 2016 Inria" __license__ = "MIT" +ctypedef fused some_int: + int32_t + int64_t + +ctypedef fused some_float: + float + double + # SimplexTree python interface cdef class SimplexTree: """The simplex tree is an efficient and flexible data structure for @@ -226,6 +235,7 @@ cdef class SimplexTree: return self.get_ptr().insert(simplex, filtration) @staticmethod + @cython.boundscheck(False) def create_from_array(filtrations, double max_filtration=np.inf): """Creates a new, empty complex and inserts vertices and edges. The vertices are numbered from 0 to n-1, and the filtration values are encoded in the array, with the diagonal representing the vertices. It is the @@ -265,10 +275,38 @@ cdef class SimplexTree: :param edges: the edges to insert and their filtration values. :type edges: scipy.sparse.coo_matrix of shape (n,n) """ - # TODO: optimize this + # TODO: optimize this? for edge in zip(edges.row, edges.col, edges.data): self.get_ptr().insert((edge[0], edge[1]), edge[2]) + @cython.boundscheck(False) + @cython.wraparound(False) + def insert_batch(self, some_int[:,:] vertex_array, some_float[:] filtrations): + """Inserts k-simplices given by a sparse array in a format similar + to `torch.sparse `_. + Duplicate entries are not allowed. Missing entries are not inserted. 
+ Simplices with a repeated vertex are currently interpreted as lower + dimensional simplices, but we do not guarantee this behavior in the + future. Any time a simplex is inserted, its faces are inserted as well + if needed to preserve a simplicial complex. + + :param vertex_array: the k-simplices to insert. + :type vertex_array: numpy.array of shape (k+1,n) + :param filtrations: the filtration values. + :type filtrations: numpy.array of shape (n,) + """ + cdef Py_ssize_t k = vertex_array.shape[0] + cdef Py_ssize_t n = vertex_array.shape[1] + assert filtrations.shape[0] == n + cdef Py_ssize_t i + cdef Py_ssize_t j + cdef vector[int] v + for i in range(n): + for j in range(k): + v.push_back(vertex_array[j, i]) + self.get_ptr().insert(v, filtrations[i]) + v.clear() + def get_simplices(self): """This function returns a generator with simplices and their given filtration values. diff --git a/src/python/test/test_simplex_tree.py b/src/python/test/test_simplex_tree.py index 0436d891..a5b8ffe0 100755 --- a/src/python/test/test_simplex_tree.py +++ b/src/python/test/test_simplex_tree.py @@ -345,9 +345,10 @@ def test_simplices_iterator(): print("filtration is: ", simplex[1]) assert st.filtration(simplex[0]) == simplex[1] + def test_collapse_edges(): st = SimplexTree() - + assert st.insert([0, 1], filtration=1.0) == True assert st.insert([1, 2], filtration=1.0) == True assert st.insert([2, 3], filtration=1.0) == True @@ -362,33 +363,35 @@ def test_collapse_edges(): assert st.num_simplices() == 9 assert st.find([1, 3]) == False for simplex in st.get_skeleton(0): - assert simplex[1] == 1. + assert simplex[1] == 1.0 else: # If no Eigen3, collapse_edges throws an exception with pytest.raises(RuntimeError): st.collapse_edges() + def test_reset_filtration(): st = SimplexTree() - - assert st.insert([0, 1, 2], 3.) == True - assert st.insert([0, 3], 2.) == True - assert st.insert([3, 4, 5], 3.) == True - assert st.insert([0, 1, 6, 7], 4.) == True + + assert st.insert([0, 1, 2], 3.0) == True + assert st.insert([0, 3], 2.0) == True + assert st.insert([3, 4, 5], 3.0) == True + assert st.insert([0, 1, 6, 7], 4.0) == True # Guaranteed by construction for simplex in st.get_simplices(): - assert st.filtration(simplex[0]) >= 2. - + assert st.filtration(simplex[0]) >= 2.0 + # dimension until 5 even if simplex tree is of dimension 3 to test the limits for dimension in range(5, -1, -1): - st.reset_filtration(0., dimension) + st.reset_filtration(0.0, dimension) for simplex in st.get_skeleton(3): print(simplex) if len(simplex[0]) < (dimension) + 1: - assert st.filtration(simplex[0]) >= 2. + assert st.filtration(simplex[0]) >= 2.0 else: - assert st.filtration(simplex[0]) == 0. 
+ assert st.filtration(simplex[0]) == 0.0 + def test_boundaries_iterator(): st = SimplexTree() @@ -404,16 +407,17 @@ def test_boundaries_iterator(): list(st.get_boundaries([])) with pytest.raises(RuntimeError): - list(st.get_boundaries([0, 4])) # (0, 4) does not exist + list(st.get_boundaries([0, 4])) # (0, 4) does not exist with pytest.raises(RuntimeError): - list(st.get_boundaries([6])) # (6) does not exist + list(st.get_boundaries([6])) # (6) does not exist + def test_persistence_intervals_in_dimension(): # Here is our triangulation of a 2-torus - taken from https://dioscuri-tda.org/Paris_TDA_Tutorial_2021.html # 0-----3-----4-----0 # | \ | \ | \ | \ | - # | \ | \ | \| \ | + # | \ | \ | \| \ | # 1-----8-----7-----1 # | \ | \ | \ | \ | # | \ | \ | \ | \ | @@ -422,50 +426,52 @@ def test_persistence_intervals_in_dimension(): # | \ | \ | \ | \ | # 0-----3-----4-----0 st = SimplexTree() - st.insert([0,1,8]) - st.insert([0,3,8]) - st.insert([3,7,8]) - st.insert([3,4,7]) - st.insert([1,4,7]) - st.insert([0,1,4]) - st.insert([1,2,5]) - st.insert([1,5,8]) - st.insert([5,6,8]) - st.insert([6,7,8]) - st.insert([2,6,7]) - st.insert([1,2,7]) - st.insert([0,2,3]) - st.insert([2,3,5]) - st.insert([3,4,5]) - st.insert([4,5,6]) - st.insert([0,4,6]) - st.insert([0,2,6]) + st.insert([0, 1, 8]) + st.insert([0, 3, 8]) + st.insert([3, 7, 8]) + st.insert([3, 4, 7]) + st.insert([1, 4, 7]) + st.insert([0, 1, 4]) + st.insert([1, 2, 5]) + st.insert([1, 5, 8]) + st.insert([5, 6, 8]) + st.insert([6, 7, 8]) + st.insert([2, 6, 7]) + st.insert([1, 2, 7]) + st.insert([0, 2, 3]) + st.insert([2, 3, 5]) + st.insert([3, 4, 5]) + st.insert([4, 5, 6]) + st.insert([0, 4, 6]) + st.insert([0, 2, 6]) st.compute_persistence(persistence_dim_max=True) - + H0 = st.persistence_intervals_in_dimension(0) - assert np.array_equal(H0, np.array([[ 0., float("inf")]])) + assert np.array_equal(H0, np.array([[0.0, float("inf")]])) H1 = st.persistence_intervals_in_dimension(1) - assert np.array_equal(H1, np.array([[ 0., float("inf")], [ 0., float("inf")]])) + assert np.array_equal(H1, np.array([[0.0, float("inf")], [0.0, float("inf")]])) H2 = st.persistence_intervals_in_dimension(2) - assert np.array_equal(H2, np.array([[ 0., float("inf")]])) + assert np.array_equal(H2, np.array([[0.0, float("inf")]])) # Test empty case assert st.persistence_intervals_in_dimension(3).shape == (0, 2) + def test_equality_operator(): st1 = SimplexTree() st2 = SimplexTree() assert st1 == st2 - st1.insert([1,2,3], 4.) + st1.insert([1, 2, 3], 4.0) assert st1 != st2 - st2.insert([1,2,3], 4.) + st2.insert([1, 2, 3], 4.0) assert st1 == st2 + def test_simplex_tree_deep_copy(): st = SimplexTree() - st.insert([1, 2, 3], 0.) + st.insert([1, 2, 3], 0.0) # compute persistence only on the original st.compute_persistence() @@ -484,14 +490,15 @@ def test_simplex_tree_deep_copy(): for a_splx in a_filt_list: assert a_splx in st_filt_list - + # test double free del st del st_copy + def test_simplex_tree_deep_copy_constructor(): st = SimplexTree() - st.insert([1, 2, 3], 0.) 
+ st.insert([1, 2, 3], 0.0) # compute persistence only on the original st.compute_persistence() @@ -510,20 +517,23 @@ def test_simplex_tree_deep_copy_constructor(): for a_splx in a_filt_list: assert a_splx in st_filt_list - + # test double free del st del st_copy + def test_simplex_tree_constructor_exception(): with pytest.raises(TypeError): - st = SimplexTree(other = "Construction from a string shall raise an exception") + st = SimplexTree(other="Construction from a string shall raise an exception") + def test_create_from_array(): a = np.array([[1, 4, 13, 6], [4, 3, 11, 5], [13, 11, 10, 12], [6, 5, 12, 2]]) st = SimplexTree.create_from_array(a, max_filtration=5) assert list(st.get_filtration()) == [([0], 1.0), ([3], 2.0), ([1], 3.0), ([0, 1], 4.0), ([1, 3], 5.0)] + def test_insert_edges_from_coo_matrix(): try: from scipy.sparse import coo_matrix @@ -561,3 +571,32 @@ def test_insert_edges_from_coo_matrix(): st = SimplexTree() st.insert_edges_from_coo_matrix(edges) assert 100 < st.num_simplices() < 1000 + + +def test_insert_batch(): + st = SimplexTree() + # vertices + st.insert_batch(np.array([[6, 1, 5]]), np.array([-5.0, 2.0, -3.0])) + # triangles + st.insert_batch(np.array([[2, 10], [5, 0], [6, 11]]), np.array([4.0, 0.0])) + # edges + st.insert_batch(np.array([[1, 5], [2, 5]]), np.array([1.0, 3.0])) + + assert list(st.get_filtration()) == [ + ([6], -5.0), + ([5], -3.0), + ([0], 0.0), + ([10], 0.0), + ([0, 10], 0.0), + ([11], 0.0), + ([0, 11], 0.0), + ([10, 11], 0.0), + ([0, 10, 11], 0.0), + ([1], 1.0), + ([2], 1.0), + ([1, 2], 1.0), + ([2, 5], 4.0), + ([2, 6], 4.0), + ([5, 6], 4.0), + ([2, 5, 6], 4.0), + ] -- cgit v1.2.3 From 67b1e0ae09d8a975fb72faad1ee9b2f15f22e635 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 1 Apr 2022 23:38:10 +0200 Subject: Doc repeated simplex + nogil --- src/python/gudhi/simplex_tree.pyx | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 3646d659..83d7b092 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -268,9 +268,10 @@ cdef class SimplexTree: def insert_edges_from_coo_matrix(self, edges): """Inserts edges given by a sparse matrix in `COOrdinate format `_. - Duplicate entries are not allowed. Missing entries are not inserted. Diagonal entries are interpreted as - vertices, although this is only useful if you want to insert vertices with a smaller filtration value than - the smallest edge containing it, since vertices are implicitly inserted together with the edges. + If an edge is repeated, the smallest filtration value is used. Missing entries are not inserted. + Diagonal entries are interpreted as vertices, although this is only useful if you want to insert + vertices with a smaller filtration value than the smallest edge containing it, since vertices are + implicitly inserted together with the edges. :param edges: the edges to insert and their filtration values. :type edges: scipy.sparse.coo_matrix of shape (n,n) @@ -284,7 +285,7 @@ cdef class SimplexTree: def insert_batch(self, some_int[:,:] vertex_array, some_float[:] filtrations): """Inserts k-simplices given by a sparse array in a format similar to `torch.sparse `_. - Duplicate entries are not allowed. Missing entries are not inserted. + If a simplex is repeated, the smallest filtration value is used. 
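
[Editor's illustration — not part of the patch; this mirrors test_insert_batch from the previous commit. Note that columns of vertex_array are simplices, rows are their vertices:]

    import numpy as np
    from gudhi import SimplexTree

    st = SimplexTree()
    st.insert_batch(np.array([[6, 1, 5]]), np.array([-5.0, 2.0, -3.0]))          # vertices 6, 1 and 5
    st.insert_batch(np.array([[2, 10], [5, 0], [6, 11]]), np.array([4.0, 0.0]))  # triangles (2,5,6) and (10,0,11)
    st.insert_batch(np.array([[1, 5], [2, 5]]), np.array([1.0, 3.0]))            # edge (1,2); (5,5) degenerates to [5]
    # Faces are inserted as needed, and [5] keeps its earlier, smaller filtration value -3.0.
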
Simplices with a repeated vertex are currently interpreted as lower dimensional simplices, but we do not guarantee this behavior in the future. Any time a simplex is inserted, its faces are inserted as well @@ -301,11 +302,12 @@ cdef class SimplexTree: cdef Py_ssize_t i cdef Py_ssize_t j cdef vector[int] v - for i in range(n): - for j in range(k): - v.push_back(vertex_array[j, i]) - self.get_ptr().insert(v, filtrations[i]) - v.clear() + with nogil: + for i in range(n): + for j in range(k): + v.push_back(vertex_array[j, i]) + self.get_ptr().insert(v, filtrations[i]) + v.clear() def get_simplices(self): """This function returns a generator with simplices and their given -- cgit v1.2.3 From 7b7d71e3a8d1302dc81eb020114fe4c4d767ccb0 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 8 Apr 2022 18:25:32 +0200 Subject: name argument, assert message --- src/python/gudhi/simplex_tree.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 43461e02..1ac03afa 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -244,7 +244,7 @@ cdef class SimplexTree: the filtration values are encoded in the array, with the diagonal representing the vertices. It is the caller's responsibility to ensure that this defines a filtration, which can be achieved with either:: - filtrations[np.diag_indices_from(filtrations)] = filtrations.min(1) + filtrations[np.diag_indices_from(filtrations)] = filtrations.min(axis=1) or:: @@ -263,7 +263,7 @@ cdef class SimplexTree: cdef double[:,:] F = filtrations ret = SimplexTree() cdef int n = F.shape[0] - assert n == F.shape[1] + assert n == F.shape[1], 'create_from_array() expects a square array' with nogil: ret.get_ptr().insert_matrix(&F[0,0], n, F.strides[0], F.strides[1], max_filtration) return ret @@ -301,7 +301,7 @@ cdef class SimplexTree: """ cdef Py_ssize_t k = vertex_array.shape[0] cdef Py_ssize_t n = vertex_array.shape[1] - assert filtrations.shape[0] == n + assert filtrations.shape[0] == n, 'inconsistent sizes for vertex_array and filtrations' cdef Py_ssize_t i cdef Py_ssize_t j cdef vector[int] v -- cgit v1.2.3 From 3aa89676d1dc2cafcc692480bbf424a97dbbd501 Mon Sep 17 00:00:00 2001 From: wreise Date: Wed, 25 May 2022 14:20:12 +0200 Subject: Vectorize Silhouette implementation --- src/python/gudhi/representations/vector_methods.py | 48 ++++++---------------- 1 file changed, 13 insertions(+), 35 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index f8078d03..62b35389 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -85,7 +85,7 @@ class PersistenceImage(BaseEstimator, TransformerMixin): Xfit.append(image.flatten()[np.newaxis,:]) - Xfit = np.concatenate(Xfit,0) + Xfit = np.concatenate(Xfit, 0) return Xfit @@ -235,6 +235,7 @@ class Silhouette(BaseEstimator, TransformerMixin): sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. 
""" self.weight, self.resolution, self.sample_range = weight, resolution, sample_range + self.im_range = None def fit(self, X, y=None): """ @@ -245,6 +246,7 @@ class Silhouette(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) + self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution) return self def transform(self, X): @@ -257,44 +259,20 @@ class Silhouette(BaseEstimator, TransformerMixin): Returns: numpy array with shape (number of diagrams) x (**resolution**): output persistence silhouettes. """ - num_diag, Xfit = len(X), [] - x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution) - step_x = x_values[1] - x_values[0] + Xfit = [] + x_values = self.im_range - for i in range(num_diag): - - diagram, num_pts_in_diag = X[i], X[i].shape[0] - - sh, weights = np.zeros(self.resolution), np.zeros(num_pts_in_diag) - for j in range(num_pts_in_diag): - weights[j] = self.weight(diagram[j,:]) + for i, diag in enumerate(X): + midpoints, heights = (diag[:, 0] + diag[:, 1])/2., (diag[:, 1] - diag[:, 0])/2. + weights = np.array([self.weight(point) for point in diag]) total_weight = np.sum(weights) - for j in range(num_pts_in_diag): - - [px,py] = diagram[j,:2] - weight = weights[j] / total_weight - min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.resolution) - mid_idx = np.clip(np.ceil((0.5*(py+px) - self.sample_range[0]) / step_x).astype(int), 0, self.resolution) - max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution) - - if min_idx < self.resolution and max_idx > 0: - - silhouette_value = self.sample_range[0] + min_idx * step_x - px - for k in range(min_idx, mid_idx): - sh[k] += weight * silhouette_value - silhouette_value += step_x - - silhouette_value = py - self.sample_range[0] - mid_idx * step_x - for k in range(mid_idx, max_idx): - sh[k] += weight * silhouette_value - silhouette_value -= step_x - - Xfit.append(np.reshape(np.sqrt(2) * sh, [1,-1])) - - Xfit = np.concatenate(Xfit, 0) + tent_functions = heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]) + tent_functions[tent_functions < 0.] = 0. + silhouette = np.sum(weights[None, :]/total_weight * tent_functions, axis=1) + Xfit.append(silhouette * np.sqrt(2)) - return Xfit + return np.stack(Xfit, axis=0) def __call__(self, diag): """ -- cgit v1.2.3 From 1a76ecc3e7459e3461e1f182004362dcb663addd Mon Sep 17 00:00:00 2001 From: wreise Date: Wed, 25 May 2022 14:34:11 +0200 Subject: Compactify --- src/python/gudhi/representations/vector_methods.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 62b35389..e6289a37 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -267,8 +267,7 @@ class Silhouette(BaseEstimator, TransformerMixin): weights = np.array([self.weight(point) for point in diag]) total_weight = np.sum(weights) - tent_functions = heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]) - tent_functions[tent_functions < 0.] = 0. 
+ tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) silhouette = np.sum(weights[None, :]/total_weight * tent_functions, axis=1) Xfit.append(silhouette * np.sqrt(2)) -- cgit v1.2.3 From e8d0cbc3311765900e098b472608dc40b84d07d8 Mon Sep 17 00:00:00 2001 From: wreise Date: Wed, 25 May 2022 15:14:15 +0200 Subject: Optimize using keops --- src/python/gudhi/representations/vector_methods.py | 27 +++++++++++++++------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index e6289a37..55dc2c5b 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -10,6 +10,7 @@ # - 2021/11 Vincent Rouvreau: factorize _automatic_sample_range import numpy as np +from pykeops.numpy import Genred from sklearn.base import BaseEstimator, TransformerMixin from sklearn.exceptions import NotFittedError from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler @@ -259,19 +260,29 @@ class Silhouette(BaseEstimator, TransformerMixin): Returns: numpy array with shape (number of diagrams) x (**resolution**): output persistence silhouettes. """ - Xfit = [] - x_values = self.im_range + silhouette_formula = "normalized_weights * ReLU(heights - Abs(x_values - midpoints))" + variables = [ + "normalized_weights = Vi(1)", + "heights = Vi(1)", + "midpoints = Vi(1)", + "x_values = Vj(1)", + ] + silhouette = Genred(silhouette_formula, variables, reduction_op="Sum", axis=0) + + silhouettes_list = [] + x_values = self.im_range for i, diag in enumerate(X): - midpoints, heights = (diag[:, 0] + diag[:, 1])/2., (diag[:, 1] - diag[:, 0])/2. + midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. 
weights = np.array([self.weight(point) for point in diag]) - total_weight = np.sum(weights) + weights /= np.sum(weights) - tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) - silhouette = np.sum(weights[None, :]/total_weight * tent_functions, axis=1) - Xfit.append(silhouette * np.sqrt(2)) + silhouettes_list.append( + np.sqrt(2) * silhouette(weights[:, None], heights[:, None], + midpoints[:, None], x_values[:, None])[:, 0] + ) - return np.stack(Xfit, axis=0) + return np.stack(silhouettes_list, axis=0) def __call__(self, diag): """ -- cgit v1.2.3 From 912156b36da1dce1f73f8d2a63cc18e67c173d54 Mon Sep 17 00:00:00 2001 From: wreise Date: Wed, 25 May 2022 16:43:15 +0200 Subject: Move the initialisation of the Genred method to the constructor of Silhouette --- src/python/gudhi/representations/vector_methods.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 55dc2c5b..c250c98c 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -238,6 +238,15 @@ class Silhouette(BaseEstimator, TransformerMixin): self.weight, self.resolution, self.sample_range = weight, resolution, sample_range self.im_range = None + silhouette_formula = "normalized_weights * ReLU(heights - Abs(x_values - midpoints))" + variables = [ + "normalized_weights = Vi(1)", + "heights = Vi(1)", + "midpoints = Vi(1)", + "x_values = Vj(1)", + ] + self.silhouette = Genred(silhouette_formula, variables, reduction_op="Sum", axis=0) + def fit(self, X, y=None): """ Fit the Silhouette class on a list of persistence diagrams: if any of the values in **sample_range** is numpy.nan, replace it with the corresponding value computed on the given list of persistence diagrams. @@ -260,16 +269,6 @@ class Silhouette(BaseEstimator, TransformerMixin): Returns: numpy array with shape (number of diagrams) x (**resolution**): output persistence silhouettes. 
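
[Editor's aside — not part of the patch. In the Genred formula, Vi(1) variables are indexed by diagram points and Vj(1) by sample points, so the Sum reduction over axis=0 sums over diagram points. A plain-numpy rendering of the same formula (a sketch, not the pykeops API):]

    import numpy as np

    def silhouette_sum(normalized_weights, heights, midpoints, x_values):
        # Sum_i w_i * ReLU(heights_i - |x_j - midpoints_i|), evaluated at every sample x_j
        tents = np.maximum(heights[:, None] - np.abs(x_values[None, :] - midpoints[:, None]), 0.)
        return (normalized_weights[:, None] * tents).sum(axis=0)
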
""" - - silhouette_formula = "normalized_weights * ReLU(heights - Abs(x_values - midpoints))" - variables = [ - "normalized_weights = Vi(1)", - "heights = Vi(1)", - "midpoints = Vi(1)", - "x_values = Vj(1)", - ] - silhouette = Genred(silhouette_formula, variables, reduction_op="Sum", axis=0) - silhouettes_list = [] x_values = self.im_range for i, diag in enumerate(X): @@ -278,7 +277,7 @@ class Silhouette(BaseEstimator, TransformerMixin): weights /= np.sum(weights) silhouettes_list.append( - np.sqrt(2) * silhouette(weights[:, None], heights[:, None], + np.sqrt(2) * self.silhouette(weights[:, None], heights[:, None], midpoints[:, None], x_values[:, None])[:, 0] ) -- cgit v1.2.3 From a70ad064ac3e0aee5c9b8084d35b7ce329c6bddc Mon Sep 17 00:00:00 2001 From: wreise Date: Thu, 2 Jun 2022 15:38:38 +0200 Subject: Start landscape optimization --- src/python/gudhi/representations/vector_methods.py | 64 ++++++++-------------- 1 file changed, 22 insertions(+), 42 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index c250c98c..acc62943 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -141,6 +141,16 @@ class Landscape(BaseEstimator, TransformerMixin): self.nan_in_range = np.isnan(np.array(self.sample_range)) self.new_resolution = self.resolution + self.nan_in_range.sum() + landscape_formula = "(-1)*ReLU(heights - Abs(x_values - midpoints))" + variables = [ + "heights = Vi(1)", + "midpoints = Vi(1)", + "x_values = Vj(1)", + ] + from pykeops.numpy import Genred + self.landscape = Genred(landscape_formula, variables, reduction_op="KMin", + axis=0, opt_arg=self.num_landscapes) + def fit(self, X, y=None): """ Fit the Landscape class on a list of persistence diagrams: if any of the values in **sample_range** is numpy.nan, replace it with the corresponding value computed on the given list of persistence diagrams. @@ -162,53 +172,23 @@ class Landscape(BaseEstimator, TransformerMixin): Returns: numpy array with shape (number of diagrams) x (number of samples = **num_landscapes** x **resolution**): output persistence landscapes. 
""" - num_diag, Xfit = len(X), [] - x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) - step_x = x_values[1] - x_values[0] - - for i in range(num_diag): - - diagram, num_pts_in_diag = X[i], X[i].shape[0] - ls = np.zeros([self.num_landscapes, self.new_resolution]) - - events = [] - for j in range(self.new_resolution): - events.append([]) - - for j in range(num_pts_in_diag): - [px,py] = diagram[j,:2] - min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.new_resolution) - mid_idx = np.clip(np.ceil((0.5*(py+px) - self.sample_range[0]) / step_x).astype(int), 0, self.new_resolution) - max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.new_resolution) - - if min_idx < self.new_resolution and max_idx > 0: - - landscape_value = self.sample_range[0] + min_idx * step_x - px - for k in range(min_idx, mid_idx): - events[k].append(landscape_value) - landscape_value += step_x - - landscape_value = py - self.sample_range[0] - mid_idx * step_x - for k in range(mid_idx, max_idx): - events[k].append(landscape_value) - landscape_value -= step_x - - for j in range(self.new_resolution): - events[j].sort(reverse=True) - for k in range( min(self.num_landscapes, len(events[j])) ): - ls[k,j] = events[j][k] + Xfit = [] + x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + for i, diag in enumerate(X): + midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. + tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) + tent_functions.partition(diag.shape[0] - self.num_landscapes, axis=1) + landscapes = np.sort(tent_functions[-self.num_landscapes:, :])[::-1].T if self.nan_in_range[0]: - ls = ls[:,1:] + landscapes = landscapes[:,1:] if self.nan_in_range[1]: - ls = ls[:,:-1] - ls = np.sqrt(2)*np.reshape(ls,[1,-1]) - Xfit.append(ls) - - Xfit = np.concatenate(Xfit,0) + landscapes = landscapes[:,:-1] + landscapes = np.sqrt(2)*np.reshape(landscapes, [1, -1]) + Xfit.append(landscapes) - return Xfit + return np.stack(Xfit, axis=0) def __call__(self, diag): """ -- cgit v1.2.3 From 42b18e60e418f4078cd6406dcc202b696798c844 Mon Sep 17 00:00:00 2001 From: wreise Date: Sun, 5 Jun 2022 16:59:02 +0200 Subject: Import pykeops locally in Silhouette and Lanscapes --- src/python/gudhi/representations/vector_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index acc62943..b0843120 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -10,7 +10,6 @@ # - 2021/11 Vincent Rouvreau: factorize _automatic_sample_range import numpy as np -from pykeops.numpy import Genred from sklearn.base import BaseEstimator, TransformerMixin from sklearn.exceptions import NotFittedError from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler @@ -218,6 +217,7 @@ class Silhouette(BaseEstimator, TransformerMixin): self.weight, self.resolution, self.sample_range = weight, resolution, sample_range self.im_range = None + from pykeops.numpy import Genred silhouette_formula = "normalized_weights * ReLU(heights - Abs(x_values - midpoints))" variables = [ "normalized_weights = Vi(1)", -- cgit v1.2.3 From 60e57f9c86a7aae67c2931200066aba059ec2721 Mon Sep 17 00:00:00 2001 From: wreise Date: Fri, 5 Aug 2022 22:19:30 +0200 Subject: 
Test the numpy version --- src/python/gudhi/representations/vector_methods.py | 42 ++++++---------------- 1 file changed, 11 insertions(+), 31 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index b0843120..7f311b3b 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -140,16 +140,6 @@ class Landscape(BaseEstimator, TransformerMixin): self.nan_in_range = np.isnan(np.array(self.sample_range)) self.new_resolution = self.resolution + self.nan_in_range.sum() - landscape_formula = "(-1)*ReLU(heights - Abs(x_values - midpoints))" - variables = [ - "heights = Vi(1)", - "midpoints = Vi(1)", - "x_values = Vj(1)", - ] - from pykeops.numpy import Genred - self.landscape = Genred(landscape_formula, variables, reduction_op="KMin", - axis=0, opt_arg=self.num_landscapes) - def fit(self, X, y=None): """ Fit the Landscape class on a list of persistence diagrams: if any of the values in **sample_range** is numpy.nan, replace it with the corresponding value computed on the given list of persistence diagrams. @@ -178,13 +168,13 @@ class Landscape(BaseEstimator, TransformerMixin): midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) tent_functions.partition(diag.shape[0] - self.num_landscapes, axis=1) - landscapes = np.sort(tent_functions[-self.num_landscapes:, :])[::-1].T + landscapes = np.sort(tent_functions, axis=1)[:, -self.num_landscapes:][:, ::-1].T if self.nan_in_range[0]: - landscapes = landscapes[:,1:] + landscapes = landscapes[:, 1:] if self.nan_in_range[1]: - landscapes = landscapes[:,:-1] - landscapes = np.sqrt(2)*np.reshape(landscapes, [1, -1]) + landscapes = landscapes[:, :-1] + landscapes = np.sqrt(2) * np.ravel(landscapes) Xfit.append(landscapes) return np.stack(Xfit, axis=0) @@ -217,16 +207,6 @@ class Silhouette(BaseEstimator, TransformerMixin): self.weight, self.resolution, self.sample_range = weight, resolution, sample_range self.im_range = None - from pykeops.numpy import Genred - silhouette_formula = "normalized_weights * ReLU(heights - Abs(x_values - midpoints))" - variables = [ - "normalized_weights = Vi(1)", - "heights = Vi(1)", - "midpoints = Vi(1)", - "x_values = Vj(1)", - ] - self.silhouette = Genred(silhouette_formula, variables, reduction_op="Sum", axis=0) - def fit(self, X, y=None): """ Fit the Silhouette class on a list of persistence diagrams: if any of the values in **sample_range** is numpy.nan, replace it with the corresponding value computed on the given list of persistence diagrams. @@ -249,19 +229,19 @@ class Silhouette(BaseEstimator, TransformerMixin): Returns: numpy array with shape (number of diagrams) x (**resolution**): output persistence silhouettes. """ - silhouettes_list = [] + Xfit = [] x_values = self.im_range + for i, diag in enumerate(X): midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. 
weights = np.array([self.weight(point) for point in diag]) - weights /= np.sum(weights) + total_weight = np.sum(weights) - silhouettes_list.append( - np.sqrt(2) * self.silhouette(weights[:, None], heights[:, None], - midpoints[:, None], x_values[:, None])[:, 0] - ) + tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) + silhouette = np.sum(weights[None, :] / total_weight * tent_functions, axis=1) + Xfit.append(silhouette * np.sqrt(2)) - return np.stack(silhouettes_list, axis=0) + return np.stack(Xfit, axis=0) def __call__(self, diag): """ -- cgit v1.2.3 From 26997dfccab9e8340417aaa33cf517af1ec32562 Mon Sep 17 00:00:00 2001 From: albert-github Date: Thu, 25 Aug 2022 14:01:50 +0200 Subject: Spelling corrections Correction of some obvious spelling errors --- src/Contraction/example/Rips_contraction.cpp | 2 +- src/Contraction/include/gudhi/Edge_contraction.h | 2 +- src/GudhUI/view/Viewer_instructor.h | 2 +- src/Persistence_representations/test/persistence_lanscapes_test.cpp | 2 +- src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h | 2 +- src/Skeleton_blocker/include/gudhi/Skeleton_blocker_complex.h | 2 +- src/Skeleton_blocker/include/gudhi/Skeleton_blocker_link_complex.h | 2 +- src/cmake/modules/GUDHI_options.cmake | 4 ++-- src/python/gudhi/tensorflow/cubical_layer.py | 2 +- 9 files changed, 10 insertions(+), 10 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/Contraction/example/Rips_contraction.cpp b/src/Contraction/example/Rips_contraction.cpp index 42dd0910..547c290e 100644 --- a/src/Contraction/example/Rips_contraction.cpp +++ b/src/Contraction/example/Rips_contraction.cpp @@ -39,7 +39,7 @@ void build_rips(ComplexType& complex, double offset) { int main(int argc, char *argv[]) { if (argc != 3) { std::cerr << "Usage " << argv[0] << " ../../../data/meshes/SO3_10000.off 0.3 to load the file " << - "../../data/SO3_10000.off and contract the Rips complex built with paremeter 0.3.\n"; + "../../data/SO3_10000.off and contract the Rips complex built with parameter 0.3.\n"; return -1; } diff --git a/src/Contraction/include/gudhi/Edge_contraction.h b/src/Contraction/include/gudhi/Edge_contraction.h index 0b43c3b3..dff6dc14 100644 --- a/src/Contraction/include/gudhi/Edge_contraction.h +++ b/src/Contraction/include/gudhi/Edge_contraction.h @@ -48,7 +48,7 @@ Therefore, the simplification can be done without enumerating the set of simplic A typical application of this package is homology group computation. It is illustrated in the next figure where a Rips complex is built upon a set of high-dimensional points and simplified with edge contractions. -It has initially a big number of simplices (around 20 millions) but simplifying it to a much reduced form with only 15 vertices (and 714 simplices) takes only few seconds on a desktop machine (see the example bellow). +It has initially a big number of simplices (around 20 millions) but simplifying it to a much reduced form with only 15 vertices (and 714 simplices) takes only few seconds on a desktop machine (see the example below). One can then compute homology group with a simplicial complex having very few simplices instead of running the homology algorithm on the much bigger initial set of simplices which would take much more time and memory. 
diff --git a/src/GudhUI/view/Viewer_instructor.h b/src/GudhUI/view/Viewer_instructor.h
index 58cbcd31..09ed102f 100644
--- a/src/GudhUI/view/Viewer_instructor.h
+++ b/src/GudhUI/view/Viewer_instructor.h
@@ -11,7 +11,7 @@
 #ifndef VIEW_VIEWER_INSTRUCTOR_H_
 #define VIEW_VIEWER_INSTRUCTOR_H_

-// todo do a viewer instructor that have directely a pointer to a QGLviewer and buffer ot not triangles
+// todo do a viewer instructor that has directly a pointer to a QGLviewer and buffer ot not triangles

 #include
 #include
diff --git a/src/Persistence_representations/test/persistence_lanscapes_test.cpp b/src/Persistence_representations/test/persistence_lanscapes_test.cpp
index 21ef18a0..59924f16 100644
--- a/src/Persistence_representations/test/persistence_lanscapes_test.cpp
+++ b/src/Persistence_representations/test/persistence_lanscapes_test.cpp
@@ -238,7 +238,7 @@ if ( argc != 2 )
   double integral = p.compute_integral_of_landscape();
   cout << "integral : " << integral << endl;
 [...]
diff --git a/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h b/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h
--- a/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h
+++ b/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h
@@ ... @@ class Persistent_cohomology {
   std::vector<Simplex_key> dsets_;
   /* The compressed annotation matrix fields.*/
   Cam cam_;
-  /* Dictionary establishing the correspondance between the Simplex_key of
+  /* Dictionary establishing the correspondence between the Simplex_key of
    * the root vertex in the union-find ds and the Simplex_key of the vertex which
    * created the connected component as a 0-dimension homology feature.*/
   std::map<Simplex_key, Simplex_key> zero_cocycles_;
diff --git a/src/Skeleton_blocker/include/gudhi/Skeleton_blocker_complex.h b/src/Skeleton_blocker/include/gudhi/Skeleton_blocker_complex.h
index 8ceaa480..b4ffc756 100644
--- a/src/Skeleton_blocker/include/gudhi/Skeleton_blocker_complex.h
+++ b/src/Skeleton_blocker/include/gudhi/Skeleton_blocker_complex.h
@@ -1291,7 +1291,7 @@ class Skeleton_blocker_complex {
   typedef boost::iterator_range<Complex_neighbors_vertices_iterator> Complex_neighbors_vertices_range;

   /**
-   * @brief Returns a Complex_edge_range over all edges of the simplicial complex that passes trough v
+   * @brief Returns a Complex_edge_range over all edges of the simplicial complex that passes through v
    */
   Complex_neighbors_vertices_range vertex_range(Vertex_handle v) const {
     auto begin = Complex_neighbors_vertices_iterator(this, v);
diff --git a/src/Skeleton_blocker/include/gudhi/Skeleton_blocker_link_complex.h b/src/Skeleton_blocker/include/gudhi/Skeleton_blocker_link_complex.h
index a2637da3..b3bf0382 100644
--- a/src/Skeleton_blocker/include/gudhi/Skeleton_blocker_link_complex.h
+++ b/src/Skeleton_blocker/include/gudhi/Skeleton_blocker_link_complex.h
@@ -164,7 +164,7 @@ ComplexType> {
         Vertex_handle y_parent = *parent_complex.get_address(
             this->get_id(*y_link));
         if (parent_complex.contains_edge(x_parent, y_parent)) {
-          // we check that there is no blocker subset of alpha passing trough x and y
+          // we check that there is no blocker subset of alpha passing through x and y
           bool new_edge = true;
           for (auto blocker_parent : parent_complex.const_blocker_range(
               x_parent)) {
diff --git a/src/cmake/modules/GUDHI_options.cmake b/src/cmake/modules/GUDHI_options.cmake
index 5e28c87d..8379e3c6 100644
--- a/src/cmake/modules/GUDHI_options.cmake
+++ b/src/cmake/modules/GUDHI_options.cmake
@@ -4,7 +4,7 @@ option(WITH_GUDHI_REMOTE_TEST "Activate/deactivate datasets fetching test which
 option(WITH_GUDHI_PYTHON "Activate/deactivate python module compilation and installation" ON)
 option(WITH_GUDHI_TEST "Activate/deactivate examples compilation and installation" ON)
 option(WITH_GUDHI_UTILITIES "Activate/deactivate utilities compilation and installation" ON)
-option(WITH_GUDHI_THIRD_PARTY "Activate/deactivate third party libraries cmake detection. 
When set to OFF, it is usefull for doxygen or user_version i.e." ON) +option(WITH_GUDHI_THIRD_PARTY "Activate/deactivate third party libraries cmake detection. When set to OFF, it is useful for doxygen or user_version i.e." ON) if (NOT WITH_GUDHI_THIRD_PARTY) set (WITH_GUDHI_BENCHMARK OFF) @@ -12,4 +12,4 @@ if (NOT WITH_GUDHI_THIRD_PARTY) set (WITH_GUDHI_PYTHON OFF) set (WITH_GUDHI_TEST OFF) set (WITH_GUDHI_UTILITIES OFF) -endif() \ No newline at end of file +endif() diff --git a/src/python/gudhi/tensorflow/cubical_layer.py b/src/python/gudhi/tensorflow/cubical_layer.py index 3304e719..5df2c370 100644 --- a/src/python/gudhi/tensorflow/cubical_layer.py +++ b/src/python/gudhi/tensorflow/cubical_layer.py @@ -18,7 +18,7 @@ def _Cubical(Xflat, Xdim, dimensions, homology_coeff_field): cc = CubicalComplex(dimensions=Xdim[::-1], top_dimensional_cells=Xflat) cc.compute_persistence(homology_coeff_field=homology_coeff_field) - # Retrieve and ouput image indices/pixels corresponding to positive and negative simplices + # Retrieve and output image indices/pixels corresponding to positive and negative simplices cof_pp = cc.cofaces_of_persistence_pairs() L_cofs = [] -- cgit v1.2.3 From d9dfffdb580ab865a829fce851779f33fa47e4f7 Mon Sep 17 00:00:00 2001 From: wreise Date: Fri, 7 Oct 2022 18:13:26 +0200 Subject: Add triming of the range; Marcs' comments --- src/python/gudhi/representations/vector_methods.py | 31 ++++++++++++++-------- src/python/test/test_representations.py | 2 +- 2 files changed, 21 insertions(+), 12 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 8c8b46db..8114885e 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -123,6 +123,15 @@ def _automatic_sample_range(sample_range, X, y): pass return sample_range + +def trim_on_edges(x, are_endpoints_nan): + if are_endpoints_nan[0]: + x = x[1:] + if are_endpoints_nan[1]: + x = x[:-1] + return x + + class Landscape(BaseEstimator, TransformerMixin): """ This is a class for computing persistence landscapes from a list of persistence diagrams. A persistence landscape is a collection of 1D piecewise-linear functions computed from the rank function associated to the persistence diagram. These piecewise-linear functions are then sampled evenly on a given range and the corresponding vectors of samples are concatenated and returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details. @@ -149,6 +158,8 @@ class Landscape(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) + self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + self.im_range = trim_on_edges(self.im_range, self.nan_in_range) return self def transform(self, X): @@ -163,17 +174,13 @@ class Landscape(BaseEstimator, TransformerMixin): """ Xfit = [] - x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + x_values = self.im_range for i, diag in enumerate(X): midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. 
tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) tent_functions.partition(diag.shape[0] - self.num_landscapes, axis=1) - landscapes = np.sort(tent_functions, axis=1)[:, -self.num_landscapes:][:, ::-1].T + landscapes = tent_functions[:, -self.num_landscapes:][:, ::-1].T - if self.nan_in_range[0]: - landscapes = landscapes[:, 1:] - if self.nan_in_range[1]: - landscapes = landscapes[:, :-1] landscapes = np.sqrt(2) * np.ravel(landscapes) Xfit.append(landscapes) @@ -189,7 +196,7 @@ class Landscape(BaseEstimator, TransformerMixin): Returns: numpy array with shape (number of samples = **num_landscapes** x **resolution**): output persistence landscape. """ - return self.fit_transform([diag])[0,:] + return self.fit_transform([diag])[0, :] class Silhouette(BaseEstimator, TransformerMixin): """ @@ -205,7 +212,8 @@ class Silhouette(BaseEstimator, TransformerMixin): sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. """ self.weight, self.resolution, self.sample_range = weight, resolution, sample_range - self.im_range = None + self.nan_in_range = np.isnan(np.array(self.sample_range)) + self.new_resolution = self.resolution + self.nan_in_range.sum() def fit(self, X, y=None): """ @@ -216,7 +224,8 @@ class Silhouette(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution) + self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + self.im_range = trim_on_edges(self.im_range, self.nan_in_range) return self def transform(self, X): @@ -232,9 +241,9 @@ class Silhouette(BaseEstimator, TransformerMixin): Xfit = [] x_values = self.im_range - for i, diag in enumerate(X): + for diag in X: midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. 
- weights = np.array([self.weight(point) for point in diag]) + weights = np.array([self.weight(pt) for pt in diag]) total_weight = np.sum(weights) tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index 823e8620..948d7804 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -209,7 +209,7 @@ def test_silhouette_multiplication_invariance(): def test_silhouette_numeric(): dgm = np.array([[2., 3.], [5., 6.]]) - slt = Silhouette(resolution=9, weight=pow(1)) + slt = Silhouette(resolution=9, weight=pow(1), sample_range=[2., 6.]) #slt.fit([dgm]) # x_values = array([2., 2.5, 3., 3.5, 4., 4.5, 5., 5.5, 6.]) -- cgit v1.2.3 From 059ff0c42a069c744ed121c948bc3d39b5cc7f10 Mon Sep 17 00:00:00 2001 From: wreise Date: Fri, 7 Oct 2022 18:24:32 +0200 Subject: Remove enumerate --- src/python/gudhi/representations/vector_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 8114885e..5ea4ea48 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -175,7 +175,7 @@ class Landscape(BaseEstimator, TransformerMixin): Xfit = [] x_values = self.im_range - for i, diag in enumerate(X): + for diag in X: midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) tent_functions.partition(diag.shape[0] - self.num_landscapes, axis=1) -- cgit v1.2.3 From 4aac9e03c400bd43f237504cf4ff9d25f041e473 Mon Sep 17 00:00:00 2001 From: wreise Date: Wed, 12 Oct 2022 15:55:22 +0200 Subject: Clean argpartition --- src/python/gudhi/representations/vector_methods.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 5ea4ea48..3a91eccd 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -124,7 +124,7 @@ def _automatic_sample_range(sample_range, X, y): return sample_range -def trim_on_edges(x, are_endpoints_nan): +def _trim_on_edges(x, are_endpoints_nan): if are_endpoints_nan[0]: x = x[1:] if are_endpoints_nan[1]: @@ -159,7 +159,7 @@ class Landscape(BaseEstimator, TransformerMixin): """ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) - self.im_range = trim_on_edges(self.im_range, self.nan_in_range) + self.im_range = _trim_on_edges(self.im_range, self.nan_in_range) return self def transform(self, X): @@ -178,9 +178,17 @@ class Landscape(BaseEstimator, TransformerMixin): for diag in X: midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. 
tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) - tent_functions.partition(diag.shape[0] - self.num_landscapes, axis=1) - landscapes = tent_functions[:, -self.num_landscapes:][:, ::-1].T - + n_points = diag.shape[0] + # Get indices of largest elements: can't take more than n_points - 1 (the last ones are in the right position) + argpartition = np.argpartition(-tent_functions, min(self.num_landscapes, n_points-1), axis=1) + landscapes = np.take_along_axis(tent_functions, argpartition, axis=1) + landscapes = landscapes[:, :min(self.num_landscapes, n_points)].T + + # Complete the array with zeros to get the right number of landscapes + if self.num_landscapes > n_points: + landscapes = np.concatenate([ + landscapes, np.zeros((self.num_landscapes-n_points, *landscapes.shape[1:])) + ], axis=0) landscapes = np.sqrt(2) * np.ravel(landscapes) Xfit.append(landscapes) @@ -225,7 +233,7 @@ class Silhouette(BaseEstimator, TransformerMixin): """ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) - self.im_range = trim_on_edges(self.im_range, self.nan_in_range) + self.im_range = _trim_on_edges(self.im_range, self.nan_in_range) return self def transform(self, X): -- cgit v1.2.3 From e94e490554ce56040b998893977255a704d35e59 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 15 Oct 2022 01:18:56 +0200 Subject: Fix badly centered barcode --- src/python/gudhi/persistence_graphical_tools.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/persistence_graphical_tools.py b/src/python/gudhi/persistence_graphical_tools.py index 21275cdd..e438aa66 100644 --- a/src/python/gudhi/persistence_graphical_tools.py +++ b/src/python/gudhi/persistence_graphical_tools.py @@ -194,19 +194,21 @@ def plot_persistence_barcode( y=[(death - birth) if death != float("inf") else (infinity - birth) for (dim,(birth,death)) in persistence] c=[colormap[dim] for (dim,(birth,death)) in persistence] - axes.barh(list(reversed(range(len(x)))), y, height=0.8, left=x, alpha=alpha, color=c, linewidth=0) + axes.barh(range(len(x)), y, left=x, alpha=alpha, color=c, linewidth=0) if legend: - dimensions = list(set(item[0] for item in persistence)) + dimensions = set(item[0] for item in persistence) axes.legend( handles=[mpatches.Patch(color=colormap[dim], label=str(dim)) for dim in dimensions], loc="lower right", ) axes.set_title("Persistence barcode", fontsize=fontsize) + axes.set_yticks([]) + axes.invert_yaxis() # Ends plot on infinity value and starts a little bit before min_birth if len(x) != 0: - axes.axis([axis_start, infinity, 0, len(x)]) + axes.set_xlim((axis_start, infinity)) return axes except ImportError as import_error: -- cgit v1.2.3 From 74617f0673aa13bce47833c366321a8838a7d123 Mon Sep 17 00:00:00 2001 From: wreise Date: Sat, 15 Oct 2022 17:00:25 +0200 Subject: Remove old lines --- src/python/gudhi/representations/vector_methods.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 3a91eccd..6267e077 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -179,10 +179,8 @@ class Landscape(BaseEstimator, TransformerMixin): midpoints, heights = (diag[:, 0] + diag[:, 1]) 
/ 2., (diag[:, 1] - diag[:, 0]) / 2. tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) n_points = diag.shape[0] - # Get indices of largest elements: can't take more than n_points - 1 (the last ones are in the right position) - argpartition = np.argpartition(-tent_functions, min(self.num_landscapes, n_points-1), axis=1) - landscapes = np.take_along_axis(tent_functions, argpartition, axis=1) - landscapes = landscapes[:, :min(self.num_landscapes, n_points)].T + tent_functions.partition(n_points-self.num_landscapes, axis=1) + landscapes = np.sort(tent_functions[:, -self.num_landscapes:], axis=1)[:, ::-1].T # Complete the array with zeros to get the right number of landscapes if self.num_landscapes > n_points: -- cgit v1.2.3 From cd7dea8627f4b1c624e88d5ff28b32d1602f5e39 Mon Sep 17 00:00:00 2001 From: wreise Date: Sat, 15 Oct 2022 18:45:42 +0200 Subject: Treat the case when there are less points than landscape layers --- src/python/gudhi/representations/vector_methods.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 6267e077..a169aee8 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -179,14 +179,15 @@ class Landscape(BaseEstimator, TransformerMixin): midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) n_points = diag.shape[0] - tent_functions.partition(n_points-self.num_landscapes, axis=1) - landscapes = np.sort(tent_functions[:, -self.num_landscapes:], axis=1)[:, ::-1].T - # Complete the array with zeros to get the right number of landscapes if self.num_landscapes > n_points: - landscapes = np.concatenate([ - landscapes, np.zeros((self.num_landscapes-n_points, *landscapes.shape[1:])) - ], axis=0) + tent_functions = np.concatenate( + [tent_functions, np.zeros((tent_functions.shape[0], self.num_landscapes-n_points))], + axis=1 + ) + tent_functions.partition(tent_functions.shape[1]-self.num_landscapes, axis=1) + landscapes = np.sort(tent_functions[:, -self.num_landscapes:], axis=1)[:, ::-1].T + landscapes = np.sqrt(2) * np.ravel(landscapes) Xfit.append(landscapes) -- cgit v1.2.3 From 19412d57d281acfd2d14efd15764e45da837b87a Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sun, 16 Oct 2022 23:03:30 +0200 Subject: doc + comments --- src/python/gudhi/simplex_tree.pyx | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 6b1b5c00..372cb15c 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -272,12 +272,14 @@ cdef class SimplexTree: """Inserts edges given by a sparse matrix in `COOrdinate format `_. If an edge is repeated, the smallest filtration value is used. Missing entries are not inserted. - Diagonal entries are interpreted as vertices, although this is only useful if you want to insert - vertices with a smaller filtration value than the smallest edge containing it, since vertices are - implicitly inserted together with the edges. 
+ Diagonal entries are currently interpreted as vertices, although we do not guarantee this behavior + in the future, and this is only useful if you want to insert vertices with a smaller filtration value + than the smallest edge containing it, since vertices are implicitly inserted together with the edges. :param edges: the edges to insert and their filtration values. :type edges: scipy.sparse.coo_matrix of shape (n,n) + + .. seealso:: :func:`insert_batch` """ # TODO: optimize this? for edge in zip(edges.row, edges.col, edges.data): @@ -299,6 +301,8 @@ cdef class SimplexTree: :param filtrations: the filtration values. :type filtrations: numpy.array of shape (n,) """ + # This may be slow if we end up inserting vertices in a bad order (flat_map). + # We could first insert the vertices from np.unique(vertex_array), or leave it to the caller. cdef Py_ssize_t k = vertex_array.shape[0] cdef Py_ssize_t n = vertex_array.shape[1] assert filtrations.shape[0] == n, 'inconsistent sizes for vertex_array and filtrations' -- cgit v1.2.3 From 26d7bcc518f3bdc9b0d8f854f2879ed9c219e440 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 3 Nov 2022 14:56:34 +0100 Subject: Translate n_jobs to workers for SciPy --- src/python/gudhi/point_cloud/knn.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index de5844f9..7dc83817 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -314,7 +314,9 @@ class KNearestNeighbors: return None if self.params["implementation"] == "ckdtree": - qargs = {key: val for key, val in self.params.items() if key in {"p", "eps", "n_jobs"}} + qargs = {key: val for key, val in self.params.items() if key in {"p", "eps"}} + # SciPy renamed n_jobs to workers + qargs["workers"] = self.params.get("workers") or self.params.get("n_jobs") or 1 distances, neighbors = self.kdtree.query(X, k=self.k, **qargs) if k == 1: # SciPy decided to squeeze the last dimension for k=1 -- cgit v1.2.3 From 2ebdeb905d3ca90e2ba2d24e6d3aac52240f6c86 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 4 Nov 2022 14:05:42 +0100 Subject: More consistent choice of a grid for diagram representations --- src/python/gudhi/representations/vector_methods.py | 46 +++++++++++++++------- src/python/test/test_representations.py | 12 ++++++ 2 files changed, 44 insertions(+), 14 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index a169aee8..212fa9f5 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -124,7 +124,7 @@ def _automatic_sample_range(sample_range, X, y): return sample_range -def _trim_on_edges(x, are_endpoints_nan): +def _trim_endpoints(x, are_endpoints_nan): if are_endpoints_nan[0]: x = x[1:] if are_endpoints_nan[1]: @@ -136,7 +136,7 @@ class Landscape(BaseEstimator, TransformerMixin): """ This is a class for computing persistence landscapes from a list of persistence diagrams. A persistence landscape is a collection of 1D piecewise-linear functions computed from the rank function associated to the persistence diagram. These piecewise-linear functions are then sampled evenly on a given range and the corresponding vectors of samples are concatenated and returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details. 
""" - def __init__(self, num_landscapes=5, resolution=100, sample_range=[np.nan, np.nan]): + def __init__(self, num_landscapes=5, resolution=100, sample_range=[np.nan, np.nan], *, keep_endpoints=False): """ Constructor for the Landscape class. @@ -144,10 +144,14 @@ class Landscape(BaseEstimator, TransformerMixin): num_landscapes (int): number of piecewise-linear functions to output (default 5). resolution (int): number of sample for all piecewise-linear functions (default 100). sample_range ([double, double]): minimum and maximum of all piecewise-linear function domains, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. + keep_endpoints (bool): when guessing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. """ self.num_landscapes, self.resolution, self.sample_range = num_landscapes, resolution, sample_range self.nan_in_range = np.isnan(np.array(self.sample_range)) - self.new_resolution = self.resolution + self.nan_in_range.sum() + self.new_resolution = self.resolution + if not keep_endpoints: + self.new_resolution += self.nan_in_range.sum() + self.keep_endpoints = keep_endpoints def fit(self, X, y=None): """ @@ -158,8 +162,9 @@ class Landscape(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) - self.im_range = _trim_on_edges(self.im_range, self.nan_in_range) + self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + if not self.keep_endpoints: + self.grid_ = _trim_endpoints(self.grid_, self.nan_in_range) return self def transform(self, X): @@ -174,7 +179,7 @@ class Landscape(BaseEstimator, TransformerMixin): """ Xfit = [] - x_values = self.im_range + x_values = self.grid_ for diag in X: midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0) @@ -209,7 +214,7 @@ class Silhouette(BaseEstimator, TransformerMixin): """ This is a class for computing persistence silhouettes from a list of persistence diagrams. A persistence silhouette is computed by taking a weighted average of the collection of 1D piecewise-linear functions given by the persistence landscapes, and then by evenly sampling this average on a given range. Finally, the corresponding vector of samples is returned. See https://arxiv.org/abs/1312.0308 for more details. """ - def __init__(self, weight=lambda x: 1, resolution=100, sample_range=[np.nan, np.nan]): + def __init__(self, weight=lambda x: 1, resolution=100, sample_range=[np.nan, np.nan], *, keep_endpoints=False): """ Constructor for the Silhouette class. @@ -217,10 +222,14 @@ class Silhouette(BaseEstimator, TransformerMixin): weight (function): weight function for the persistence diagram points (default constant function, ie lambda x: 1). This function must be defined on 2D points, ie on lists or numpy arrays of the form [p_x,p_y]. resolution (int): number of samples for the weighted average (default 100). 
sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. + keep_endpoints (bool): when guessing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. """ self.weight, self.resolution, self.sample_range = weight, resolution, sample_range self.nan_in_range = np.isnan(np.array(self.sample_range)) - self.new_resolution = self.resolution + self.nan_in_range.sum() + self.new_resolution = self.resolution + if not keep_endpoints: + self.new_resolution += self.nan_in_range.sum() + self.keep_endpoints = keep_endpoints def fit(self, X, y=None): """ @@ -231,8 +240,9 @@ class Silhouette(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) - self.im_range = _trim_on_edges(self.im_range, self.nan_in_range) + self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + if not self.keep_endpoints: + self.grid_ = _trim_endpoints(self.grid_, self.nan_in_range) return self def transform(self, X): @@ -246,7 +256,7 @@ class Silhouette(BaseEstimator, TransformerMixin): numpy array with shape (number of diagrams) x (**resolution**): output persistence silhouettes. """ Xfit = [] - x_values = self.im_range + x_values = self.grid_ for diag in X: midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2. @@ -277,14 +287,15 @@ class BettiCurve(BaseEstimator, TransformerMixin): Compute Betti curves from persistence diagrams. There are several modes of operation: with a given resolution (with or without a sample_range), with a predefined grid, and with none of the previous. With a predefined grid, the class computes the Betti numbers at those grid points. Without a predefined grid, if the resolution is set to None, it can be fit to a list of persistence diagrams and produce a grid that consists of (at least) the filtration values at which at least one of those persistence diagrams changes Betti numbers, and then compute the Betti numbers at those grid points. In the latter mode, the exact Betti curve is computed for the entire real line. Otherwise, if the resolution is given, the Betti curve is obtained by sampling evenly using either the given sample_range or based on the persistence diagrams. """ - def __init__(self, resolution=100, sample_range=[np.nan, np.nan], predefined_grid=None): + def __init__(self, resolution=100, sample_range=[np.nan, np.nan], predefined_grid=None, *, keep_endpoints=False): """ Constructor for the BettiCurve class. Parameters: - resolution (int): number of sample for the piecewise-constant function (default 100). + resolution (int): number of samples for the piecewise-constant function (default 100), or None for the exact curve. sample_range ([double, double]): minimum and maximum of the piecewise-constant function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. 
predefined_grid (1d array or None, default=None): Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are ok. If None (default), and resolution is given, the grid will be uniform from x_min to x_max in 'resolution' steps, otherwise a grid will be computed that captures all changes in Betti numbers in the provided data. + keep_endpoints (bool): when guessing `sample_range` (fixed `resolution`, no `predefined_grid`), use the exact extremities. This is mostly useful for plotting, the default is to use a slightly smaller range. Attributes: grid_ (1d array): The grid on which the Betti numbers are computed. If predefined_grid was specified, `grid_` will always be that grid, independently of data. If not, the grid is fitted to capture all filtration values at which the Betti numbers change. @@ -313,6 +324,7 @@ class BettiCurve(BaseEstimator, TransformerMixin): self.predefined_grid = predefined_grid self.resolution = resolution self.sample_range = sample_range + self.keep_endpoints = keep_endpoints def is_fitted(self): return hasattr(self, "grid_") @@ -331,8 +343,14 @@ class BettiCurve(BaseEstimator, TransformerMixin): events = np.unique(np.concatenate([pd.flatten() for pd in X] + [[-np.inf]], axis=0)) self.grid_ = np.array(events) else: + self.nan_in_range = np.isnan(np.array(self.sample_range)) + self.new_resolution = self.resolution + if not self.keep_endpoints: + self.new_resolution += self.nan_in_range.sum() self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution) + self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + if not self.keep_endpoints: + self.grid_ = _trim_endpoints(self.grid_, self.nan_in_range) else: self.grid_ = self.predefined_grid # Get the predefined grid from user diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index 58caab21..9e94feeb 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -251,3 +251,15 @@ def test_landscape_nan_range(): lds_dgm = lds(dgm) assert (lds.sample_range[0] == 2) & (lds.sample_range[1] == 6) assert lds.new_resolution == 10 + +def test_endpoints(): + diags = [ np.array([[2., 3.]]) ] + for vec in [ Landscape(), Silhouette(), BettiCurve() ]: + vec.fit(diags) + assert vec.grid_[0] > 2 and vec.grid_[-1] < 3 + for vec in [ Landscape(keep_endpoints=True), Silhouette(keep_endpoints=True), BettiCurve(keep_endpoints=True) ]: + vec.fit(diags) + assert vec.grid_[0] == 2 and vec.grid_[-1] == 3 + vec = BettiCurve(resolution=None) + vec.fit(diags) + assert np.equal(vec.grid_, [-np.inf, 2., 3.]).all() -- cgit v1.2.3 From 8804aa1580c500ed927d65c25e8b78700725338e Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 8 Nov 2022 22:26:36 +0100 Subject: Clarify doc of RipsComplex.__init__ --- src/python/gudhi/rips_complex.pyx | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/rips_complex.pyx b/src/python/gudhi/rips_complex.pyx index c3470292..a0924cd6 100644 --- a/src/python/gudhi/rips_complex.pyx +++ b/src/python/gudhi/rips_complex.pyx @@ -45,20 +45,19 @@ cdef class RipsComplex: max_edge_length=float('inf'), sparse=None): """RipsComplex constructor. - :param max_edge_length: Rips value. - :type max_edge_length: float - :param points: A list of points in d-Dimension. 
- :type points: list of list of float + :type points: List[List[float]] Or :param distance_matrix: A distance matrix (full square or lower triangular). - :type points: list of list of float + :type distance_matrix: List[List[float]] And in both cases + :param max_edge_length: Rips value. + :type max_edge_length: float :param sparse: If this is not None, it switches to building a sparse Rips and represents the approximation parameter epsilon. :type sparse: float -- cgit v1.2.3 From 7c17408897a95a1f74626e8ff0ec8101ac4f92fd Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 8 Nov 2022 22:36:16 +0100 Subject: Reject positional arguments in RipsComplex.__init__ --- .github/next_release.md | 3 +++ src/python/gudhi/rips_complex.pyx | 4 ++-- src/python/test/test_simplex_generators.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'src/python/gudhi') diff --git a/.github/next_release.md b/.github/next_release.md index 81599b2c..d5fcef1c 100644 --- a/.github/next_release.md +++ b/.github/next_release.md @@ -9,6 +9,9 @@ Below is a list of changes made since GUDHI 3.6.0: - [Module](link) - ... +- [Rips complex](https://gudhi.inria.fr/python/latest/rips_complex_user.html) + - Construction now rejects positional arguments, you need to specify `points=X`. + - Installation - c++17 is the new minimal standard to compile the library. This implies Visual Studio minimal version is now 2017. diff --git a/src/python/gudhi/rips_complex.pyx b/src/python/gudhi/rips_complex.pyx index a0924cd6..d748f91e 100644 --- a/src/python/gudhi/rips_complex.pyx +++ b/src/python/gudhi/rips_complex.pyx @@ -41,7 +41,7 @@ cdef class RipsComplex: cdef Rips_complex_interface thisref # Fake constructor that does nothing but documenting the constructor - def __init__(self, points=None, distance_matrix=None, + def __init__(self, *, points=None, distance_matrix=None, max_edge_length=float('inf'), sparse=None): """RipsComplex constructor. 
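As a usage sketch of the keyword-only interface introduced by this patch (hypothetical toy point cloud; `points`, `max_edge_length` and `create_simplex_tree` are the only names taken from this series)::

    import numpy as np
    import gudhi

    pts = np.random.rand(20, 2)
    # gudhi.RipsComplex(pts) is now rejected with a TypeError;
    # the point cloud must be passed by keyword.
    rips = gudhi.RipsComplex(points=pts, max_edge_length=0.5)
    st = rips.create_simplex_tree(max_dimension=2)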
@@ -64,7 +64,7 @@ cdef class RipsComplex: """ # The real cython constructor - def __cinit__(self, points=None, distance_matrix=None, + def __cinit__(self, *, points=None, distance_matrix=None, max_edge_length=float('inf'), sparse=None): if sparse is not None: if distance_matrix is not None: diff --git a/src/python/test/test_simplex_generators.py b/src/python/test/test_simplex_generators.py index 8a9b4844..c567d4c1 100755 --- a/src/python/test/test_simplex_generators.py +++ b/src/python/test/test_simplex_generators.py @@ -14,7 +14,7 @@ import numpy as np def test_flag_generators(): pts = np.array([[0, 0], [0, 1.01], [1, 0], [1.02, 1.03], [100, 0], [100, 3.01], [103, 0], [103.02, 3.03]]) - r = gudhi.RipsComplex(pts, max_edge_length=4) + r = gudhi.RipsComplex(points=pts, max_edge_length=4) st = r.create_simplex_tree(max_dimension=50) st.persistence() g = st.flag_persistence_generators() -- cgit v1.2.3 From ad1123c3c7cfddc1c15e9933b96af08ef3398b3c Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 9 Nov 2022 17:46:39 +0100 Subject: New write_points_to_off_file --- src/python/CMakeLists.txt | 4 +- src/python/doc/installation.rst | 4 +- src/python/doc/point_cloud.rst | 5 ++ src/python/gudhi/off_reader.pyx | 41 -------------- src/python/gudhi/off_utils.pyx | 57 ++++++++++++++++++++ src/python/test/test_subsampling.py | 103 ++++++++---------------------------- 6 files changed, 88 insertions(+), 126 deletions(-) delete mode 100644 src/python/gudhi/off_reader.pyx create mode 100644 src/python/gudhi/off_utils.pyx (limited to 'src/python/gudhi') diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 8f8df138..35ddb778 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -53,7 +53,7 @@ if(PYTHONINTERP_FOUND) set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'datasets', ") # Cython modules - set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'off_reader', ") + set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'off_utils', ") set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'simplex_tree', ") set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'rips_complex', ") set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'cubical_complex', ") @@ -152,7 +152,7 @@ if(PYTHONINTERP_FOUND) set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DCGAL_EIGEN3_ENABLED', ") endif (EIGEN3_FOUND) - set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'off_reader', ") + set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'off_utils', ") set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'simplex_tree', ") set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'rips_complex', ") set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'cubical_complex', ") diff --git a/src/python/doc/installation.rst b/src/python/doc/installation.rst index 276ac4e2..5491542f 100644 --- a/src/python/doc/installation.rst +++ b/src/python/doc/installation.rst @@ -150,7 +150,7 @@ You shall have something like: Cython version 0.29.25 Numpy version 1.21.4 Boost version 1.77.0 - + Installed modules are: off_reader;simplex_tree;rips_complex;cubical_complex;periodic_cubical_complex; + + Installed modules are: off_utils;simplex_tree;rips_complex;cubical_complex;periodic_cubical_complex; persistence_graphical_tools;reader_utils;witness_complex;strong_witness_complex; + Missing modules are: bottleneck;nerve_gic;subsampling;tangential_complex;alpha_complex;euclidean_witness_complex; euclidean_strong_witness_complex; @@ -188,7 +188,7 @@ A complete configuration would be : GMPXX_LIBRARIES = 
/usr/lib/x86_64-linux-gnu/libgmpxx.so MPFR_LIBRARIES = /usr/lib/x86_64-linux-gnu/libmpfr.so TBB version 9107 found and used - + Installed modules are: bottleneck;off_reader;simplex_tree;rips_complex;cubical_complex;periodic_cubical_complex; + + Installed modules are: bottleneck;off_utils;simplex_tree;rips_complex;cubical_complex;periodic_cubical_complex; persistence_graphical_tools;reader_utils;witness_complex;strong_witness_complex;nerve_gic;subsampling; tangential_complex;alpha_complex;euclidean_witness_complex;euclidean_strong_witness_complex; + Missing modules are: diff --git a/src/python/doc/point_cloud.rst b/src/python/doc/point_cloud.rst index ffd8f85b..473b303f 100644 --- a/src/python/doc/point_cloud.rst +++ b/src/python/doc/point_cloud.rst @@ -13,6 +13,11 @@ File Readers .. autofunction:: gudhi.read_lower_triangular_matrix_from_csv_file +File Writers +------------ + +.. autofunction:: gudhi.write_points_to_off_file + Subsampling ----------- diff --git a/src/python/gudhi/off_reader.pyx b/src/python/gudhi/off_reader.pyx deleted file mode 100644 index a3200704..00000000 --- a/src/python/gudhi/off_reader.pyx +++ /dev/null @@ -1,41 +0,0 @@ -# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - -# which is released under MIT. -# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full -# license details. -# Author(s): Vincent Rouvreau -# -# Copyright (C) 2016 Inria -# -# Modification(s): -# - YYYY/MM Author: Description of the modification - -from __future__ import print_function -from cython cimport numeric -from libcpp.vector cimport vector -from libcpp.string cimport string -import errno -import os - -__author__ = "Vincent Rouvreau" -__copyright__ = "Copyright (C) 2016 Inria" -__license__ = "MIT" - -cdef extern from "Off_reader_interface.h" namespace "Gudhi": - vector[vector[double]] read_points_from_OFF_file(string off_file) - -def read_points_from_off_file(off_file=''): - """Read points from OFF file. - - :param off_file: An OFF file style name. - :type off_file: string - - :returns: The point set. - :rtype: List[List[float]] - """ - if off_file: - if os.path.isfile(off_file): - return read_points_from_OFF_file(off_file.encode('utf-8')) - else: - raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), - off_file) - diff --git a/src/python/gudhi/off_utils.pyx b/src/python/gudhi/off_utils.pyx new file mode 100644 index 00000000..155575d5 --- /dev/null +++ b/src/python/gudhi/off_utils.pyx @@ -0,0 +1,57 @@ +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - +# which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full +# license details. +# Author(s): Vincent Rouvreau +# +# Copyright (C) 2016 Inria +# +# Modification(s): +# - YYYY/MM Author: Description of the modification + +from __future__ import print_function +from cython cimport numeric +from libcpp.vector cimport vector +from libcpp.string cimport string +cimport cython +import errno +import os +import numpy as np + +__author__ = "Vincent Rouvreau" +__copyright__ = "Copyright (C) 2016 Inria" +__license__ = "MIT" + +cdef extern from "Off_reader_interface.h" namespace "Gudhi": + vector[vector[double]] read_points_from_OFF_file(string off_file) + +def read_points_from_off_file(off_file=''): + """Read points from OFF file. + + :param off_file: An OFF file style name. + :type off_file: string + + :returns: The point set. 
+ :rtype: List[List[float]] + """ + if off_file: + if os.path.isfile(off_file): + return read_points_from_OFF_file(off_file.encode('utf-8')) + else: + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), + off_file) + +@cython.embedsignature(True) +def write_points_to_off_file(fname, points): + """Write points to an OFF file. + + A simple wrapper for `numpy.savetxt`. + + :param fname: Name of the OFF file. + :type fname: str or file handle + :param points: Point coordinates. + :type points: numpy array of shape (n, dim) + """ + points = np.array(points, copy=False) + assert len(points.shape) == 2 + np.savetxt(fname, points, header='nOFF\n{} {} 0 0'.format(points.shape[1], points.shape[0]), comments='') diff --git a/src/python/test/test_subsampling.py b/src/python/test/test_subsampling.py index 3431f372..c1cb4e3f 100755 --- a/src/python/test/test_subsampling.py +++ b/src/python/test/test_subsampling.py @@ -16,17 +16,9 @@ __license__ = "MIT" def test_write_off_file_for_tests(): - file = open("subsample.off", "w") - file.write("nOFF\n") - file.write("2 7 0 0\n") - file.write("1.0 1.0\n") - file.write("7.0 0.0\n") - file.write("4.0 6.0\n") - file.write("9.0 6.0\n") - file.write("0.0 14.0\n") - file.write("2.0 19.0\n") - file.write("9.0 17.0\n") - file.close() + gudhi.write_points_to_off_file( + "subsample.off", [[1.0, 1.0], [7.0, 0.0], [4.0, 6.0], [9.0, 6.0], [0.0, 14.0], [2.0, 19.0], [9.0, 17.0]] + ) def test_simple_choose_n_farthest_points_with_a_starting_point(): @@ -34,54 +26,29 @@ def test_simple_choose_n_farthest_points_with_a_starting_point(): i = 0 for point in point_set: # The iteration starts with the given starting point - sub_set = gudhi.choose_n_farthest_points( - points=point_set, nb_points=1, starting_point=i - ) + sub_set = gudhi.choose_n_farthest_points(points=point_set, nb_points=1, starting_point=i) assert sub_set[0] == point_set[i] i = i + 1 # The iteration finds then the farthest - sub_set = gudhi.choose_n_farthest_points( - points=point_set, nb_points=2, starting_point=1 - ) + sub_set = gudhi.choose_n_farthest_points(points=point_set, nb_points=2, starting_point=1) assert sub_set[1] == point_set[3] - sub_set = gudhi.choose_n_farthest_points( - points=point_set, nb_points=2, starting_point=3 - ) + sub_set = gudhi.choose_n_farthest_points(points=point_set, nb_points=2, starting_point=3) assert sub_set[1] == point_set[1] - sub_set = gudhi.choose_n_farthest_points( - points=point_set, nb_points=2, starting_point=0 - ) + sub_set = gudhi.choose_n_farthest_points(points=point_set, nb_points=2, starting_point=0) assert sub_set[1] == point_set[2] - sub_set = gudhi.choose_n_farthest_points( - points=point_set, nb_points=2, starting_point=2 - ) + sub_set = gudhi.choose_n_farthest_points(points=point_set, nb_points=2, starting_point=2) assert sub_set[1] == point_set[0] # Test the limits - assert ( - gudhi.choose_n_farthest_points(points=[], nb_points=0, starting_point=0) == [] - ) - assert ( - gudhi.choose_n_farthest_points(points=[], nb_points=1, starting_point=0) == [] - ) - assert ( - gudhi.choose_n_farthest_points(points=[], nb_points=0, starting_point=1) == [] - ) - assert ( - gudhi.choose_n_farthest_points(points=[], nb_points=1, starting_point=1) == [] - ) + assert gudhi.choose_n_farthest_points(points=[], nb_points=0, starting_point=0) == [] + assert gudhi.choose_n_farthest_points(points=[], nb_points=1, starting_point=0) == [] + assert gudhi.choose_n_farthest_points(points=[], nb_points=0, starting_point=1) == [] + assert gudhi.choose_n_farthest_points(points=[], 
nb_points=1, starting_point=1) == [] # From off file test for i in range(0, 7): - assert ( - len( - gudhi.choose_n_farthest_points( - off_file="subsample.off", nb_points=i, starting_point=i - ) - ) - == i - ) + assert len(gudhi.choose_n_farthest_points(off_file="subsample.off", nb_points=i, starting_point=i)) == i def test_simple_choose_n_farthest_points_randomed(): @@ -104,10 +71,7 @@ def test_simple_choose_n_farthest_points_randomed(): # From off file test for i in range(0, 7): - assert ( - len(gudhi.choose_n_farthest_points(off_file="subsample.off", nb_points=i)) - == i - ) + assert len(gudhi.choose_n_farthest_points(off_file="subsample.off", nb_points=i)) == i def test_simple_pick_n_random_points(): @@ -130,9 +94,7 @@ def test_simple_pick_n_random_points(): # From off file test for i in range(0, 7): - assert ( - len(gudhi.pick_n_random_points(off_file="subsample.off", nb_points=i)) == i - ) + assert len(gudhi.pick_n_random_points(off_file="subsample.off", nb_points=i)) == i def test_simple_sparsify_points(): @@ -152,31 +114,10 @@ def test_simple_sparsify_points(): ] assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=2.001) == [[0, 1]] - assert ( - len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=0.0)) - == 7 - ) - assert ( - len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=30.0)) - == 5 - ) - assert ( - len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=40.1)) - == 4 - ) - assert ( - len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=89.9)) - == 3 - ) - assert ( - len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=100.0)) - == 2 - ) - assert ( - len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=324.9)) - == 2 - ) - assert ( - len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=325.01)) - == 1 - ) + assert len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=0.0)) == 7 + assert len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=30.0)) == 5 + assert len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=40.1)) == 4 + assert len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=89.9)) == 3 + assert len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=100.0)) == 2 + assert len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=324.9)) == 2 + assert len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=325.01)) == 1 -- cgit v1.2.3 From 4118bcb622c624130e768d9116a7e147a5e45c68 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 11 Nov 2022 18:00:42 +0100 Subject: Link to OFF format --- src/python/gudhi/off_utils.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/off_utils.pyx b/src/python/gudhi/off_utils.pyx index 155575d5..a8142791 100644 --- a/src/python/gudhi/off_utils.pyx +++ b/src/python/gudhi/off_utils.pyx @@ -26,7 +26,7 @@ cdef extern from "Off_reader_interface.h" namespace "Gudhi": vector[vector[double]] read_points_from_OFF_file(string off_file) def read_points_from_off_file(off_file=''): - """Read points from OFF file. + """Read points from an `OFF file `_. :param off_file: An OFF file style name. :type off_file: string @@ -43,7 +43,7 @@ def read_points_from_off_file(off_file=''): @cython.embedsignature(True) def write_points_to_off_file(fname, points): - """Write points to an OFF file. 
+ """Write points to an `OFF file `_. A simple wrapper for `numpy.savetxt`. -- cgit v1.2.3 From 2d5039b7eeb16116ab859076aa0a93f092250d88 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 11 Nov 2022 18:59:42 +0100 Subject: Special case for writing 3d OFF --- src/python/CMakeLists.txt | 1 + src/python/gudhi/off_utils.pyx | 7 ++++++- src/python/test/test_off.py | 21 +++++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 src/python/test/test_off.py (limited to 'src/python/gudhi') diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 35ddb778..32ec13bd 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -546,6 +546,7 @@ if(PYTHONINTERP_FOUND) # Reader utils add_gudhi_py_test(test_reader_utils) + add_gudhi_py_test(test_off) # Wasserstein if(OT_FOUND) diff --git a/src/python/gudhi/off_utils.pyx b/src/python/gudhi/off_utils.pyx index a8142791..9276c7b0 100644 --- a/src/python/gudhi/off_utils.pyx +++ b/src/python/gudhi/off_utils.pyx @@ -54,4 +54,9 @@ def write_points_to_off_file(fname, points): """ points = np.array(points, copy=False) assert len(points.shape) == 2 - np.savetxt(fname, points, header='nOFF\n{} {} 0 0'.format(points.shape[1], points.shape[0]), comments='') + dim = points.shape[1] + if dim == 3: + head = 'OFF\n{} 0 0'.format(points.shape[0]) + else: + head = 'nOFF\n{} {} 0 0'.format(dim, points.shape[0]) + np.savetxt(fname, points, header=head, comments='') diff --git a/src/python/test/test_off.py b/src/python/test/test_off.py new file mode 100644 index 00000000..69bfa1f9 --- /dev/null +++ b/src/python/test/test_off.py @@ -0,0 +1,21 @@ +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. + Author(s): Marc Glisse + + Copyright (C) 2022 Inria + + Modification(s): + - YYYY/MM Author: Description of the modification +""" + +import gudhi as gd +import numpy as np +import pytest + + +def test_off_rw(): + for dim in range(2, 6): + X = np.random.rand(123, dim) + gd.write_points_to_off_file('rand.off', X) + Y = gd.read_points_from_off_file('rand.off') + assert Y == pytest.approx(X) -- cgit v1.2.3 From 2f0db9e495afe774409f4b0acb823e1b984aeb71 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 14 Nov 2022 16:24:30 +0100 Subject: endpoints for Entropy, idempotent fit(), refactor grid_ --- src/python/gudhi/representations/vector_methods.py | 65 ++++++++++------------ src/python/test/test_representations.py | 8 +-- 2 files changed, 33 insertions(+), 40 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 212fa9f5..f0bc9f95 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -101,7 +101,7 @@ class PersistenceImage(BaseEstimator, TransformerMixin): """ return self.fit_transform([diag])[0,:] -def _automatic_sample_range(sample_range, X, y): +def _automatic_sample_range(sample_range, X): """ Compute and returns sample range from the persistence diagrams if one of the sample_range values is numpy.nan. 
@@ -114,7 +114,7 @@ def _automatic_sample_range(sample_range, X, y): nan_in_range = np.isnan(sample_range) if nan_in_range.any(): try: - pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y) + pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X) [mx,my] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]] [Mx,My] = [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]] return np.where(nan_in_range, np.array([mx, My]), sample_range) @@ -132,6 +132,18 @@ def _trim_endpoints(x, are_endpoints_nan): return x +def _grid_from_sample_range(self, X): + sample_range = np.array(self.sample_range_init) + self.nan_in_range = np.isnan(sample_range) + self.new_resolution = self.resolution + if not self.keep_endpoints: + self.new_resolution += self.nan_in_range.sum() + self.sample_range = _automatic_sample_range(sample_range, X) + self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + if not self.keep_endpoints: + self.grid_ = _trim_endpoints(self.grid_, self.nan_in_range) + + class Landscape(BaseEstimator, TransformerMixin): """ This is a class for computing persistence landscapes from a list of persistence diagrams. A persistence landscape is a collection of 1D piecewise-linear functions computed from the rank function associated to the persistence diagram. These piecewise-linear functions are then sampled evenly on a given range and the corresponding vectors of samples are concatenated and returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details. @@ -146,11 +158,7 @@ class Landscape(BaseEstimator, TransformerMixin): sample_range ([double, double]): minimum and maximum of all piecewise-linear function domains, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. keep_endpoints (bool): when guessing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. """ - self.num_landscapes, self.resolution, self.sample_range = num_landscapes, resolution, sample_range - self.nan_in_range = np.isnan(np.array(self.sample_range)) - self.new_resolution = self.resolution - if not keep_endpoints: - self.new_resolution += self.nan_in_range.sum() + self.num_landscapes, self.resolution, self.sample_range_init = num_landscapes, resolution, sample_range self.keep_endpoints = keep_endpoints def fit(self, X, y=None): @@ -161,10 +169,7 @@ class Landscape(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) - if not self.keep_endpoints: - self.grid_ = _trim_endpoints(self.grid_, self.nan_in_range) + _grid_from_sample_range(self, X) return self def transform(self, X): @@ -224,11 +229,7 @@ class Silhouette(BaseEstimator, TransformerMixin): sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. 
If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. keep_endpoints (bool): when guessing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. """ - self.weight, self.resolution, self.sample_range = weight, resolution, sample_range - self.nan_in_range = np.isnan(np.array(self.sample_range)) - self.new_resolution = self.resolution - if not keep_endpoints: - self.new_resolution += self.nan_in_range.sum() + self.weight, self.resolution, self.sample_range_init = weight, resolution, sample_range self.keep_endpoints = keep_endpoints def fit(self, X, y=None): @@ -239,10 +240,7 @@ class Silhouette(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) - if not self.keep_endpoints: - self.grid_ = _trim_endpoints(self.grid_, self.nan_in_range) + _grid_from_sample_range(self, X) return self def transform(self, X): @@ -323,7 +321,7 @@ class BettiCurve(BaseEstimator, TransformerMixin): self.predefined_grid = predefined_grid self.resolution = resolution - self.sample_range = sample_range + self.sample_range_init = sample_range self.keep_endpoints = keep_endpoints def is_fitted(self): @@ -343,14 +341,7 @@ class BettiCurve(BaseEstimator, TransformerMixin): events = np.unique(np.concatenate([pd.flatten() for pd in X] + [[-np.inf]], axis=0)) self.grid_ = np.array(events) else: - self.nan_in_range = np.isnan(np.array(self.sample_range)) - self.new_resolution = self.resolution - if not self.keep_endpoints: - self.new_resolution += self.nan_in_range.sum() - self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) - self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) - if not self.keep_endpoints: - self.grid_ = _trim_endpoints(self.grid_, self.nan_in_range) + _grid_from_sample_range(self, X) else: self.grid_ = self.predefined_grid # Get the predefined grid from user @@ -450,7 +441,7 @@ class Entropy(BaseEstimator, TransformerMixin): """ This is a class for computing persistence entropy. Persistence entropy is a statistic for persistence diagrams inspired from Shannon entropy. This statistic can also be used to compute a feature vector, called the entropy summary function. See https://arxiv.org/pdf/1803.08304.pdf for more details. Note that a previous implementation was contributed by Manuel Soriano-Trigueros. """ - def __init__(self, mode="scalar", normalized=True, resolution=100, sample_range=[np.nan, np.nan]): + def __init__(self, mode="scalar", normalized=True, resolution=100, sample_range=[np.nan, np.nan], *, keep_endpoints=False): """ Constructor for the Entropy class. @@ -459,8 +450,10 @@ class Entropy(BaseEstimator, TransformerMixin): normalized (bool): whether to normalize the entropy summary function (default True). Used only if **mode** = "vector". resolution (int): number of sample for the entropy summary function (default 100). Used only if **mode** = "vector". sample_range ([double, double]): minimum and maximum of the entropy summary function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. 
If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. Used only if **mode** = "vector". + keep_endpoints (bool): when guessing `sample_range`, use the exact extremities. This is mostly useful for plotting, the default is to use a slightly smaller range. """ - self.mode, self.normalized, self.resolution, self.sample_range = mode, normalized, resolution, sample_range + self.mode, self.normalized, self.resolution, self.sample_range_init = mode, normalized, resolution, sample_range + self.keep_endpoints = keep_endpoints def fit(self, X, y=None): """ @@ -470,7 +463,9 @@ class Entropy(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y) + if self.mode == "vector": + _grid_from_sample_range(self, X) + self.step_ = self.grid_[1] - self.grid_[0] return self def transform(self, X): @@ -484,8 +479,6 @@ class Entropy(BaseEstimator, TransformerMixin): numpy array with shape (number of diagrams) x (1 if **mode** = "scalar" else **resolution**): output entropy. """ num_diag, Xfit = len(X), [] - x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution) - step_x = x_values[1] - x_values[0] new_X = BirthPersistenceTransform().fit_transform(X) for i in range(num_diag): @@ -500,8 +493,8 @@ class Entropy(BaseEstimator, TransformerMixin): ent = np.zeros(self.resolution) for j in range(num_pts_in_diag): [px,py] = orig_diagram[j,:2] - min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.resolution) - max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution) + min_idx = np.clip(np.ceil((px - self.sample_range[0]) / self.step_).astype(int), 0, self.resolution) + max_idx = np.clip(np.ceil((py - self.sample_range[0]) / self.step_).astype(int), 0, self.resolution) ent[min_idx:max_idx]-=p[j]*np.log(p[j]) if self.normalized: ent = ent / np.linalg.norm(ent, ord=1) diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index 9e94feeb..ae0362f8 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -161,7 +161,7 @@ def test_entropy_miscalculation(): return -np.dot(l, np.log(l)) sce = Entropy(mode="scalar") assert [[pe(diag_ex)]] == sce.fit_transform([diag_ex]) - sce = Entropy(mode="vector", resolution=4, normalized=False) + sce = Entropy(mode="vector", resolution=4, normalized=False, keep_endpoints=True) pef = [-1/4*np.log(1/4)-1/4*np.log(1/4)-1/2*np.log(1/2), -1/4*np.log(1/4)-1/4*np.log(1/4)-1/2*np.log(1/2), -1/2*np.log(1/2), @@ -170,7 +170,7 @@ def test_entropy_miscalculation(): sce = Entropy(mode="vector", resolution=4, normalized=True) pefN = (sce.fit_transform([diag_ex]))[0] area = np.linalg.norm(pefN, ord=1) - assert area==1 + assert area==pytest.approx(1) def test_kernel_empty_diagrams(): empty_diag = np.empty(shape = [0, 2]) @@ -254,10 +254,10 @@ def test_landscape_nan_range(): def test_endpoints(): diags = [ np.array([[2., 3.]]) ] - for vec in [ Landscape(), Silhouette(), BettiCurve() ]: + for vec in [ Landscape(), Silhouette(), BettiCurve(), Entropy(mode="vector") ]: vec.fit(diags) assert vec.grid_[0] > 2 and vec.grid_[-1] < 3 - for vec in [ Landscape(keep_endpoints=True), Silhouette(keep_endpoints=True), BettiCurve(keep_endpoints=True) ]: + for vec in [ Landscape(keep_endpoints=True), 
Silhouette(keep_endpoints=True), BettiCurve(keep_endpoints=True), Entropy(mode="vector", keep_endpoints=True)]: vec.fit(diags) assert vec.grid_[0] == 2 and vec.grid_[-1] == 3 vec = BettiCurve(resolution=None) -- cgit v1.2.3 From c23bc46f80f3075d1b6c008d903da9c399e812e0 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 14 Nov 2022 17:18:08 +0100 Subject: Document grid_ --- src/python/gudhi/representations/vector_methods.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index f0bc9f95..4ebd834d 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -157,6 +157,9 @@ class Landscape(BaseEstimator, TransformerMixin): resolution (int): number of sample for all piecewise-linear functions (default 100). sample_range ([double, double]): minimum and maximum of all piecewise-linear function domains, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. keep_endpoints (bool): when guessing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. + + Attributes: + grid_ (1d array): The grid on which the landscapes are computed. """ self.num_landscapes, self.resolution, self.sample_range_init = num_landscapes, resolution, sample_range self.keep_endpoints = keep_endpoints @@ -228,6 +231,9 @@ class Silhouette(BaseEstimator, TransformerMixin): resolution (int): number of samples for the weighted average (default 100). sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. keep_endpoints (bool): when guessing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. + + Attributes: + grid_ (1d array): The grid on which the silhouette is computed. """ self.weight, self.resolution, self.sample_range_init = weight, resolution, sample_range self.keep_endpoints = keep_endpoints @@ -296,7 +302,7 @@ class BettiCurve(BaseEstimator, TransformerMixin): keep_endpoints (bool): when guessing `sample_range` (fixed `resolution`, no `predefined_grid`), use the exact extremities. This is mostly useful for plotting, the default is to use a slightly smaller range. Attributes: - grid_ (1d array): The grid on which the Betti numbers are computed. If predefined_grid was specified, `grid_` will always be that grid, independently of data. If not, the grid is fitted to capture all filtration values at which the Betti numbers change. + grid_ (1d array): The grid on which the Betti numbers are computed. If predefined_grid was specified, `grid_` will always be that grid, independently of data. If not and resolution is None, the grid is fitted to capture all filtration values at which the Betti numbers change. Examples -------- @@ -451,6 +457,9 @@ class Entropy(BaseEstimator, TransformerMixin): resolution (int): number of sample for the entropy summary function (default 100). 
Used only if **mode** = "vector". sample_range ([double, double]): minimum and maximum of the entropy summary function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. Used only if **mode** = "vector". keep_endpoints (bool): when guessing `sample_range`, use the exact extremities. This is mostly useful for plotting, the default is to use a slightly smaller range. + + Attributes: + grid_ (1d array): In vector mode, the grid on which the entropy summary function is computed. """ self.mode, self.normalized, self.resolution, self.sample_range_init = mode, normalized, resolution, sample_range self.keep_endpoints = keep_endpoints -- cgit v1.2.3 From a74fe47220d1a95dd6bca6b9561df02c6a49b9d2 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 14 Nov 2022 17:22:45 +0100 Subject: Avoid the word "guess" --- src/python/gudhi/representations/vector_methods.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 4ebd834d..346d892d 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -156,7 +156,7 @@ class Landscape(BaseEstimator, TransformerMixin): num_landscapes (int): number of piecewise-linear functions to output (default 5). resolution (int): number of sample for all piecewise-linear functions (default 100). sample_range ([double, double]): minimum and maximum of all piecewise-linear function domains, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. - keep_endpoints (bool): when guessing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. + keep_endpoints (bool): when computing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. Attributes: grid_ (1d array): The grid on which the landscapes are computed. @@ -230,7 +230,7 @@ class Silhouette(BaseEstimator, TransformerMixin): weight (function): weight function for the persistence diagram points (default constant function, ie lambda x: 1). This function must be defined on 2D points, ie on lists or numpy arrays of the form [p_x,p_y]. resolution (int): number of samples for the weighted average (default 100). sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. - keep_endpoints (bool): when guessing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. + keep_endpoints (bool): when computing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. 
Attributes: grid_ (1d array): The grid on which the silhouette is computed. @@ -299,7 +299,7 @@ class BettiCurve(BaseEstimator, TransformerMixin): resolution (int): number of samples for the piecewise-constant function (default 100), or None for the exact curve. sample_range ([double, double]): minimum and maximum of the piecewise-constant function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. predefined_grid (1d array or None, default=None): Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are ok. If None (default), and resolution is given, the grid will be uniform from x_min to x_max in 'resolution' steps, otherwise a grid will be computed that captures all changes in Betti numbers in the provided data. - keep_endpoints (bool): when guessing `sample_range` (fixed `resolution`, no `predefined_grid`), use the exact extremities. This is mostly useful for plotting, the default is to use a slightly smaller range. + keep_endpoints (bool): when computing `sample_range` (fixed `resolution`, no `predefined_grid`), use the exact extremities. This is mostly useful for plotting, the default is to use a slightly smaller range. Attributes: grid_ (1d array): The grid on which the Betti numbers are computed. If predefined_grid was specified, `grid_` will always be that grid, independently of data. If not and resolution is None, the grid is fitted to capture all filtration values at which the Betti numbers change. @@ -456,7 +456,7 @@ class Entropy(BaseEstimator, TransformerMixin): normalized (bool): whether to normalize the entropy summary function (default True). Used only if **mode** = "vector". resolution (int): number of sample for the entropy summary function (default 100). Used only if **mode** = "vector". sample_range ([double, double]): minimum and maximum of the entropy summary function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. Used only if **mode** = "vector". - keep_endpoints (bool): when guessing `sample_range`, use the exact extremities. This is mostly useful for plotting, the default is to use a slightly smaller range. + keep_endpoints (bool): when computing `sample_range`, use the exact extremities. This is mostly useful for plotting, the default is to use a slightly smaller range. Attributes: grid_ (1d array): In vector mode, the grid on which the entropy summary function is computed. 
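
As a usage sketch for the `grid_` attribute and the `keep_endpoints` flag documented in the two patches above (this example is not part of the patch series; it assumes a GUDHI build that contains these commits, and the diagram values are made up for illustration):

>>> import numpy as np
>>> from gudhi.representations import Silhouette
>>> diag = np.array([[0., 1.], [0.5, 2.]])
>>> sil = Silhouette(resolution=10, keep_endpoints=True)
>>> sil.fit_transform([diag]).shape
(1, 10)
>>> print(sil.grid_[0], sil.grid_[-1])  # exact extremities of the diagram are kept
0.0 2.0
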
-- cgit v1.2.3 From 0fba7fe05a72ce7b96633f6500e5313f32c4bc20 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 14 Nov 2022 19:47:40 +0100 Subject: Fix doc of attributes --- src/python/gudhi/representations/vector_methods.py | 60 +++++++++++----------- 1 file changed, 31 insertions(+), 29 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 346d892d..9e6db960 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -147,6 +147,9 @@ def _grid_from_sample_range(self, X): class Landscape(BaseEstimator, TransformerMixin): """ This is a class for computing persistence landscapes from a list of persistence diagrams. A persistence landscape is a collection of 1D piecewise-linear functions computed from the rank function associated to the persistence diagram. These piecewise-linear functions are then sampled evenly on a given range and the corresponding vectors of samples are concatenated and returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details. + + Attributes: + grid_ (1d array): The grid on which the landscapes are computed. """ def __init__(self, num_landscapes=5, resolution=100, sample_range=[np.nan, np.nan], *, keep_endpoints=False): """ @@ -157,9 +160,6 @@ class Landscape(BaseEstimator, TransformerMixin): resolution (int): number of sample for all piecewise-linear functions (default 100). sample_range ([double, double]): minimum and maximum of all piecewise-linear function domains, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. keep_endpoints (bool): when computing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. - - Attributes: - grid_ (1d array): The grid on which the landscapes are computed. """ self.num_landscapes, self.resolution, self.sample_range_init = num_landscapes, resolution, sample_range self.keep_endpoints = keep_endpoints @@ -221,6 +221,9 @@ class Landscape(BaseEstimator, TransformerMixin): class Silhouette(BaseEstimator, TransformerMixin): """ This is a class for computing persistence silhouettes from a list of persistence diagrams. A persistence silhouette is computed by taking a weighted average of the collection of 1D piecewise-linear functions given by the persistence landscapes, and then by evenly sampling this average on a given range. Finally, the corresponding vector of samples is returned. See https://arxiv.org/abs/1312.0308 for more details. + + Attributes: + grid_ (1d array): The grid on which the silhouette is computed. """ def __init__(self, weight=lambda x: 1, resolution=100, sample_range=[np.nan, np.nan], *, keep_endpoints=False): """ @@ -231,9 +234,6 @@ class Silhouette(BaseEstimator, TransformerMixin): resolution (int): number of samples for the weighted average (default 100). sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. 
keep_endpoints (bool): when computing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. - - Attributes: - grid_ (1d array): The grid on which the silhouette is computed. """ self.weight, self.resolution, self.sample_range_init = weight, resolution, sample_range self.keep_endpoints = keep_endpoints @@ -289,6 +289,28 @@ class Silhouette(BaseEstimator, TransformerMixin): class BettiCurve(BaseEstimator, TransformerMixin): """ Compute Betti curves from persistence diagrams. There are several modes of operation: with a given resolution (with or without a sample_range), with a predefined grid, and with none of the previous. With a predefined grid, the class computes the Betti numbers at those grid points. Without a predefined grid, if the resolution is set to None, it can be fit to a list of persistence diagrams and produce a grid that consists of (at least) the filtration values at which at least one of those persistence diagrams changes Betti numbers, and then compute the Betti numbers at those grid points. In the latter mode, the exact Betti curve is computed for the entire real line. Otherwise, if the resolution is given, the Betti curve is obtained by sampling evenly using either the given sample_range or based on the persistence diagrams. + + Examples + -------- + If pd is a persistence diagram and xs is a nonempty grid of finite values such that xs[0] >= pd.min(), then the results of: + + >>> bc = BettiCurve(predefined_grid=xs) # doctest: +SKIP + >>> result = bc(pd) # doctest: +SKIP + + and + + >>> from scipy.interpolate import interp1d # doctest: +SKIP + >>> bc = BettiCurve(resolution=None, predefined_grid=None) # doctest: +SKIP + >>> bettis = bc.fit_transform([pd]) # doctest: +SKIP + >>> interp = interp1d(bc.grid_, bettis[0, :], kind="previous", fill_value="extrapolate") # doctest: +SKIP + >>> result = np.array(interp(xs), dtype=int) # doctest: +SKIP + + are the same. + + Attributes + ---------- + grid_ : 1d array + The grid on which the Betti numbers are computed. If predefined_grid was specified, `grid_` will always be that grid, independently of data. If not and resolution is None, the grid is fitted to capture all filtration values at which the Betti numbers change. """ def __init__(self, resolution=100, sample_range=[np.nan, np.nan], predefined_grid=None, *, keep_endpoints=False): @@ -300,26 +322,6 @@ class BettiCurve(BaseEstimator, TransformerMixin): sample_range ([double, double]): minimum and maximum of the piecewise-constant function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. predefined_grid (1d array or None, default=None): Predefined filtration grid points at which to compute the Betti curves. Must be strictly ordered. Infinities are ok. If None (default), and resolution is given, the grid will be uniform from x_min to x_max in 'resolution' steps, otherwise a grid will be computed that captures all changes in Betti numbers in the provided data. keep_endpoints (bool): when computing `sample_range` (fixed `resolution`, no `predefined_grid`), use the exact extremities. This is mostly useful for plotting, the default is to use a slightly smaller range. - - Attributes: - grid_ (1d array): The grid on which the Betti numbers are computed. 
If predefined_grid was specified, `grid_` will always be that grid, independently of data. If not and resolution is None, the grid is fitted to capture all filtration values at which the Betti numbers change. - - Examples - -------- - If pd is a persistence diagram and xs is a nonempty grid of finite values such that xs[0] >= pd.min(), then the results of: - - >>> bc = BettiCurve(predefined_grid=xs) # doctest: +SKIP - >>> result = bc(pd) # doctest: +SKIP - - and - - >>> from scipy.interpolate import interp1d # doctest: +SKIP - >>> bc = BettiCurve(resolution=None, predefined_grid=None) # doctest: +SKIP - >>> bettis = bc.fit_transform([pd]) # doctest: +SKIP - >>> interp = interp1d(bc.grid_, bettis[0, :], kind="previous", fill_value="extrapolate") # doctest: +SKIP - >>> result = np.array(interp(xs), dtype=int) # doctest: +SKIP - - are the same. """ if (predefined_grid is not None) and (not isinstance(predefined_grid, np.ndarray)): @@ -446,6 +448,9 @@ class BettiCurve(BaseEstimator, TransformerMixin): class Entropy(BaseEstimator, TransformerMixin): """ This is a class for computing persistence entropy. Persistence entropy is a statistic for persistence diagrams inspired from Shannon entropy. This statistic can also be used to compute a feature vector, called the entropy summary function. See https://arxiv.org/pdf/1803.08304.pdf for more details. Note that a previous implementation was contributed by Manuel Soriano-Trigueros. + + Attributes: + grid_ (1d array): In vector mode, the grid on which the entropy summary function is computed. """ def __init__(self, mode="scalar", normalized=True, resolution=100, sample_range=[np.nan, np.nan], *, keep_endpoints=False): """ @@ -457,9 +462,6 @@ class Entropy(BaseEstimator, TransformerMixin): resolution (int): number of sample for the entropy summary function (default 100). Used only if **mode** = "vector". sample_range ([double, double]): minimum and maximum of the entropy summary function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. Used only if **mode** = "vector". keep_endpoints (bool): when computing `sample_range`, use the exact extremities. This is mostly useful for plotting, the default is to use a slightly smaller range. - - Attributes: - grid_ (1d array): In vector mode, the grid on which the entropy summary function is computed. 
""" self.mode, self.normalized, self.resolution, self.sample_range_init = mode, normalized, resolution, sample_range self.keep_endpoints = keep_endpoints -- cgit v1.2.3 From 940c5bc058970edbe908e0c7f4c655fbca94a87b Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sun, 13 Nov 2022 20:40:17 +0100 Subject: Update to Hera 2 --- ext/hera | 2 +- src/cmake/modules/GUDHI_submodules.cmake | 6 ++---- src/cmake/modules/GUDHI_user_version_target.cmake | 5 ++--- src/python/gudhi/hera/bottleneck.cc | 2 +- src/python/gudhi/hera/wasserstein.cc | 2 +- src/python/setup.py.in | 6 ++---- 6 files changed, 9 insertions(+), 14 deletions(-) (limited to 'src/python/gudhi') diff --git a/ext/hera b/ext/hera index b528c406..8bfdd4bd 160000 --- a/ext/hera +++ b/ext/hera @@ -1 +1 @@ -Subproject commit b528c4067a8aac346eb307d3c23b82d5953cfe2d +Subproject commit 8bfdd4bd32f005c18b5c75c502b987de552d6e48 diff --git a/src/cmake/modules/GUDHI_submodules.cmake b/src/cmake/modules/GUDHI_submodules.cmake index 78b045bd..c844386d 100644 --- a/src/cmake/modules/GUDHI_submodules.cmake +++ b/src/cmake/modules/GUDHI_submodules.cmake @@ -1,5 +1,3 @@ # For those who dislike bundled dependencies, this indicates where to find a preinstalled Hera. -set(HERA_WASSERSTEIN_INTERNAL_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/ext/hera/wasserstein/include) -set(HERA_WASSERSTEIN_INCLUDE_DIR ${HERA_WASSERSTEIN_INTERNAL_INCLUDE_DIR} CACHE PATH "Directory where one can find Hera's wasserstein.h") -set(HERA_BOTTLENECK_INTERNAL_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/ext/hera/bottleneck/include) -set(HERA_BOTTLENECK_INCLUDE_DIR ${HERA_BOTTLENECK_INTERNAL_INCLUDE_DIR} CACHE PATH "Directory where one can find Hera's bottleneck.h") \ No newline at end of file +set(HERA_INTERNAL_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/ext/hera/include) +set(HERA_INCLUDE_DIR ${HERA_INTERNAL_INCLUDE_DIR} CACHE PATH "Directory where one can find hera/{wasserstein.h,bottleneck.h}") diff --git a/src/cmake/modules/GUDHI_user_version_target.cmake b/src/cmake/modules/GUDHI_user_version_target.cmake index 4487ad86..2144ff6f 100644 --- a/src/cmake/modules/GUDHI_user_version_target.cmake +++ b/src/cmake/modules/GUDHI_user_version_target.cmake @@ -60,10 +60,9 @@ add_custom_command(TARGET user_version PRE_BUILD COMMAND ${CMAKE_COMMAND} -E add_custom_command(TARGET user_version PRE_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/src/GudhUI ${GUDHI_USER_VERSION_DIR}/GudhUI) -if(HERA_WASSERSTEIN_INCLUDE_DIR STREQUAL HERA_WASSERSTEIN_INTERNAL_INCLUDE_DIR OR - HERA_BOTTLENECK_INCLUDE_DIR STREQUAL HERA_BOTTLENECK_INTERNAL_INCLUDE_DIR) +if(HERA_INCLUDE_DIR STREQUAL HERA_INTERNAL_INCLUDE_DIR) add_custom_command(TARGET user_version PRE_BUILD COMMAND ${CMAKE_COMMAND} -E - copy_directory ${CMAKE_SOURCE_DIR}/ext/hera ${GUDHI_USER_VERSION_DIR}/ext/hera) + copy_directory ${CMAKE_SOURCE_DIR}/ext/hera/include ${GUDHI_USER_VERSION_DIR}/ext/hera/include) endif() set(GUDHI_DIRECTORIES "doc;example;concept;utilities") diff --git a/src/python/gudhi/hera/bottleneck.cc b/src/python/gudhi/hera/bottleneck.cc index 0cb562ce..ec461f7c 100644 --- a/src/python/gudhi/hera/bottleneck.cc +++ b/src/python/gudhi/hera/bottleneck.cc @@ -16,7 +16,7 @@ using py::ssize_t; #endif -#include // Hera +#include // Hera double bottleneck_distance(Dgm d1, Dgm d2, double delta) { diff --git a/src/python/gudhi/hera/wasserstein.cc b/src/python/gudhi/hera/wasserstein.cc index fa0cf8aa..b1fce1e7 100644 --- a/src/python/gudhi/hera/wasserstein.cc +++ b/src/python/gudhi/hera/wasserstein.cc @@ -8,7 +8,7 @@ * - YYYY/MM Author: Description of 
the modification */ -#include // Hera +#include // Hera #include diff --git a/src/python/setup.py.in b/src/python/setup.py.in index 2c67c2c5..1ecbe985 100644 --- a/src/python/setup.py.in +++ b/src/python/setup.py.in @@ -48,10 +48,8 @@ ext_modules = cythonize(ext_modules, compiler_directives={'language_level': '3'} for module in pybind11_modules: my_include_dirs = include_dirs + [pybind11.get_include(False), pybind11.get_include(True)] - if module == 'hera/wasserstein': - my_include_dirs = ['@HERA_WASSERSTEIN_INCLUDE_DIR@'] + my_include_dirs - elif module == 'hera/bottleneck': - my_include_dirs = ['@HERA_BOTTLENECK_INCLUDE_DIR@'] + my_include_dirs + if module.startswith('hera/'): + my_include_dirs = ['@HERA_INCLUDE_DIR@'] + my_include_dirs ext_modules.append(Extension( 'gudhi.' + module.replace('/', '.'), sources = [source_dir + module + '.cc'], -- cgit v1.2.3 From 04370bae13251d0bcce205f253fb758f91fdf207 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 15 Nov 2022 19:11:20 +0100 Subject: Also provide ssize_t for wasserstein I think I'll merge the 2 files later. --- src/python/gudhi/hera/wasserstein.cc | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/hera/wasserstein.cc b/src/python/gudhi/hera/wasserstein.cc index b1fce1e7..3516352e 100644 --- a/src/python/gudhi/hera/wasserstein.cc +++ b/src/python/gudhi/hera/wasserstein.cc @@ -8,10 +8,16 @@ * - YYYY/MM Author: Description of the modification */ -#include // Hera - #include +#ifdef _MSC_VER +// https://github.com/grey-narn/hera/issues/3 +// ssize_t is a non-standard type (well, posix) +using py::ssize_t; +#endif + +#include // Hera + double wasserstein_distance( Dgm d1, Dgm d2, double wasserstein_power, double internal_p, -- cgit v1.2.3 From 1f532933658bf6788b1ead8ae1d902416872e70a Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 16 Nov 2022 09:55:36 +0100 Subject: Clarify doc. --- src/python/gudhi/simplex_tree.pyx | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 372cb15c..24b970c4 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -290,6 +290,8 @@ cdef class SimplexTree: def insert_batch(self, some_int[:,:] vertex_array, some_float[:] filtrations): """Inserts k-simplices given by a sparse array in a format similar to `torch.sparse `_. + The n-th simplex has vertices `vertex_array[0,n]`, ..., + `vertex_array[k,n]` and filtration value `filtrations[n]`. If a simplex is repeated, the smallest filtration value is used. Simplices with a repeated vertex are currently interpreted as lower dimensional simplices, but we do not guarantee this behavior in the -- cgit v1.2.3 From c80a322a397f88e2b32d4ef286c462a96c20206a Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 17 Nov 2022 00:30:02 +0100 Subject: Rename argument of expansion --- src/python/gudhi/simplex_tree.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 05bfe22e..ec18b708 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -376,7 +376,7 @@ cdef class SimplexTree: """ return self.get_ptr().prune_above_filtration(filtration) - def expansion(self, max_dim): + def expansion(self, max_dimension): """Expands the simplex tree containing only its one skeleton until dimension max_dim. 
@@ -390,10 +390,10 @@ cdef class SimplexTree: The simplex tree must contain no simplex of dimension bigger than 1 when calling the method. - :param max_dim: The maximal dimension. - :type max_dim: int + :param max_dimension: The maximal dimension. + :type max_dimension: int """ - cdef int maxdim = max_dim + cdef int maxdim = max_dimension with nogil: self.get_ptr().expansion(maxdim) -- cgit v1.2.3 From 78533374f5255a601e4e12edf28ca8b51b93f016 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 18 Nov 2022 15:55:31 +0100 Subject: sklearn's DistanceMetric has moved --- src/python/gudhi/representations/vector_methods.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index a169aee8..d52185ef 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -13,8 +13,13 @@ import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from sklearn.exceptions import NotFittedError from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler -from sklearn.neighbors import DistanceMetric from sklearn.metrics import pairwise +try: + # New location since 1.0 + from sklearn.metrics import DistanceMetric +except ImportError: + # Will be removed in 1.3 + from sklearn.neighbors import DistanceMetric from .preprocessing import DiagramScaler, BirthPersistenceTransform -- cgit v1.2.3 From b61611728970de6a9f19fe29de11b9f7087063d8 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 21 Nov 2022 19:23:45 +0100 Subject: Insert vertices first --- src/python/gudhi/simplex_tree.pxd | 3 ++- src/python/gudhi/simplex_tree.pyx | 11 ++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd index f86f1232..5309c6fa 100644 --- a/src/python/gudhi/simplex_tree.pxd +++ b/src/python/gudhi/simplex_tree.pxd @@ -56,7 +56,8 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi": int upper_bound_dimension() nogil bool find_simplex(vector[int] simplex) nogil bool insert(vector[int] simplex, double filtration) nogil - void insert_matrix(double* filtrations, int n, int stride0, int stride1, double max_filtration) nogil + void insert_matrix(double* filtrations, int n, int stride0, int stride1, double max_filtration) nogil except + + void insert_batch_vertices(vector[int] v, double f) nogil except + vector[pair[vector[int], double]] get_star(vector[int] simplex) nogil vector[pair[vector[int], double]] get_cofaces(vector[int] simplex, int dimension) nogil void expansion(int max_dim) nogil except + diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 18215d2f..4cf176f5 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -12,6 +12,7 @@ from libc.stdint cimport intptr_t, int32_t, int64_t import numpy as np cimport gudhi.simplex_tree cimport cython +from numpy.math cimport INFINITY __author__ = "Vincent Rouvreau" __copyright__ = "Copyright (C) 2016 Inria" @@ -239,7 +240,7 @@ cdef class SimplexTree: @staticmethod @cython.boundscheck(False) - def create_from_array(filtrations, double max_filtration=np.inf): + def create_from_array(filtrations, double max_filtration=INFINITY): """Creates a new, empty complex and inserts vertices and edges. 
The vertices are numbered from 0 to n-1, and the filtration values are encoded in the array, with the diagonal representing the vertices. It is the caller's responsibility to ensure that this defines a filtration, which can be achieved with either:: @@ -281,6 +282,8 @@ cdef class SimplexTree: .. seealso:: :func:`insert_batch` """ + # Without this, it could be slow if we end up inserting vertices in a bad order (flat_map). + self.get_ptr().insert_batch_vertices(np.unique(np.stack((edges.row, edges.col))), INFINITY) # TODO: optimize this? for edge in zip(edges.row, edges.col, edges.data): self.get_ptr().insert((edge[0], edge[1]), edge[2]) @@ -303,8 +306,7 @@ cdef class SimplexTree: :param filtrations: the filtration values. :type filtrations: numpy.array of shape (n,) """ - # This may be slow if we end up inserting vertices in a bad order (flat_map). - # We could first insert the vertices from np.unique(vertex_array), or leave it to the caller. + cdef vector[int] vertices = np.unique(vertex_array) cdef Py_ssize_t k = vertex_array.shape[0] cdef Py_ssize_t n = vertex_array.shape[1] assert filtrations.shape[0] == n, 'inconsistent sizes for vertex_array and filtrations' @@ -312,6 +314,9 @@ cdef class SimplexTree: cdef Py_ssize_t j cdef vector[int] v with nogil: + # Without this, it could be slow if we end up inserting vertices in a bad order (flat_map). + # NaN currently does the wrong thing + self.get_ptr().insert_batch_vertices(vertices, INFINITY) for i in range(n): for j in range(k): v.push_back(vertex_array[j, i]) -- cgit v1.2.3 From 8319f4d1042cffaf1a64b1ca5fc4344a2e00320f Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 18 Nov 2022 20:28:40 +0100 Subject: Fix Atol doctest the skip marks were on the wrong lines, which resulted in WARNING: ignoring invalid doctest code Reminder: testoutput was disabled because it was too random (the lines are swapped if I run it right now). --- src/python/gudhi/representations/vector_methods.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index e1402aea..a6d86821 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -711,16 +711,16 @@ class Atol(BaseEstimator, TransformerMixin): >>> c = np.array([[3, 2, -1], [1, 2, -1]]) >>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006)) >>> atol_vectoriser.fit(X=[a, b, c]).centers # doctest: +SKIP - >>> # array([[ 2. , 0.66666667, 3.33333333], - >>> # [ 2.6 , 2.8 , -0.4 ]]) - >>> atol_vectoriser(a) - >>> # array([1.18168665, 0.42375966]) # doctest: +SKIP - >>> atol_vectoriser(c) - >>> # array([0.02062512, 1.25157463]) # doctest: +SKIP + array([[ 2. 
, 0.66666667, 3.33333333], + [ 2.6 , 2.8 , -0.4 ]]) + >>> atol_vectoriser(a) # doctest: +SKIP + array([1.18168665, 0.42375966]) + >>> atol_vectoriser(c) # doctest: +SKIP + array([0.02062512, 1.25157463]) >>> atol_vectoriser.transform(X=[a, b, c]) # doctest: +SKIP - >>> # array([[1.18168665, 0.42375966], - >>> # [0.29861028, 1.06330156], - >>> # [0.02062512, 1.25157463]]) + array([[1.18168665, 0.42375966], + [0.29861028, 1.06330156], + [0.02062512, 1.25157463]]) """ # Note the example above must be up to date with the one in tests called test_atol_doc def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"): -- cgit v1.2.3 From 01fed963e4e51f0d2fdd342de5317822e7a55ecb Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 17 Dec 2022 13:27:07 +0100 Subject: Make the order of atol centers a bit more reproducible. --- src/python/gudhi/representations/vector_methods.py | 24 ++++++++++++---------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index a6d86821..745fe1e5 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -710,17 +710,17 @@ class Atol(BaseEstimator, TransformerMixin): >>> b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]]) >>> c = np.array([[3, 2, -1], [1, 2, -1]]) >>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006)) - >>> atol_vectoriser.fit(X=[a, b, c]).centers # doctest: +SKIP - array([[ 2. , 0.66666667, 3.33333333], - [ 2.6 , 2.8 , -0.4 ]]) - >>> atol_vectoriser(a) # doctest: +SKIP - array([1.18168665, 0.42375966]) - >>> atol_vectoriser(c) # doctest: +SKIP - array([0.02062512, 1.25157463]) - >>> atol_vectoriser.transform(X=[a, b, c]) # doctest: +SKIP - array([[1.18168665, 0.42375966], - [0.29861028, 1.06330156], - [0.02062512, 1.25157463]]) + >>> atol_vectoriser.fit(X=[a, b, c]).centers + array([[ 2.6 , 2.8 , -0.4 ], + [ 2. 
, 0.66666667, 3.33333333]]) + >>> atol_vectoriser(a) + array([0.42375966, 1.18168665]) + >>> atol_vectoriser(c) + array([1.25157463, 0.02062512]) + >>> atol_vectoriser.transform(X=[a, b, c]) + array([[0.42375966, 1.18168665], + [1.06330156, 0.29861028], + [1.25157463, 0.02062512]]) """ # Note the example above must be up to date with the one in tests called test_atol_doc def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"): @@ -771,6 +771,8 @@ class Atol(BaseEstimator, TransformerMixin): measures_concat = np.concatenate(X) self.quantiser.fit(X=measures_concat, sample_weight=sample_weight) self.centers = self.quantiser.cluster_centers_ + # Hack, but some people are unhappy if the order depends on the version of sklearn + self.centers = self.centers[np.lexsort(self.centers.T)] if self.quantiser.n_clusters == 1: dist_centers = pairwise.pairwise_distances(measures_concat) np.fill_diagonal(dist_centers, 0) -- cgit v1.2.3 From 86689f89bf896e41683fd7b1a4568f2b34ea505d Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 3 Jan 2023 21:23:49 +0100 Subject: fix get_params --- src/python/gudhi/representations/vector_methods.py | 18 +++++++++--------- src/python/test/test_representations.py | 6 +++++- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 745fe1e5..ce74aee5 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -138,13 +138,13 @@ def _trim_endpoints(x, are_endpoints_nan): def _grid_from_sample_range(self, X): - sample_range = np.array(self.sample_range_init) + sample_range = np.array(self.sample_range) self.nan_in_range = np.isnan(sample_range) self.new_resolution = self.resolution if not self.keep_endpoints: self.new_resolution += self.nan_in_range.sum() - self.sample_range = _automatic_sample_range(sample_range, X) - self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + self.sample_range_fixed = _automatic_sample_range(sample_range, X) + self.grid_ = np.linspace(self.sample_range_fixed[0], self.sample_range_fixed[1], self.new_resolution) if not self.keep_endpoints: self.grid_ = _trim_endpoints(self.grid_, self.nan_in_range) @@ -166,7 +166,7 @@ class Landscape(BaseEstimator, TransformerMixin): sample_range ([double, double]): minimum and maximum of all piecewise-linear function domains, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. keep_endpoints (bool): when computing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. """ - self.num_landscapes, self.resolution, self.sample_range_init = num_landscapes, resolution, sample_range + self.num_landscapes, self.resolution, self.sample_range = num_landscapes, resolution, sample_range self.keep_endpoints = keep_endpoints def fit(self, X, y=None): @@ -240,7 +240,7 @@ class Silhouette(BaseEstimator, TransformerMixin): sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. 
If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. keep_endpoints (bool): when computing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. """ - self.weight, self.resolution, self.sample_range_init = weight, resolution, sample_range + self.weight, self.resolution, self.sample_range = weight, resolution, sample_range self.keep_endpoints = keep_endpoints def fit(self, X, y=None): @@ -334,7 +334,7 @@ class BettiCurve(BaseEstimator, TransformerMixin): self.predefined_grid = predefined_grid self.resolution = resolution - self.sample_range_init = sample_range + self.sample_range = sample_range self.keep_endpoints = keep_endpoints def is_fitted(self): @@ -468,7 +468,7 @@ class Entropy(BaseEstimator, TransformerMixin): sample_range ([double, double]): minimum and maximum of the entropy summary function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. Used only if **mode** = "vector". keep_endpoints (bool): when computing `sample_range`, use the exact extremities. This is mostly useful for plotting, the default is to use a slightly smaller range. """ - self.mode, self.normalized, self.resolution, self.sample_range_init = mode, normalized, resolution, sample_range + self.mode, self.normalized, self.resolution, self.sample_range = mode, normalized, resolution, sample_range self.keep_endpoints = keep_endpoints def fit(self, X, y=None): @@ -509,8 +509,8 @@ class Entropy(BaseEstimator, TransformerMixin): ent = np.zeros(self.resolution) for j in range(num_pts_in_diag): [px,py] = orig_diagram[j,:2] - min_idx = np.clip(np.ceil((px - self.sample_range[0]) / self.step_).astype(int), 0, self.resolution) - max_idx = np.clip(np.ceil((py - self.sample_range[0]) / self.step_).astype(int), 0, self.resolution) + min_idx = np.clip(np.ceil((px - self.sample_range_fixed[0]) / self.step_).astype(int), 0, self.resolution) + max_idx = np.clip(np.ceil((py - self.sample_range_fixed[0]) / self.step_).astype(int), 0, self.resolution) ent[min_idx:max_idx]-=p[j]*np.log(p[j]) if self.normalized: ent = ent / np.linalg.norm(ent, ord=1) diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index ae0362f8..f4ffbdc1 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -249,7 +249,7 @@ def test_landscape_nan_range(): dgm = np.array([[2., 6.], [3., 5.]]) lds = Landscape(num_landscapes=2, resolution=9, sample_range=[np.nan, 6.]) lds_dgm = lds(dgm) - assert (lds.sample_range[0] == 2) & (lds.sample_range[1] == 6) + assert (lds.sample_range_fixed[0] == 2) & (lds.sample_range_fixed[1] == 6) assert lds.new_resolution == 10 def test_endpoints(): @@ -263,3 +263,7 @@ def test_endpoints(): vec = BettiCurve(resolution=None) vec.fit(diags) assert np.equal(vec.grid_, [-np.inf, 2., 3.]).all() + +def test_get_params(): + for vec in [ Landscape(), Silhouette(), BettiCurve(), Entropy(mode="vector") ]: + vec.get_params() -- cgit v1.2.3
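
As a closing usage sketch of why the get_params fix above matters (this example is not from the patches; it assumes scikit-learn is installed alongside a GUDHI build with these commits): scikit-learn's introspection looks up each constructor argument as an attribute of the same name, so storing sample_range under sample_range_init broke get_params() and, with it, clone() and grid searches.

>>> from sklearn.base import clone
>>> from gudhi.representations import Landscape
>>> ls = Landscape(resolution=50)
>>> sorted(ls.get_params())  # raised AttributeError before the fix
['keep_endpoints', 'num_landscapes', 'resolution', 'sample_range']
>>> clone(ls).resolution  # clone() rebuilds the estimator from get_params()
50
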