From 0a00c46d770699bbe467ade1c619dc94c8fad7b7 Mon Sep 17 00:00:00 2001
From: Marc Glisse <marc.glisse@inria.fr>
Date: Sat, 17 Apr 2021 14:53:53 +0200
Subject: Sparse Rips: disable `mini`, optimize a bit redundant points

---
 .../include/gudhi/Sparse_rips_complex.h            | 23 +++++++++++++++++-----
 src/python/test/test_rips_complex.py               | 21 ++++++++++++++++++++
 2 files changed, 39 insertions(+), 5 deletions(-)
diff --git a/src/Rips_complex/include/gudhi/Sparse_rips_complex.h b/src/Rips_complex/include/gudhi/Sparse_rips_complex.h
index a5501004..d7669dad 100644
--- a/src/Rips_complex/include/gudhi/Sparse_rips_complex.h
+++ b/src/Rips_complex/include/gudhi/Sparse_rips_complex.h
@@ -45,6 +45,7 @@ template <typename Filtration_value>
 class Sparse_rips_complex {
  private:
   // TODO(MG): use a different graph where we know we can safely insert in parallel.
+  //           Use a graph that lets us skip some vertices, for `mini` or redundant points.
   typedef typename boost::adjacency_list<boost::vecS, boost::vecS, boost::directedS,
                                          boost::property<vertex_filtration_t, Filtration_value>,
                                          boost::property<edge_filtration_t, Filtration_value>>
@@ -58,7 +59,8 @@ class Sparse_rips_complex {
    * @param[in] points Range of points.
    * @param[in] distance Distance function that returns a `Filtration_value` from 2 given points.
    * @param[in] epsilon Approximation parameter. epsilon must be positive.
-   * @param[in] mini Minimal filtration value. Ignore anything below this scale. This is a less efficient version of `Gudhi::subsampling::sparsify_point_set()`.
+   * @param[in] mini Not implemented yet, and broken in previous versions. Minimal filtration value.
+   * Ignore anything below this scale. This is a less efficient version of `Gudhi::subsampling::sparsify_point_set()`.
    * @param[in] maxi Maximal filtration value. Ignore anything above this scale.
    *
    */
@@ -67,6 +69,7 @@ class Sparse_rips_complex {
       : epsilon_(epsilon) {
     GUDHI_CHECK(epsilon > 0, "epsilon must be positive");
     auto dist_fun = [&](Vertex_handle i, Vertex_handle j) { return distance(points[i], points[j]); };
+    // TODO: stop choose_n_farthest_points once it reaches mini? Then the graph vertices would not be [0, ..., n-1] which complicates things.
     subsampling::choose_n_farthest_points(dist_fun, boost::irange<Vertex_handle>(0, boost::size(points)), -1, -1,
                                           std::back_inserter(sorted_points), std::back_inserter(params));
     compute_sparse_graph(dist_fun, epsilon, mini, maxi);
@@ -116,9 +119,9 @@ class Sparse_rips_complex {
     double cst = epsilon_ * (1 - epsilon_) / 2;
     auto block = [cst,&complex,&lambda](typename SimplicialComplexForRips::Simplex_handle sh){
       auto filt = complex.filtration(sh);
-      auto mini = filt * cst;
+      auto min_f = filt * cst;
       for(auto v : complex.simplex_vertex_range(sh)){
-        if(lambda[v] < mini)
+        if(lambda[v] < min_f)
           return true; // v died before this simplex could be born
       }
       return false;
@@ -149,12 +152,22 @@ class Sparse_rips_complex {
     for (int i = 0; i < n; ++i) {
       auto&& pi = points[i];
       auto li = params[i];
-      if (li < mini) break;
+      // FIXME: see below about mini. It might be ok to uncomment just this one, but it requires a proof.
+      // if ((li < mini || li <= 0) && i != 0) break;
+      if (li <= 0 && i != 0) break;
+      // The parameter of the first point is not very meaningful, it is supposed to be infinite,
+      // but if the type does not support it...
+      // Points with multiplicity get connected to their first representative, no need to handle
+      // the redundant ones in the outer loop.
       for (int j = i + 1; j < n; ++j) {
         auto&& pj = points[j];
         auto d = dist(pi, pj);
         auto lj = params[j];
-        if (lj < mini) break;
+        // FIXME: It would make sense to ignore the points with low param completely, but the current graph type we are
+        // using implicitly inserts all the vertices 0 ... n-1, so this would create isolated vertices, which is bad.
+        // If we do end up ignoring those points, we should do it early, around choose_n_farthest_points. But be careful
+        // that the size of lambda should reflect the original number of points then.
+        // if (lj < mini) break;
         GUDHI_CHECK(lj <= li, "Bad furthest point sorting");
         Filtration_value alpha;
 
diff --git a/src/python/test/test_rips_complex.py b/src/python/test/test_rips_complex.py
index b86e7498..cae21435 100755
--- a/src/python/test/test_rips_complex.py
+++ b/src/python/test/test_rips_complex.py
@@ -133,3 +133,24 @@ def test_filtered_rips_from_distance_matrix():
 
     assert simplex_tree.num_simplices() == 8
     assert simplex_tree.num_vertices() == 4
+
+
+def test_sparse_with_multiplicity():
+    points = [
+        [3, 4],
+        [0.1, 2],
+        [0.1, 2],
+        [0.1, 2],
+        [0.1, 2],
+        [0.1, 2],
+        [0.1, 2],
+        [0.1, 2],
+        [0.1, 2],
+        [0.1, 2],
+        [0.1, 2],
+        [3, 4.1],
+    ]
+    rips = RipsComplex(points=points, sparse=0.01)
+    simplex_tree = rips.create_simplex_tree(max_dimension=2)
+    assert simplex_tree.num_simplices() == 25
+    diag = simplex_tree.persistence()
-- 
cgit v1.2.3