From 6525c78704489b0c8cb62b2e3f882ce6113c0f0d Mon Sep 17 00:00:00 2001
From: Marc Glisse <marc.glisse@inria.fr>
Date: Tue, 16 Apr 2019 23:19:19 +0200
Subject: Add min and max filtration values for the sparse Rips

---
 src/Rips_complex/doc/Intro_rips_complex.h          | 11 +++++----
 .../include/gudhi/Sparse_rips_complex.h            | 27 ++++++++++++++--------
 src/cython/doc/rips_complex_user.rst               |  4 ++--
 src/cython/include/Rips_complex_interface.h        | 13 +++--------
 4 files changed, 30 insertions(+), 25 deletions(-)

(limited to 'src')
diff --git a/src/Rips_complex/doc/Intro_rips_complex.h b/src/Rips_complex/doc/Intro_rips_complex.h
index 1aac804b..1d8cd2ba 100644
--- a/src/Rips_complex/doc/Intro_rips_complex.h
+++ b/src/Rips_complex/doc/Intro_rips_complex.h
@@ -99,10 +99,13 @@ namespace rips_complex {
  * \cite cavanna15geometric, and in a video \cite cavanna15visualizing.
  *
  * The interface of `Sparse_rips_complex` is similar to the one for the usual
- * `Rips_complex`, except that one has to specify the approximation factor, and
- * there is no option to limit the maximum filtration value (the way the
- * approximation is done means that larger filtration values are much cheaper
- * to handle than low filtration values, so the gain would be too small).
+ * `Rips_complex`, except that one has to specify the approximation factor.
+ * There is an option to limit the minimum and maximum filtration values, but
+ * they are not recommended: the way the approximation is done means that
+ * larger filtration values are much cheaper to handle than low filtration
+ * values, so the gain in ignoring the large ones is small, and
+ * `Gudhi::subsampling::sparsify_point_set()` is a more efficient way of
+ * ignoring small filtration values.
  *
  * Theoretical guarantees are only available for \f$\epsilon<1\f$. The
  * construction accepts larger values of &epsilon;, and the size of the complex
diff --git a/src/Rips_complex/include/gudhi/Sparse_rips_complex.h b/src/Rips_complex/include/gudhi/Sparse_rips_complex.h
index 1e8fb6a3..a249cd8e 100644
--- a/src/Rips_complex/include/gudhi/Sparse_rips_complex.h
+++ b/src/Rips_complex/include/gudhi/Sparse_rips_complex.h
@@ -70,17 +70,19 @@ class Sparse_rips_complex {
    * @param[in] points Range of points.
    * @param[in] distance Distance function that returns a `Filtration_value` from 2 given points.
    * @param[in] epsilon Approximation parameter. epsilon must be positive.
+   * @param[in] mini Minimal filtration value. Ignore anything below this scale. This is a less efficient version of `Gudhi::subsampling::sparsify_point_set()`.
+   * @param[in] maxi Maximal filtration value. Ignore anything above this scale.
    *
    */
   template <typename RandomAccessPointRange, typename Distance>
-  Sparse_rips_complex(const RandomAccessPointRange& points, Distance distance, double epsilon)
+  Sparse_rips_complex(const RandomAccessPointRange& points, Distance distance, double epsilon, Filtration_value mini=-std::numeric_limits<Filtration_value>::infinity(), Filtration_value maxi=std::numeric_limits<Filtration_value>::infinity())
       : epsilon_(epsilon) {
     GUDHI_CHECK(epsilon > 0, "epsilon must be positive");
     auto dist_fun = [&](Vertex_handle i, Vertex_handle j) { return distance(points[i], points[j]); };
     Ker<decltype(dist_fun)> kernel(dist_fun);
     subsampling::choose_n_farthest_points(kernel, boost::irange<Vertex_handle>(0, boost::size(points)), -1, -1,
                                           std::back_inserter(sorted_points), std::back_inserter(params));
-    compute_sparse_graph(dist_fun, epsilon);
+    compute_sparse_graph(dist_fun, epsilon, mini, maxi);
   }
 
   /** \brief Sparse_rips_complex constructor from a distance matrix.
@@ -90,11 +92,14 @@ class Sparse_rips_complex {
    * \f$j\f$ as long as \f$ 0 \leqslant j < i \leqslant
    * distance\_matrix.size().\f$
    * @param[in] epsilon Approximation parameter. epsilon must be positive.
+   * @param[in] mini Minimal filtration value. Ignore anything below this scale. This is a less efficient version of `Gudhi::subsampling::sparsify_point_set()`.
+   * @param[in] maxi Maximal filtration value. Ignore anything above this scale.
    */
   template <typename DistanceMatrix>
-  Sparse_rips_complex(const DistanceMatrix& distance_matrix, double epsilon)
+  Sparse_rips_complex(const DistanceMatrix& distance_matrix, double epsilon, Filtration_value mini=-std::numeric_limits<Filtration_value>::infinity(), Filtration_value maxi=std::numeric_limits<Filtration_value>::infinity())
       : Sparse_rips_complex(boost::irange<Vertex_handle>(0, boost::size(distance_matrix)),
-                            [&](Vertex_handle i, Vertex_handle j) { return (i==j) ? 0 : (i<j) ? distance_matrix[j][i] : distance_matrix[i][j]; }, epsilon) {}
+                            [&](Vertex_handle i, Vertex_handle j) { return (i==j) ? 0 : (i<j) ? distance_matrix[j][i] : distance_matrix[i][j]; },
+                            epsilon, mini, maxi) {}
 
   /** \brief Fills the simplicial complex with the sparse Rips graph and
    * expands it with all the cliques, stopping at a given maximal dimension.
@@ -148,7 +153,7 @@ class Sparse_rips_complex {
 
   // PointRange must be random access.
   template <typename Distance>
-  void compute_sparse_graph(Distance& dist, double epsilon) {
+  void compute_sparse_graph(Distance& dist, double epsilon, Filtration_value mini, Filtration_value maxi) {
     const auto& points = sorted_points; // convenience alias
     const int n = boost::size(points);
     graph_.~Graph();
@@ -164,13 +169,15 @@ class Sparse_rips_complex {
     // TODO(MG):
     // - make it parallel
     // - only test near-enough neighbors
-    for (int i = 0; i < n; ++i)
+    for (int i = 0; i < n; ++i) {
+      auto&& pi = points[i];
+      auto li = params[i];
+      if (li < mini) break;
       for (int j = i + 1; j < n; ++j) {
-        auto&& pi = points[i];
         auto&& pj = points[j];
         auto d = dist(pi, pj);
-        auto li = params[i];
         auto lj = params[j];
+        if (lj < mini) break;
         GUDHI_CHECK(lj <= li, "Bad furthest point sorting");
         Filtration_value alpha;
 
@@ -182,8 +189,10 @@ class Sparse_rips_complex {
         else
           continue;
 
-        add_edge(pi, pj, alpha, graph_);
+        if (alpha <= maxi)
+          add_edge(pi, pj, alpha, graph_);
       }
+    }
   }
 
   Graph graph_;
diff --git a/src/cython/doc/rips_complex_user.rst b/src/cython/doc/rips_complex_user.rst
index e814b4c3..1d340dbe 100644
--- a/src/cython/doc/rips_complex_user.rst
+++ b/src/cython/doc/rips_complex_user.rst
@@ -50,8 +50,8 @@ by more than the length used to define "too close".
 A more general technique is to use a sparse approximation of the Rips
 introduced by Don Sheehy :cite:`sheehy13linear`. We are using the version
 described in :cite:`buchet16efficient` (except that we multiply all filtration
-values by 2, to match the usual Rips complex), which proves a
-:math:`\frac{1+\varepsilon}{1-\varepsilon}`-interleaving, although in practice the
+values by 2, to match the usual Rips complex). :cite:`cavanna15geometric` proves
+a :math:`\frac{1}{1-\varepsilon}`-interleaving, although in practice the
 error is usually smaller.  A more intuitive presentation of the idea is
 available in :cite:`cavanna15geometric`, and in a video
 :cite:`cavanna15visualizing`. Passing an extra argument `sparse=0.3` at the
diff --git a/src/cython/include/Rips_complex_interface.h b/src/cython/include/Rips_complex_interface.h
index 1a6e2477..40aff299 100644
--- a/src/cython/include/Rips_complex_interface.h
+++ b/src/cython/include/Rips_complex_interface.h
@@ -54,23 +54,17 @@ class Rips_complex_interface {
   }
 
   void init_points_sparse(const std::vector<std::vector<double>>& points, double threshold, double epsilon) {
-    sparse_rips_complex_.emplace(points, Gudhi::Euclidean_distance(), epsilon);
-    threshold_ = threshold;
+    sparse_rips_complex_.emplace(points, Gudhi::Euclidean_distance(), epsilon, -std::numeric_limits<double>::infinity(), threshold);
   }
   void init_matrix_sparse(const std::vector<std::vector<double>>& matrix, double threshold, double epsilon) {
-    sparse_rips_complex_.emplace(matrix, epsilon);
-    threshold_ = threshold;
+    sparse_rips_complex_.emplace(matrix, epsilon, -std::numeric_limits<double>::infinity(), threshold);
   }
 
   void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, int dim_max) {
     if (rips_complex_)
       rips_complex_->create_complex(*simplex_tree, dim_max);
-    else {
+    else
       sparse_rips_complex_->create_complex(*simplex_tree, dim_max);
-      // This pruning should be done much earlier! It isn't that useful for sparse Rips,
-      // but it would be inconsistent not to do it.
-      simplex_tree->prune_above_filtration(threshold_);
-    }
     simplex_tree->initialize_filtration();
   }
 
@@ -79,7 +73,6 @@ class Rips_complex_interface {
   // Anyway, storing a graph would make more sense. Or changing the interface completely so there is no such storage.
   boost::optional<Rips_complex<Simplex_tree_interface<>::Filtration_value>> rips_complex_;
   boost::optional<Sparse_rips_complex<Simplex_tree_interface<>::Filtration_value>> sparse_rips_complex_;
-  double threshold_;
 };
 
 }  // namespace rips_complex
-- 
cgit v1.2.3