Merge pull request #49 from mglisse/sparsev3

All seems good to me and the new interface (with min and max) will allow to benchmark it with Strong collapse.
author: Vincent Rouvreau <VincentRouvreau@users.noreply.github.com> 2019-05-22 08:21:15 +0200
committer: GitHub <noreply@github.com> 2019-05-22 08:21:15 +0200
commit: 0c7d6ae4ddb68422e02a48b6a5575c176041d3e4 (patch)
tree: 3565472af8636cf8fe6dac525fd0eae3711cc1ff /src
parent: a33d17ebb1a5f7c830ee155ec2733a0f48211010 (diff)
parent: fe2f26629481faf316c74dafa7eae892bbc7a556 (diff)
8 files changed, 104 insertions, 43 deletions
diff --git a/src/Rips_complex/concept/SimplicialComplexForRips.h b/src/Rips_complex/concept/SimplicialComplexForRips.h
index 3c5acecf..36ab1b0c 100644
--- a/src/Rips_complex/concept/SimplicialComplexForRips.h
+++ b/src/Rips_complex/concept/SimplicialComplexForRips.h
@@ -34,6 +34,9 @@ struct SimplicialComplexForRips {
   /** \brief Type used to store the filtration values of the simplicial complex. */
   typedef unspecified Filtration_value;
 
+  /** \brief Handle type to a simplex contained in the simplicial complex. */
+  typedef unspecified Simplex_handle;
+
   /** \brief Inserts a given `Gudhi::rips_complex::Rips_complex::OneSkeletonGraph` in the simplicial complex. */
   template<class OneSkeletonGraph>
   void insert_graph(const OneSkeletonGraph& skel_graph);
@@ -42,6 +45,24 @@ struct SimplicialComplexForRips {
    * explained in \ref ripsdefinition. */
   void expansion(int max_dim);
 
+  /** \brief Expands a simplicial complex containing only a graph. Simplices corresponding to cliques in the graph are added
+   * incrementally, faces before cofaces, unless the simplex has dimension larger than `max_dim` or `block_simplex`
+   * returns true for this simplex.
+   *
+   * @param[in] max_dim Expansion maximal dimension value.
+   * @param[in] block_simplex Blocker oracle. Its concept is <CODE>bool block_simplex(Simplex_handle sh)</CODE>
+   *
+   * The function identifies a candidate simplex whose faces are all already in the complex, inserts
+   * it with a filtration value corresponding to the maximum of the filtration values of the faces, then calls
+   * `block_simplex` on a `Simplex_handle` for this new simplex. If `block_simplex` returns true, the simplex is
+   * removed, otherwise it is kept.
+   */
+  template< typename Blocker >
+  void expansion_with_blockers(int max_dim, Blocker block_simplex);
+
+  /** \brief Returns a range over the vertices of a simplex.  */
+  unspecified simplex_vertex_range(Simplex_handle sh);
+
   /** \brief Returns the number of vertices in the simplicial complex. */
   std::size_t num_vertices();
 
diff --git a/src/Rips_complex/doc/Intro_rips_complex.h b/src/Rips_complex/doc/Intro_rips_complex.h
index a2537036..97d66fbd 100644
--- a/src/Rips_complex/doc/Intro_rips_complex.h
+++ b/src/Rips_complex/doc/Intro_rips_complex.h
@@ -92,21 +92,27 @@ namespace rips_complex {
  * The sparse Rips filtration was introduced by Don Sheehy
  * \cite sheehy13linear. We are using the version described in
  * \cite buchet16efficient (except that we multiply all filtration values
- * by 2, to match the usual Rips complex), which proves a
- * \f$\frac{1+\epsilon}{1-\epsilon}\f$-interleaving, although in practice the
+ * by 2, to match the usual Rips complex), for which \cite cavanna15geometric proves a
+ * \f$(1,\frac{1}{1-\epsilon})\f$-interleaving, although in practice the
  * error is usually smaller.
  * A more intuitive presentation of the idea is available in
  * \cite cavanna15geometric, and in a video \cite cavanna15visualizing.
  *
  * The interface of `Sparse_rips_complex` is similar to the one for the usual
- * `Rips_complex`, except that one has to specify the approximation factor, and
- * there is no option to limit the maximum filtration value (the way the
- * approximation is done means that larger filtration values are much cheaper
- * to handle than low filtration values, so the gain would be too small).
+ * `Rips_complex`, except that one has to specify the approximation factor.
+ * There is an option to limit the minimum and maximum filtration values, but
+ * they are not recommended: the way the approximation is done means that
+ * larger filtration values are much cheaper to handle than low filtration
+ * values, so the gain in ignoring the large ones is small, and
+ * `Gudhi::subsampling::sparsify_point_set()` is a more efficient way of
+ * ignoring small filtration values.
  *
  * Theoretical guarantees are only available for \f$\epsilon<1\f$. The
  * construction accepts larger values of &epsilon;, and the size of the complex
  * keeps decreasing, but there is no guarantee on the quality of the result.
+ * Note that while the number of edges decreases when &epsilon; increases, the
+ * number of higher-dimensional simplices may not be monotonous when
+ * \f$\frac12\leq\epsilon\leq 1\f$.
  *
  * \section ripspointsdistance Point cloud and distance function
  * 
@@ -119,8 +125,8 @@ namespace rips_complex {
  * 
  * \include Rips_complex/example_one_skeleton_rips_from_points.cpp
  * 
- * When launching (Rips maximal distance between 2 points is 12.0, is expanded until dimension 1 - one skeleton graph
- * in other words):
+ * When launching (Rips maximal distance between 2 points is 12.0, is expanded
+ * until dimension 1 - one skeleton graph in other words):
  * 
  * \code $> ./Rips_complex_example_one_skeleton_from_points
  * \endcode
diff --git a/src/Rips_complex/include/gudhi/Rips_complex.h b/src/Rips_complex/include/gudhi/Rips_complex.h
index e902e52c..ee100867 100644
--- a/src/Rips_complex/include/gudhi/Rips_complex.h
+++ b/src/Rips_complex/include/gudhi/Rips_complex.h
@@ -90,7 +90,7 @@ class Rips_complex {
    * @param[in] threshold Rips value.
    * 
    * \tparam DistanceMatrix must have a `size()` method and on which `distance_matrix[i][j]` returns
-   * the distance between points \f$i\f$ and \f$j\f$ as long as \f$ 0 \leqslant i < j \leqslant
+   * the distance between points \f$i\f$ and \f$j\f$ as long as \f$ 0 \leqslant j < i \leqslant
    * distance\_matrix.size().\f$
    */
   template<typename DistanceMatrix>
diff --git a/src/Rips_complex/include/gudhi/Sparse_rips_complex.h b/src/Rips_complex/include/gudhi/Sparse_rips_complex.h
index 00da148f..8df6e387 100644
--- a/src/Rips_complex/include/gudhi/Sparse_rips_complex.h
+++ b/src/Rips_complex/include/gudhi/Sparse_rips_complex.h
@@ -47,7 +47,9 @@ namespace rips_complex {
  *
  * \details
  * This class is used to construct a sparse \f$(1+O(\epsilon))\f$-approximation of `Rips_complex`, i.e. a filtered
- * simplicial complex that is multiplicatively \f$(1+O(\epsilon))\f$-interleaved with the Rips filtration.
+ * simplicial complex that is multiplicatively
+ * \f$(1+O(\epsilon))\f$-interleaved with the Rips filtration. More precisely,
+ * this is a \f$(1,\frac{1}{1-\epsilon}\f$-interleaving.
  *
  * \tparam Filtration_value is the type used to store the filtration values of the simplicial complex.
  */
@@ -68,32 +70,36 @@ class Sparse_rips_complex {
    * @param[in] points Range of points.
    * @param[in] distance Distance function that returns a `Filtration_value` from 2 given points.
    * @param[in] epsilon Approximation parameter. epsilon must be positive.
+   * @param[in] mini Minimal filtration value. Ignore anything below this scale. This is a less efficient version of `Gudhi::subsampling::sparsify_point_set()`.
+   * @param[in] maxi Maximal filtration value. Ignore anything above this scale.
    *
    */
   template <typename RandomAccessPointRange, typename Distance>
-  Sparse_rips_complex(const RandomAccessPointRange& points, Distance distance, double epsilon) {
+  Sparse_rips_complex(const RandomAccessPointRange& points, Distance distance, double epsilon, Filtration_value mini=-std::numeric_limits<Filtration_value>::infinity(), Filtration_value maxi=std::numeric_limits<Filtration_value>::infinity())
+      : epsilon_(epsilon) {
     GUDHI_CHECK(epsilon > 0, "epsilon must be positive");
-    std::vector<Vertex_handle> sorted_points;
-    std::vector<Filtration_value> params;
     auto dist_fun = [&](Vertex_handle i, Vertex_handle j) { return distance(points[i], points[j]); };
     Ker<decltype(dist_fun)> kernel(dist_fun);
     subsampling::choose_n_farthest_points(kernel, boost::irange<Vertex_handle>(0, boost::size(points)), -1, -1,
                                           std::back_inserter(sorted_points), std::back_inserter(params));
-    compute_sparse_graph(sorted_points, params, dist_fun, epsilon);
+    compute_sparse_graph(dist_fun, epsilon, mini, maxi);
   }
 
   /** \brief Sparse_rips_complex constructor from a distance matrix.
    *
    * @param[in] distance_matrix Range of range of distances.
    * `distance_matrix[i][j]` returns the distance between points \f$i\f$ and
-   * \f$j\f$ as long as \f$ 0 \leqslant i < j \leqslant
+   * \f$j\f$ as long as \f$ 0 \leqslant j < i \leqslant
    * distance\_matrix.size().\f$
    * @param[in] epsilon Approximation parameter. epsilon must be positive.
+   * @param[in] mini Minimal filtration value. Ignore anything below this scale. This is a less efficient version of `Gudhi::subsampling::sparsify_point_set()`.
+   * @param[in] maxi Maximal filtration value. Ignore anything above this scale.
    */
   template <typename DistanceMatrix>
-  Sparse_rips_complex(const DistanceMatrix& distance_matrix, double epsilon)
+  Sparse_rips_complex(const DistanceMatrix& distance_matrix, double epsilon, Filtration_value mini=-std::numeric_limits<Filtration_value>::infinity(), Filtration_value maxi=std::numeric_limits<Filtration_value>::infinity())
       : Sparse_rips_complex(boost::irange<Vertex_handle>(0, boost::size(distance_matrix)),
-                            [&](Vertex_handle i, Vertex_handle j) { return distance_matrix[j][i]; }, epsilon) {}
+                            [&](Vertex_handle i, Vertex_handle j) { return (i==j) ? 0 : (i<j) ? distance_matrix[j][i] : distance_matrix[i][j]; },
+                            epsilon, mini, maxi) {}
 
   /** \brief Fills the simplicial complex with the sparse Rips graph and
    * expands it with all the cliques, stopping at a given maximal dimension.
@@ -111,7 +117,26 @@ class Sparse_rips_complex {
                 std::invalid_argument("Sparse_rips_complex::create_complex - simplicial complex is not empty"));
 
     complex.insert_graph(graph_);
-    complex.expansion(dim_max);
+    if(epsilon_ >= 1) {
+      complex.expansion(dim_max);
+      return;
+    }
+    const int n = boost::size(params);
+    std::vector<Filtration_value> lambda(n);
+    // lambda[original_order]=params[sorted_order]
+    for(int i=0;i<n;++i)
+      lambda[sorted_points[i]] = params[i];
+    double cst = epsilon_ * (1 - epsilon_) / 2;
+    auto block = [cst,&complex,&lambda](typename SimplicialComplexForRips::Simplex_handle sh){
+      auto filt = complex.filtration(sh);
+      auto mini = filt * cst;
+      for(auto v : complex.simplex_vertex_range(sh)){
+        if(lambda[v] < mini)
+          return true; // v died before this simplex could be born
+      }
+      return false;
+    };
+    complex.expansion_with_blockers(dim_max, block);
   }
 
  private:
@@ -127,9 +152,11 @@ class Sparse_rips_complex {
   };
 
   // PointRange must be random access.
-  template <typename PointRange, typename ParamRange, typename Distance>
-  void compute_sparse_graph(const PointRange& points, const ParamRange& params, Distance& dist, double epsilon) {
+  template <typename Distance>
+  void compute_sparse_graph(Distance& dist, double epsilon, Filtration_value mini, Filtration_value maxi) {
+    const auto& points = sorted_points; // convenience alias
     const int n = boost::size(points);
+    double cst = epsilon * (1 - epsilon) / 2;
     graph_.~Graph();
     new (&graph_) Graph(n);
     // for(auto v : vertices(g)) // doesn't work :-(
@@ -143,29 +170,43 @@ class Sparse_rips_complex {
     // TODO(MG):
     // - make it parallel
     // - only test near-enough neighbors
-    for (int i = 0; i < n; ++i)
+    for (int i = 0; i < n; ++i) {
+      auto&& pi = points[i];
+      auto li = params[i];
+      if (li < mini) break;
       for (int j = i + 1; j < n; ++j) {
-        auto&& pi = points[i];
         auto&& pj = points[j];
         auto d = dist(pi, pj);
-        auto li = params[i];
         auto lj = params[j];
+        if (lj < mini) break;
         GUDHI_CHECK(lj <= li, "Bad furthest point sorting");
         Filtration_value alpha;
 
         // The paper has d/2 and d-lj/e to match the Cech, but we use doubles to match the Rips
         if (d * epsilon <= 2 * lj)
           alpha = d;
-        else if (d * epsilon <= li + lj && (epsilon >= 1 || d * epsilon <= lj * (1 + 1 / (1 - epsilon))))
-          alpha = (d - lj / epsilon) * 2;
-        else
+        else if (d * epsilon > li + lj)
           continue;
+        else {
+          alpha = (d - lj / epsilon) * 2;
+          // Keep the test exactly the same as in block to avoid inconsistencies
+          if (epsilon < 1 && alpha * cst > lj)
+            continue;
+        }
 
-        add_edge(pi, pj, alpha, graph_);
+        if (alpha <= maxi)
+          add_edge(pi, pj, alpha, graph_);
       }
+    }
   }
 
   Graph graph_;
+  double epsilon_;
+  // Because of the arbitrary split between constructor and create_complex
+  // sorted_points[sorted_order]=original_order
+  std::vector<Vertex_handle> sorted_points;
+  // params[sorted_order]=distance to previous points
+  std::vector<Filtration_value> params;
 };
 
 }  // namespace rips_complex
diff --git a/src/Rips_complex/utilities/ripscomplex.md b/src/Rips_complex/utilities/ripscomplex.md
index 108cdd50..03838085 100644
--- a/src/Rips_complex/utilities/ripscomplex.md
+++ b/src/Rips_complex/utilities/ripscomplex.md
@@ -85,7 +85,7 @@ properly, this is a known issue.
 
 ## sparse_rips_persistence ##
 This program computes the persistent homology with coefficient field *Z/pZ*
-of a sparse (1+epsilon)/(1-epsilon)-approximation of the Rips complex defined on a set of input Euclidean points. The output diagram contains one bar per line, written with the convention:
+of a sparse 1/(1-epsilon)-approximation of the Rips complex defined on a set of input Euclidean points. The output diagram contains one bar per line, written with the convention:
 
 `p dim birth death`
 
@@ -100,7 +100,7 @@ where `dim` is the dimension of the homological feature, `birth` and `death` are
 * `-h [ --help ]` Produce help message
 * `-o [ --output-file ]` Name of file in which the persistence diagram is written. Default print in standard output.
 * `-e [ --approximation ]` (default = .5) Epsilon, where the sparse Rips complex is a (1+epsilon)/(1-epsilon)-approximation of the Rips complex.
-* `-d [ --cpx-dimension ]` (default = 1) Maximal dimension of the Rips complex we want to compute.
+* `-d [ --cpx-dimension ]` (default = INT_MAX) Maximal dimension of the Rips complex we want to compute.
 * `-p [ --field-charac ]` (default = 11)     Characteristic p of the coefficient field Z/pZ for computing homology.
 * `-m [ --min-persistence ]` (default = 0) Minimal lifetime of homology feature to be recorded. Enter a negative value to see zero length intervals.
 
diff --git a/src/Rips_complex/utilities/sparse_rips_persistence.cpp b/src/Rips_complex/utilities/sparse_rips_persistence.cpp
index 6d4d86fd..3840d9f7 100644
--- a/src/Rips_complex/utilities/sparse_rips_persistence.cpp
+++ b/src/Rips_complex/utilities/sparse_rips_persistence.cpp
@@ -98,7 +98,7 @@ void program_options(int argc, char* argv[], std::string& off_file_points, std::
       "Name of file in which the persistence diagram is written. Default print in std::cout")(
       "approximation,e", po::value<double>(&epsilon)->default_value(.5),
       "Epsilon, where the sparse Rips complex is a (1+epsilon)-approximation of the Rips complex.")(
-      "cpx-dimension,d", po::value<int>(&dim_max)->default_value(1),
+      "cpx-dimension,d", po::value<int>(&dim_max)->default_value(std::numeric_limits<int>::max()),
       "Maximal dimension of the Rips complex we want to compute.")(
       "field-charac,p", po::value<int>(&p)->default_value(11),
       "Characteristic p of the coefficient field Z/pZ for computing homology.")(
@@ -119,7 +119,7 @@ void program_options(int argc, char* argv[], std::string& off_file_points, std::
   if (vm.count("help") || !vm.count("input-file")) {
     std::cout << std::endl;
     std::cout << "Compute the persistent homology with coefficient field Z/pZ \n";
-    std::cout << "of a sparse (1+epsilon)-approximation of the Rips complex \ndefined on a set of input points.\n \n";
+    std::cout << "of a sparse 1/(1-epsilon)-approximation of the Rips complex \ndefined on a set of input points.\n \n";
     std::cout << "The output diagram contains one bar per line, written with the convention: \n";
     std::cout << "   p   dim b d \n";
     std::cout << "where dim is the dimension of the homological feature,\n";
diff --git a/src/cython/doc/rips_complex_user.rst b/src/cython/doc/rips_complex_user.rst
index e814b4c3..1d340dbe 100644
--- a/src/cython/doc/rips_complex_user.rst
+++ b/src/cython/doc/rips_complex_user.rst
@@ -50,8 +50,8 @@ by more than the length used to define "too close".
 A more general technique is to use a sparse approximation of the Rips
 introduced by Don Sheehy :cite:`sheehy13linear`. We are using the version
 described in :cite:`buchet16efficient` (except that we multiply all filtration
-values by 2, to match the usual Rips complex), which proves a
-:math:`\frac{1+\varepsilon}{1-\varepsilon}`-interleaving, although in practice the
+values by 2, to match the usual Rips complex). :cite:`cavanna15geometric` proves
+a :math:`\frac{1}{1-\varepsilon}`-interleaving, although in practice the
 error is usually smaller.  A more intuitive presentation of the idea is
 available in :cite:`cavanna15geometric`, and in a video
 :cite:`cavanna15visualizing`. Passing an extra argument `sparse=0.3` at the
diff --git a/src/cython/include/Rips_complex_interface.h b/src/cython/include/Rips_complex_interface.h
index 1a6e2477..40aff299 100644
--- a/src/cython/include/Rips_complex_interface.h
+++ b/src/cython/include/Rips_complex_interface.h
@@ -54,23 +54,17 @@ class Rips_complex_interface {
   }
 
   void init_points_sparse(const std::vector<std::vector<double>>& points, double threshold, double epsilon) {
-    sparse_rips_complex_.emplace(points, Gudhi::Euclidean_distance(), epsilon);
-    threshold_ = threshold;
+    sparse_rips_complex_.emplace(points, Gudhi::Euclidean_distance(), epsilon, -std::numeric_limits<double>::infinity(), threshold);
   }
   void init_matrix_sparse(const std::vector<std::vector<double>>& matrix, double threshold, double epsilon) {
-    sparse_rips_complex_.emplace(matrix, epsilon);
-    threshold_ = threshold;
+    sparse_rips_complex_.emplace(matrix, epsilon, -std::numeric_limits<double>::infinity(), threshold);
   }
 
   void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, int dim_max) {
     if (rips_complex_)
       rips_complex_->create_complex(*simplex_tree, dim_max);
-    else {
+    else
       sparse_rips_complex_->create_complex(*simplex_tree, dim_max);
-      // This pruning should be done much earlier! It isn't that useful for sparse Rips,
-      // but it would be inconsistent not to do it.
-      simplex_tree->prune_above_filtration(threshold_);
-    }
     simplex_tree->initialize_filtration();
   }
 
@@ -79,7 +73,6 @@ class Rips_complex_interface {
   // Anyway, storing a graph would make more sense. Or changing the interface completely so there is no such storage.
   boost::optional<Rips_complex<Simplex_tree_interface<>::Filtration_value>> rips_complex_;
   boost::optional<Sparse_rips_complex<Simplex_tree_interface<>::Filtration_value>> sparse_rips_complex_;
-  double threshold_;
 };
 
 }  // namespace rips_complex
author	Vincent Rouvreau <VincentRouvreau@users.noreply.github.com>	2019-05-22 08:21:15 +0200
committer	GitHub <noreply@github.com>	2019-05-22 08:21:15 +0200
commit	0c7d6ae4ddb68422e02a48b6a5575c176041d3e4 (patch)
tree	3565472af8636cf8fe6dac525fd0eae3711cc1ff /src
parent	a33d17ebb1a5f7c830ee155ec2733a0f48211010 (diff)
parent	fe2f26629481faf316c74dafa7eae892bbc7a556 (diff)