5 files changed, 161 insertions, 54 deletions
diff --git a/src/Rips_complex/include/gudhi/Sparse_rips_complex.h b/src/Rips_complex/include/gudhi/Sparse_rips_complex.h
index a5501004..7ae7b317 100644
--- a/src/Rips_complex/include/gudhi/Sparse_rips_complex.h
+++ b/src/Rips_complex/include/gudhi/Sparse_rips_complex.h
@@ -15,12 +15,71 @@
 #include <gudhi/graph_simplicial_complex.h>
 #include <gudhi/choose_n_farthest_points.h>
 
-#include <boost/graph/adjacency_list.hpp>
+#include <boost/graph/graph_traits.hpp>
 #include <boost/range/metafunctions.hpp>
+#include <boost/iterator/counting_iterator.hpp>
 
 #include <vector>
 
 namespace Gudhi {
+namespace rips_complex {
+// A custom graph class, because boost::adjacency_list does not conveniently allow to choose vertex descriptors
+template <class Vertex_handle, class Filtration_value>
+struct Graph {
+  typedef std::vector<Vertex_handle> VList;
+  typedef std::vector<std::tuple<Vertex_handle, Vertex_handle, Filtration_value>> EList;
+  typedef typename VList::const_iterator vertex_iterator;
+  typedef boost::counting_iterator<std::size_t> edge_iterator;
+  VList vlist;
+  EList elist;
+};
+template <class Vertex_handle, class Filtration_value>
+void add_vertex(Vertex_handle v, Graph<Vertex_handle, Filtration_value>&g) { g.vlist.push_back(v); }
+template <class Vertex_handle, class Filtration_value>
+void add_edge(Vertex_handle u, Vertex_handle v, Filtration_value f, Graph<Vertex_handle, Filtration_value>&g) { g.elist.emplace_back(u, v, f); }
+template <class Vertex_handle, class Filtration_value>
+std::size_t num_vertices(Graph<Vertex_handle, Filtration_value> const&g) { return g.vlist.size(); }
+template <class Vertex_handle, class Filtration_value>
+std::size_t num_edges(Graph<Vertex_handle, Filtration_value> const&g) { return g.elist.size(); }
+template <class Vertex_handle, class Filtration_value, class Iter = typename Graph<Vertex_handle, Filtration_value>::vertex_iterator>
+std::pair<Iter, Iter>
+vertices(Graph<Vertex_handle, Filtration_value> const&g) {
+  return { g.vlist.begin(), g.vlist.end() };
+}
+template <class Vertex_handle, class Filtration_value>
+std::pair<boost::counting_iterator<std::size_t>, boost::counting_iterator<std::size_t>>
+edges(Graph<Vertex_handle, Filtration_value> const&g) {
+  typedef boost::counting_iterator<std::size_t> I;
+  return { I(0), I(g.elist.size()) };
+}
+template <class Vertex_handle, class Filtration_value>
+Vertex_handle source(std::size_t e, Graph<Vertex_handle, Filtration_value> const&g) { return std::get<0>(g.elist[e]); }
+template <class Vertex_handle, class Filtration_value>
+Vertex_handle target(std::size_t e, Graph<Vertex_handle, Filtration_value> const&g) { return std::get<1>(g.elist[e]); }
+template <class Vertex_handle, class Filtration_value>
+Filtration_value get(vertex_filtration_t, Graph<Vertex_handle, Filtration_value> const&, Vertex_handle) { return 0; }
+template <class Vertex_handle, class Filtration_value>
+Filtration_value get(edge_filtration_t, Graph<Vertex_handle, Filtration_value> const&g, std::size_t e) { return std::get<2>(g.elist[e]); }
+}  // namespace rips_complex
+}  // namespace Gudhi
+namespace boost {
+template <class Vertex_handle, class Filtration_value>
+struct graph_traits<Gudhi::rips_complex::Graph<Vertex_handle, Filtration_value>> {
+  typedef Gudhi::rips_complex::Graph<Vertex_handle, Filtration_value> G;
+  struct traversal_category : vertex_list_graph_tag, edge_list_graph_tag {};
+  typedef Vertex_handle vertex_descriptor;
+  typedef typename G::vertex_iterator vertex_iterator;
+  typedef std::size_t vertices_size_type;
+  typedef std::size_t edge_descriptor;
+  typedef typename G::edge_iterator edge_iterator;
+  typedef std::size_t edges_size_type;
+  typedef directed_tag directed_category;
+  typedef disallow_parallel_edge_tag edge_parallel_category;
+};
+// Etc, since we don't expose this graph to the world, we know we are not going to query property_traits for instance.
+}
+
+namespace Gudhi {
 
 namespace rips_complex {
 
@@ -45,12 +104,8 @@ template <typename Filtration_value>
 class Sparse_rips_complex {
  private:
   // TODO(MG): use a different graph where we know we can safely insert in parallel.
-  typedef typename boost::adjacency_list<boost::vecS, boost::vecS, boost::directedS,
-                                         boost::property<vertex_filtration_t, Filtration_value>,
-                                         boost::property<edge_filtration_t, Filtration_value>>
-      Graph;
-
   typedef int Vertex_handle;
+  typedef rips_complex::Graph<Vertex_handle, Filtration_value> Graph;
 
  public:
   /** \brief Sparse_rips_complex constructor from a list of points.
@@ -63,10 +118,11 @@ class Sparse_rips_complex {
    *
    */
   template <typename RandomAccessPointRange, typename Distance>
-  Sparse_rips_complex(const RandomAccessPointRange& points, Distance distance, double epsilon, Filtration_value mini=-std::numeric_limits<Filtration_value>::infinity(), Filtration_value maxi=std::numeric_limits<Filtration_value>::infinity())
+  Sparse_rips_complex(const RandomAccessPointRange& points, Distance distance, double const epsilon, Filtration_value const mini=-std::numeric_limits<Filtration_value>::infinity(), Filtration_value const maxi=std::numeric_limits<Filtration_value>::infinity())
       : epsilon_(epsilon) {
     GUDHI_CHECK(epsilon > 0, "epsilon must be positive");
     auto dist_fun = [&](Vertex_handle i, Vertex_handle j) { return distance(points[i], points[j]); };
+    // TODO: stop choose_n_farthest_points once it reaches mini or 0?
     subsampling::choose_n_farthest_points(dist_fun, boost::irange<Vertex_handle>(0, boost::size(points)), -1, -1,
                                           std::back_inserter(sorted_points), std::back_inserter(params));
     compute_sparse_graph(dist_fun, epsilon, mini, maxi);
@@ -83,7 +139,7 @@ class Sparse_rips_complex {
    * @param[in] maxi Maximal filtration value. Ignore anything above this scale.
    */
   template <typename DistanceMatrix>
-  Sparse_rips_complex(const DistanceMatrix& distance_matrix, double epsilon, Filtration_value mini=-std::numeric_limits<Filtration_value>::infinity(), Filtration_value maxi=std::numeric_limits<Filtration_value>::infinity())
+  Sparse_rips_complex(const DistanceMatrix& distance_matrix, double const epsilon, Filtration_value const mini=-std::numeric_limits<Filtration_value>::infinity(), Filtration_value const maxi=std::numeric_limits<Filtration_value>::infinity())
       : Sparse_rips_complex(boost::irange<Vertex_handle>(0, boost::size(distance_matrix)),
                             [&](Vertex_handle i, Vertex_handle j) { return (i==j) ? 0 : (i<j) ? distance_matrix[j][i] : distance_matrix[i][j]; },
                             epsilon, mini, maxi) {}
@@ -99,7 +155,7 @@ class Sparse_rips_complex {
    *
    */
   template <typename SimplicialComplexForRips>
-  void create_complex(SimplicialComplexForRips& complex, int dim_max) {
+  void create_complex(SimplicialComplexForRips& complex, int const dim_max) {
     GUDHI_CHECK(complex.num_vertices() == 0,
                 std::invalid_argument("Sparse_rips_complex::create_complex - simplicial complex is not empty"));
 
@@ -108,17 +164,17 @@ class Sparse_rips_complex {
       complex.expansion(dim_max);
       return;
     }
-    const int n = boost::size(params);
-    std::vector<Filtration_value> lambda(n);
+    const std::size_t n = num_vertices(graph_);
+    std::vector<Filtration_value> lambda(max_v + 1);
     // lambda[original_order]=params[sorted_order]
-    for(int i=0;i<n;++i)
+    for(std::size_t i=0;i<n;++i)
       lambda[sorted_points[i]] = params[i];
     double cst = epsilon_ * (1 - epsilon_) / 2;
     auto block = [cst,&complex,&lambda](typename SimplicialComplexForRips::Simplex_handle sh){
       auto filt = complex.filtration(sh);
-      auto mini = filt * cst;
+      auto min_f = filt * cst;
       for(auto v : complex.simplex_vertex_range(sh)){
-        if(lambda[v] < mini)
+        if(lambda[v] < min_f)
           return true; // v died before this simplex could be born
       }
       return false;
@@ -129,32 +185,34 @@ class Sparse_rips_complex {
  private:
   // PointRange must be random access.
   template <typename Distance>
-  void compute_sparse_graph(Distance& dist, double epsilon, Filtration_value mini, Filtration_value maxi) {
+  void compute_sparse_graph(Distance& dist, double const epsilon, Filtration_value const mini, Filtration_value const maxi) {
     const auto& points = sorted_points; // convenience alias
-    const int n = boost::size(points);
+    std::size_t n = boost::size(points);
     double cst = epsilon * (1 - epsilon) / 2;
-    graph_.~Graph();
-    new (&graph_) Graph(n);
-    // for(auto v : vertices(g)) // doesn't work :-(
-    typename boost::graph_traits<Graph>::vertex_iterator v_i, v_e;
-    for (std::tie(v_i, v_e) = vertices(graph_); v_i != v_e; ++v_i) {
-      auto v = *v_i;
-      // This whole loop might not be necessary, leave it until someone investigates if it is safe to remove.
-      put(vertex_filtration_t(), graph_, v, 0);
+    max_v = -1; // Useful for the size of the map lambda.
+    for (std::size_t i = 0; i < n; ++i) {
+      if ((params[i] < mini || params[i] <= 0) && i != 0) break;
+      // The parameter of the first point is not very meaningful, it is supposed to be infinite,
+      // but if the type does not support it...
+      // It would be better to do this reduction of the number of points earlier, around choose_n_farthest_points.
+      add_vertex(points[i], graph_);
+      max_v = std::max(max_v, points[i]);
     }
+    n = num_vertices(graph_);
 
     // TODO(MG):
     // - make it parallel
     // - only test near-enough neighbors
-    for (int i = 0; i < n; ++i) {
+    for (std::size_t i = 0; i < n; ++i) {
       auto&& pi = points[i];
       auto li = params[i];
-      if (li < mini) break;
-      for (int j = i + 1; j < n; ++j) {
+      // If we inserted all the points, points with multiplicity would get connected to their first representative,
+      // no need to handle the redundant ones in the outer loop.
+      // if (li <= 0 && i != 0) break;
+      for (std::size_t j = i + 1; j < n; ++j) {
         auto&& pj = points[j];
         auto d = dist(pi, pj);
         auto lj = params[j];
-        if (lj < mini) break;
         GUDHI_CHECK(lj <= li, "Bad furthest point sorting");
         Filtration_value alpha;
 
@@ -178,6 +236,7 @@ class Sparse_rips_complex {
 
   Graph graph_;
   double epsilon_;
+  Vertex_handle max_v;
   // Because of the arbitrary split between constructor and create_complex
   // sorted_points[sorted_order]=original_order
   std::vector<Vertex_handle> sorted_points;
diff --git a/src/Simplex_tree/include/gudhi/Simplex_tree.h b/src/Simplex_tree/include/gudhi/Simplex_tree.h
index 85d6c3b0..85790baf 100644
--- a/src/Simplex_tree/include/gudhi/Simplex_tree.h
+++ b/src/Simplex_tree/include/gudhi/Simplex_tree.h
@@ -1060,8 +1060,8 @@ class Simplex_tree {
    *
    * Inserts all vertices and edges given by a OneSkeletonGraph.
    * OneSkeletonGraph must be a model of
-   * <a href="http://www.boost.org/doc/libs/1_65_1/libs/graph/doc/EdgeListGraph.html">boost::EdgeListGraph</a>
-   * and <a href="http://www.boost.org/doc/libs/1_65_1/libs/graph/doc/PropertyGraph.html">boost::PropertyGraph</a>.
+   * <a href="http://www.boost.org/doc/libs/1_76_0/libs/graph/doc/VertexAndEdgeListGraph.html">boost::VertexAndEdgeListGraph</a>
+   * and <a href="http://www.boost.org/doc/libs/1_76_0/libs/graph/doc/PropertyGraph.html">boost::PropertyGraph</a>.
    *
    * The vertex filtration value is accessible through the property tag
    * vertex_filtration_t.
@@ -1081,7 +1081,10 @@ class Simplex_tree {
     // the simplex tree must be empty
     assert(num_simplices() == 0);
 
-    if (boost::num_vertices(skel_graph) == 0) {
+    // is there a better way to let the compiler know that we don't mean Simplex_tree::num_vertices?
+    using boost::num_vertices;
+
+    if (num_vertices(skel_graph) == 0) {
       return;
     }
     if (num_edges(skel_graph) == 0) {
@@ -1090,18 +1093,18 @@ class Simplex_tree {
       dimension_ = 1;
     }
 
-    root_.members_.reserve(boost::num_vertices(skel_graph));
+    root_.members_.reserve(num_vertices(skel_graph));
 
     typename boost::graph_traits<OneSkeletonGraph>::vertex_iterator v_it,
         v_it_end;
-    for (std::tie(v_it, v_it_end) = boost::vertices(skel_graph); v_it != v_it_end;
+    for (std::tie(v_it, v_it_end) = vertices(skel_graph); v_it != v_it_end;
          ++v_it) {
       root_.members_.emplace_hint(
                                   root_.members_.end(), *v_it,
-                                  Node(&root_, boost::get(vertex_filtration_t(), skel_graph, *v_it)));
+                                  Node(&root_, get(vertex_filtration_t(), skel_graph, *v_it)));
     }
     std::pair<typename boost::graph_traits<OneSkeletonGraph>::edge_iterator,
-              typename boost::graph_traits<OneSkeletonGraph>::edge_iterator> boost_edges = boost::edges(skel_graph);
+              typename boost::graph_traits<OneSkeletonGraph>::edge_iterator> boost_edges = edges(skel_graph);
     // boost_edges.first is the equivalent to boost_edges.begin()
     // boost_edges.second is the equivalent to boost_edges.end()
     for (; boost_edges.first != boost_edges.second; boost_edges.first++) {
@@ -1123,7 +1126,7 @@ class Simplex_tree {
       }
 
       sh->second.children()->members().emplace(v,
-          Node(sh->second.children(), boost::get(edge_filtration_t(), skel_graph, edge)));
+          Node(sh->second.children(), get(edge_filtration_t(), skel_graph, edge)));
     }
   }
 
diff --git a/src/Subsampling/include/gudhi/choose_n_farthest_points.h b/src/Subsampling/include/gudhi/choose_n_farthest_points.h
index e6347d96..44c02df1 100644
--- a/src/Subsampling/include/gudhi/choose_n_farthest_points.h
+++ b/src/Subsampling/include/gudhi/choose_n_farthest_points.h
@@ -42,7 +42,7 @@ enum : std::size_t {
  *  The iteration starts with the landmark `starting point` or, if `starting point==random_starting_point`,
  *  with a random landmark.
  *  It chooses `final_size` points from a random access range
- *  `input_pts` (or the number of distinct points if `final_size` is larger)
+ *  `input_pts` (or the number of input points if `final_size` is larger)
  *  and outputs them in the output iterator `output_it`. It also
  *  outputs the distance from each of those points to the set of previous
  *  points in `dist_it`.
@@ -88,34 +88,57 @@ void choose_n_farthest_points(Distance dist,
     starting_point = dis(gen);
   }
 
-  std::size_t current_number_of_landmarks = 0;  // counter for landmarks
-  static_assert(std::numeric_limits<double>::has_infinity, "the number type needs to support infinity()");
   // FIXME: don't hard-code the type as double. For Epeck_d, we also want to handle types that do not have an infinity.
-  const double infty = std::numeric_limits<double>::infinity();  // infinity (see next entry)
-  std::vector< double > dist_to_L(nb_points, infty);  // vector of current distances to L from input_pts
+  static_assert(std::numeric_limits<double>::has_infinity, "the number type needs to support infinity()");
+
+  *output_it++ = input_pts[starting_point];
+  *dist_it++ = std::numeric_limits<double>::infinity();
+  if (final_size == 1) return;
+
+  std::vector<std::size_t> points(nb_points);  // map from remaining points to indexes in input_pts
+  std::vector< double > dist_to_L(nb_points);  // vector of current distances to L from points
+  for(std::size_t i = 0; i < nb_points; ++i) {
+    points[i] = i;
+    dist_to_L[i] = dist(input_pts[i], input_pts[starting_point]);
+  }
+  // The indirection through points makes the program a bit slower. Some alternatives:
+  // - the original code never removed points and counted on them not
+  //   reappearing because of a self-distance of 0. This causes unnecessary
+  //   computations when final_size is large. It also causes trouble if there are
+  //   input points at distance 0 from each other.
+  // - copy input_pts and update the local copy when removing points.
 
   std::size_t curr_max_w = starting_point;
 
-  for (current_number_of_landmarks = 0; current_number_of_landmarks != final_size; current_number_of_landmarks++) {
-    // curr_max_w at this point is the next landmark
-    *output_it++ = input_pts[curr_max_w];
-    *dist_it++ = dist_to_L[curr_max_w];
+  for (std::size_t current_number_of_landmarks = 1; current_number_of_landmarks != final_size; current_number_of_landmarks++) {
+    std::size_t latest_landmark = points[curr_max_w];
+    // To remove the latest landmark at index curr_max_w, replace it
+    // with the last point and reduce the length of the vector.
+    std::size_t last = points.size() - 1;
+    if (curr_max_w != last) {
+      points[curr_max_w] = points[last];
+      dist_to_L[curr_max_w] = dist_to_L[last];
+    }
+    points.pop_back();
+
+    // Update distances to L.
     std::size_t i = 0;
-    for (auto&& p : input_pts) {
-      double curr_dist = dist(p, input_pts[curr_max_w]);
+    for (auto p : points) {
+      double curr_dist = dist(input_pts[p], input_pts[latest_landmark]);
       if (curr_dist < dist_to_L[i])
         dist_to_L[i] = curr_dist;
       ++i;
     }
-    // choose the next curr_max_w
-    double curr_max_dist = 0;  // used for defining the furhest point from L
-    for (i = 0; i < dist_to_L.size(); i++)
+    // choose the next landmark
+    curr_max_w = 0;
+    double curr_max_dist = dist_to_L[curr_max_w];  // used for defining the furthest point from L
+    for (i = 1; i < points.size(); i++)
       if (dist_to_L[i] > curr_max_dist) {
         curr_max_dist = dist_to_L[i];
         curr_max_w = i;
       }
-    // If all that remains are duplicates of points already taken, stop.
-    if (curr_max_dist == 0) break;
+    *output_it++ = input_pts[points[curr_max_w]];
+    *dist_it++ = dist_to_L[curr_max_w];
   }
 }
 
diff --git a/src/Subsampling/test/test_choose_n_farthest_points.cpp b/src/Subsampling/test/test_choose_n_farthest_points.cpp
index 94793295..c384c61b 100644
--- a/src/Subsampling/test/test_choose_n_farthest_points.cpp
+++ b/src/Subsampling/test/test_choose_n_farthest_points.cpp
@@ -102,11 +102,12 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(test_choose_farthest_point_limits, Kernel, list_of
   BOOST_CHECK(distances[1] == 1);
   landmarks.clear(); distances.clear();
 
-  // Ignore duplicated points
+  // Accept duplicated points
   points.emplace_back(point.begin(), point.end());
   Gudhi::subsampling::choose_n_farthest_points(d, points, -1, -1, std::back_inserter(landmarks), std::back_inserter(distances));
-  BOOST_CHECK(landmarks.size() == 2 && distances.size() == 2);
+  BOOST_CHECK(landmarks.size() == 3 && distances.size() == 3);
   BOOST_CHECK(distances[0] == std::numeric_limits<FT>::infinity());
   BOOST_CHECK(distances[1] == 1);
+  BOOST_CHECK(distances[2] == 0);
   landmarks.clear(); distances.clear();
 }
diff --git a/src/python/test/test_rips_complex.py b/src/python/test/test_rips_complex.py
index b86e7498..a2f43a1b 100755
--- a/src/python/test/test_rips_complex.py
+++ b/src/python/test/test_rips_complex.py
@@ -133,3 +133,24 @@ def test_filtered_rips_from_distance_matrix():
 
     assert simplex_tree.num_simplices() == 8
     assert simplex_tree.num_vertices() == 4
+
+
+def test_sparse_with_multiplicity():
+    points = [
+        [3, 4],
+        [0.1, 2],
+        [0.1, 2],
+        [0.1, 2],
+        [0.1, 2],
+        [0.1, 2],
+        [0.1, 2],
+        [0.1, 2],
+        [0.1, 2],
+        [0.1, 2],
+        [0.1, 2],
+        [3, 4.1],
+    ]
+    rips = RipsComplex(points=points, sparse=0.01)
+    simplex_tree = rips.create_simplex_tree(max_dimension=2)
+    assert simplex_tree.num_simplices() == 7
+    diag = simplex_tree.persistence()