3 files changed, 296 insertions, 0 deletions
diff --git a/src/Subsampling/include/gudhi/choose_n_farthest_points.h b/src/Subsampling/include/gudhi/choose_n_farthest_points.h
new file mode 100644
index 00000000..66421a69
--- /dev/null
+++ b/src/Subsampling/include/gudhi/choose_n_farthest_points.h
@@ -0,0 +1,121 @@
+/*    This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ *    See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ *    Author(s):       Siargey Kachanovich
+ *
+ *    Copyright (C) 2016 Inria
+ *
+ *    Modification(s):
+ *      - YYYY/MM Author: Description of the modification
+ */
+
+#ifndef CHOOSE_N_FARTHEST_POINTS_H_
+#define CHOOSE_N_FARTHEST_POINTS_H_
+
+#include <boost/range.hpp>
+
+#include <gudhi/Null_output_iterator.h>
+
+#include <iterator>
+#include <vector>
+#include <random>
+#include <limits>  // for numeric_limits<>
+
+namespace Gudhi {
+
+namespace subsampling {
+
+/**
+ *  \ingroup subsampling
+ */
+enum : std::size_t {
+  /**
+   *  Sentinel for the `starting_point` argument of `choose_n_farthest_points`: the seed is then picked randomly.
+   */
+  random_starting_point = static_cast<std::size_t>(-1)
+};
+
+/**
+ *  \ingroup subsampling
+ *  \brief Subsample by a greedy strategy of iteratively adding the farthest point from the
+ *  current chosen point set to the subsampling.
+ *  The iteration starts with the landmark `starting_point` or, if `starting_point==random_starting_point`,
+ *  with a random landmark.
+ *  \tparam Kernel must provide a type Kernel::Squared_distance_d which is a model of the
+ *          concept <a target="_blank"
+ *   href="http://doc.cgal.org/latest/Kernel_d/classKernel__d_1_1Squared__distance__d.html">Kernel_d::Squared_distance_d</a> (despite the name, taken from CGAL, this can be any kind of metric or proximity measure).
+ *  It must also contain a public member `squared_distance_d_object()` that returns an object of this type.
+ *  \tparam Point_range Range whose value type is Kernel::Point_d.  It must provide random-access
+ *         via `operator[]` and the points should be stored contiguously in memory.
+ *  \tparam PointOutputIterator Output iterator whose value type is Kernel::Point_d.
+ *  \tparam DistanceOutputIterator Output iterator for distances.
+ *  \details It chooses `final_size` points from a random access range `input_pts` (or as many as it
+ *  can before only duplicates of the chosen points remain) and outputs them in the output iterator
+ *  `output_it`. It also outputs the distance from each of those points to the set of previous
+ *  points in `dist_it` (infinity for the first point).
+ * @param[in] k A kernel object.
+ * @param[in] input_pts Const reference to the input points.
+ * @param[in] final_size The size of the subsample to compute.
+ * @param[in] starting_point The seed in the farthest point algorithm: either `random_starting_point` or
+ *  the index of a point of `input_pts` (the index is not range-checked).
+ * @param[out] output_it The output iterator for points.
+ * @param[out] dist_it The optional output iterator for distances.
+ */
+template < typename Kernel,
+typename Point_range,
+typename PointOutputIterator,
+typename DistanceOutputIterator = Null_output_iterator>
+void choose_n_farthest_points(Kernel const &k,
+                              Point_range const &input_pts,
+                              std::size_t final_size,
+                              std::size_t starting_point,
+                              PointOutputIterator output_it,
+                              DistanceOutputIterator dist_it = {}) {
+  std::size_t nb_points = boost::size(input_pts);
+  if (final_size > nb_points)
+    final_size = nb_points;
+
+  // Nothing to select: empty input or empty request
+  if (final_size < 1)
+    return;
+
+  if (starting_point == random_starting_point) {
+    // Choose randomly the first landmark
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_int_distribution<std::size_t> dis(0, nb_points - 1);
+    starting_point = dis(gen);
+  }
+
+  typename Kernel::Squared_distance_d sqdist = k.squared_distance_d_object();
+  const double infty = std::numeric_limits<double>::infinity();  // distance to L while L is still empty
+  std::vector< double > dist_to_L(nb_points, infty);  // current distance from each input point to L
+  std::size_t curr_max_w = starting_point;  // index of the next landmark
+
+  for (std::size_t nb_landmarks = 0; nb_landmarks != final_size; ++nb_landmarks) {
+    auto&& landmark = input_pts[curr_max_w];  // bound once: output here, then used to update the distances
+    *output_it++ = landmark;
+    *dist_it++ = dist_to_L[curr_max_w];
+    std::size_t i = 0;
+    for (auto&& p : input_pts) {
+      double curr_dist = sqdist(p, landmark);
+      if (curr_dist < dist_to_L[i])
+        dist_to_L[i] = curr_dist;
+      ++i;
+    }
+    // choose the next curr_max_w
+    double curr_max_dist = 0;  // used for defining the farthest point from L
+    for (i = 0; i < dist_to_L.size(); i++)
+      if (dist_to_L[i] > curr_max_dist) {
+        curr_max_dist = dist_to_L[i];
+        curr_max_w = i;
+      }
+    // All remaining points coincide with landmarks already taken: stop rather than output the same landmark again
+    if (curr_max_dist == 0) break;
+  }
+}
+
+}  // namespace subsampling
+
+}  // namespace Gudhi
+
+#endif  // CHOOSE_N_FARTHEST_POINTS_H_
diff --git a/src/Subsampling/include/gudhi/pick_n_random_points.h b/src/Subsampling/include/gudhi/pick_n_random_points.h
new file mode 100644
index 00000000..a67b2b84
--- /dev/null
+++ b/src/Subsampling/include/gudhi/pick_n_random_points.h
@@ -0,0 +1,74 @@
+/*    This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ *    See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ *    Author(s):       Siargey Kachanovich
+ *
+ *    Copyright (C) 2016 Inria
+ *
+ *    Modification(s):
+ *      - YYYY/MM Author: Description of the modification
+ */
+
+#ifndef PICK_N_RANDOM_POINTS_H_
+#define PICK_N_RANDOM_POINTS_H_
+
+#include <gudhi/Clock.h>
+
+#include <boost/range/size.hpp>
+
+#include <cstddef>
+#include <random>     // random_device, mt19937
+#include <algorithm>  // shuffle
+#include <numeric>    // iota
+#include <iterator>
+#include <vector>
+
+
+namespace Gudhi {
+
+namespace subsampling {
+
+/**
+ *  \ingroup subsampling
+ * \brief Subsample a point set by picking random vertices.
+ *
+ *  \details It chooses `final_size` distinct points (all of them if `final_size` exceeds the number of
+ *  points) from a random access range `points` and outputs them to the output iterator `output_it`.
+ *  `points` is only read through `boost::size` and `operator[]`: the points themselves are not moved.
+ */
+template <typename Point_container,
+typename OutputIterator>
+void pick_n_random_points(Point_container const &points,
+                          std::size_t final_size,
+                          OutputIterator output_it) {
+#ifdef GUDHI_SUBSAMPLING_PROFILING
+  Gudhi::Clock t;
+#endif
+
+  std::size_t nbP = boost::size(points);
+  if (final_size > nbP)
+    final_size = nbP;
+
+  // Indices are std::size_t (not int) so that ranges holding more than INT_MAX points are indexed correctly
+  std::vector<std::size_t> landmarks(nbP);
+  std::iota(landmarks.begin(), landmarks.end(), std::size_t(0));
+
+  std::random_device rd;
+  std::mt19937 g(rd());
+  std::shuffle(landmarks.begin(), landmarks.end(), g);
+  landmarks.resize(final_size);  // keep the first `final_size` indices of the random permutation
+
+  for (std::size_t l : landmarks)
+    *output_it++ = points[l];
+
+#ifdef GUDHI_SUBSAMPLING_PROFILING
+  t.end();
+  std::cerr << "Random landmark choice took " << t.num_seconds()
+      << " seconds." << std::endl;
+#endif
+}
+
+}  // namespace subsampling
+
+}  // namespace Gudhi
+
+#endif  // PICK_N_RANDOM_POINTS_H_
diff --git a/src/Subsampling/include/gudhi/sparsify_point_set.h b/src/Subsampling/include/gudhi/sparsify_point_set.h
new file mode 100644
index 00000000..b30cec80
--- /dev/null
+++ b/src/Subsampling/include/gudhi/sparsify_point_set.h
@@ -0,0 +1,101 @@
+/*    This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+ *    See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+ *    Author(s):       Clement Jamin
+ *
+ *    Copyright (C) 2016 Inria
+ *
+ *    Modification(s):
+ *      - YYYY/MM Author: Description of the modification
+ */
+
+#ifndef SPARSIFY_POINT_SET_H_
+#define SPARSIFY_POINT_SET_H_
+
+#include <gudhi/Kd_tree_search.h>
+#ifdef GUDHI_SUBSAMPLING_PROFILING
+#include <gudhi/Clock.h>
+#endif
+
+#include <cstddef>
+#include <vector>
+
+namespace Gudhi {
+
+namespace subsampling {
+
+/**
+ *  \ingroup subsampling
+ *  \brief Outputs a subset of the input points so that the
+ *         squared distance between any two points
+ *         is greater than or equal to `min_squared_dist`.
+ *
+ *  \details The points are scanned in input order. Each point that has not been dropped is
+ *  written to `output_it`, then the input points whose squared distance to it is smaller than
+ *  `min_squared_dist` are dropped. As a consequence, the first input point is always part of
+ *  the output.
+ *
+ * \tparam Kernel must be a model of the <a target="_blank"
+ *   href="http://doc.cgal.org/latest/Spatial_searching/classSearchTraits.html">SearchTraits</a>
+ *   concept, such as the <a target="_blank"
+ *   href="http://doc.cgal.org/latest/Kernel_d/classCGAL_1_1Epick__d.html">CGAL::Epick_d</a> class, which
+ *   can be static if you know the ambient dimension at compile-time, or dynamic if you don't.
+ * \tparam Point_range Range whose value type is Kernel::Point_d.  It must provide random-access
+ *         via `operator[]` and the points should be stored contiguously in memory.
+ * \tparam OutputIterator Output iterator whose value type is Kernel::Point_d.
+ *
+ * @param[in] k A kernel object (not used by the current implementation).
+ * @param[in] input_pts Const reference to the input points.
+ * @param[in] min_squared_dist Minimum squared distance separating the output points.
+ * @param[out] output_it The output iterator.
+ */
+template <typename Kernel, typename Point_range, typename OutputIterator>
+void
+sparsify_point_set(
+                   const Kernel &k, Point_range const& input_pts,
+                   typename Kernel::FT min_squared_dist,
+                   OutputIterator output_it) {
+  using Points_ds = Gudhi::spatial_searching::Kd_tree_search<Kernel, Point_range>;
+
+#ifdef GUDHI_SUBSAMPLING_PROFILING
+  Gudhi::Clock t;
+#endif
+
+  // Search structure used to query the neighbors of each point that is kept
+  Points_ds kd_tree(input_pts);
+
+  // is_dropped[i] is set once input point i is found too close to a point that was output
+  std::vector<bool> is_dropped(input_pts.size(), false);
+
+  // Scan the input points in order: a point that has not been dropped yet is output,
+  // then the points that are too close to it are dropped
+  std::size_t idx = 0;  // index of pt in input_pts
+  for (auto const& pt : input_pts) {
+    if (!is_dropped[idx]) {
+      *output_it++ = pt;
+
+      auto neighbors = kd_tree.incremental_nearest_neighbors(pt);
+
+      // Drop the neighbors of pt until one that is far enough is met
+      // (presumably the neighbors come by increasing distance - TODO confirm)
+      for (auto const& neighbor : neighbors) {
+        if (!(neighbor.second < min_squared_dist))
+          break;
+        // N.B.: if neighbor.first < idx, is_dropped[neighbor.first] is already
+        // true, but testing for it would not make things faster
+        is_dropped[neighbor.first] = true;
+      }
+    }
+    ++idx;
+  }
+
+#ifdef GUDHI_SUBSAMPLING_PROFILING
+  t.end();
+  std::cerr << "Point set sparsified in " << t.num_seconds()
+      << " seconds." << std::endl;
+#endif
+}
+
+}  // namespace subsampling
+}  // namespace Gudhi
+
+#endif  // SPARSIFY_POINT_SET_H_