diff options
Diffstat (limited to 'src/Subsampling/include/gudhi')
-rw-r--r-- | src/Subsampling/include/gudhi/choose_n_farthest_points.h | 41 | ||||
-rw-r--r-- | src/Subsampling/include/gudhi/pick_n_random_points.h | 14 | ||||
-rw-r--r-- | src/Subsampling/include/gudhi/sparsify_point_set.h | 33 |
3 files changed, 46 insertions, 42 deletions
diff --git a/src/Subsampling/include/gudhi/choose_n_farthest_points.h b/src/Subsampling/include/gudhi/choose_n_farthest_points.h index 66421a69..e6347d96 100644 --- a/src/Subsampling/include/gudhi/choose_n_farthest_points.h +++ b/src/Subsampling/include/gudhi/choose_n_farthest_points.h @@ -38,32 +38,35 @@ enum : std::size_t { * \ingroup subsampling * \brief Subsample by a greedy strategy of iteratively adding the farthest point from the * current chosen point set to the subsampling. - * The iteration starts with the landmark `starting point` or, if `starting point==random_starting_point`, with a random landmark. - * \tparam Kernel must provide a type Kernel::Squared_distance_d which is a model of the - * concept <a target="_blank" - * href="http://doc.cgal.org/latest/Kernel_d/classKernel__d_1_1Squared__distance__d.html">Kernel_d::Squared_distance_d</a> (despite the name, taken from CGAL, this can be any kind of metric or proximity measure). - * It must also contain a public member `squared_distance_d_object()` that returns an object of this type. - * \tparam Point_range Range whose value type is Kernel::Point_d. It must provide random-access - * via `operator[]` and the points should be stored contiguously in memory. - * \tparam PointOutputIterator Output iterator whose value type is Kernel::Point_d. - * \tparam DistanceOutputIterator Output iterator for distances. - * \details It chooses `final_size` points from a random access range - * `input_pts` and outputs them in the output iterator `output_it`. It also + * \details + * The iteration starts with the landmark `starting point` or, if `starting point==random_starting_point`, + * with a random landmark. + * It chooses `final_size` points from a random access range + * `input_pts` (or the number of distinct points if `final_size` is larger) + * and outputs them in the output iterator `output_it`. It also * outputs the distance from each of those points to the set of previous * points in `dist_it`. - * @param[in] k A kernel object. - * @param[in] input_pts Const reference to the input points. + * \tparam Distance must provide an operator() that takes 2 points (value type of the range) + * and returns their distance (or some more general proximity measure) as a `double`. + * \tparam Point_range Random access range of points. + * \tparam PointOutputIterator Output iterator whose value type is the point type. + * \tparam DistanceOutputIterator Output iterator for distances. + * @param[in] dist A distance function. + * @param[in] input_pts The input points. * @param[in] final_size The size of the subsample to compute. * @param[in] starting_point The seed in the farthest point algorithm. * @param[out] output_it The output iterator for points. * @param[out] dist_it The optional output iterator for distances. + * + * \warning Older versions of this function took a CGAL kernel as argument. Users need to replace `k` with + * `k.squared_distance_d_object()` in the first argument of every call to `choose_n_farthest_points`. * */ -template < typename Kernel, +template < typename Distance, typename Point_range, typename PointOutputIterator, typename DistanceOutputIterator = Null_output_iterator> -void choose_n_farthest_points(Kernel const &k, +void choose_n_farthest_points(Distance dist, Point_range const &input_pts, std::size_t final_size, std::size_t starting_point, @@ -85,9 +88,9 @@ void choose_n_farthest_points(Kernel const &k, starting_point = dis(gen); } - typename Kernel::Squared_distance_d sqdist = k.squared_distance_d_object(); - std::size_t current_number_of_landmarks = 0; // counter for landmarks + static_assert(std::numeric_limits<double>::has_infinity, "the number type needs to support infinity()"); + // FIXME: don't hard-code the type as double. For Epeck_d, we also want to handle types that do not have an infinity. const double infty = std::numeric_limits<double>::infinity(); // infinity (see next entry) std::vector< double > dist_to_L(nb_points, infty); // vector of current distances to L from input_pts @@ -99,7 +102,7 @@ void choose_n_farthest_points(Kernel const &k, *dist_it++ = dist_to_L[curr_max_w]; std::size_t i = 0; for (auto&& p : input_pts) { - double curr_dist = sqdist(p, *(std::begin(input_pts) + curr_max_w)); + double curr_dist = dist(p, input_pts[curr_max_w]); if (curr_dist < dist_to_L[i]) dist_to_L[i] = curr_dist; ++i; @@ -111,6 +114,8 @@ void choose_n_farthest_points(Kernel const &k, curr_max_dist = dist_to_L[i]; curr_max_w = i; } + // If all that remains are duplicates of points already taken, stop. + if (curr_max_dist == 0) break; } } diff --git a/src/Subsampling/include/gudhi/pick_n_random_points.h b/src/Subsampling/include/gudhi/pick_n_random_points.h index a67b2b84..e4246c29 100644 --- a/src/Subsampling/include/gudhi/pick_n_random_points.h +++ b/src/Subsampling/include/gudhi/pick_n_random_points.h @@ -11,7 +11,9 @@ #ifndef PICK_N_RANDOM_POINTS_H_ #define PICK_N_RANDOM_POINTS_H_ -#include <gudhi/Clock.h> +#ifdef GUDHI_SUBSAMPLING_PROFILING +# include <gudhi/Clock.h> +#endif #include <boost/range/size.hpp> @@ -44,6 +46,12 @@ void pick_n_random_points(Point_container const &points, Gudhi::Clock t; #endif + std::random_device rd; + std::mt19937 g(rd()); + +#if __cplusplus >= 201703L + std::sample(std::begin(points), std::end(points), output_it, final_size, g); +#else std::size_t nbP = boost::size(points); if (final_size > nbP) final_size = nbP; @@ -51,14 +59,12 @@ void pick_n_random_points(Point_container const &points, std::vector<int> landmarks(nbP); std::iota(landmarks.begin(), landmarks.end(), 0); - std::random_device rd; - std::mt19937 g(rd()); - std::shuffle(landmarks.begin(), landmarks.end(), g); landmarks.resize(final_size); for (int l : landmarks) *output_it++ = points[l]; +#endif #ifdef GUDHI_SUBSAMPLING_PROFILING t.end(); diff --git a/src/Subsampling/include/gudhi/sparsify_point_set.h b/src/Subsampling/include/gudhi/sparsify_point_set.h index b30cec80..4571b8f3 100644 --- a/src/Subsampling/include/gudhi/sparsify_point_set.h +++ b/src/Subsampling/include/gudhi/sparsify_point_set.h @@ -11,6 +11,13 @@ #ifndef SPARSIFY_POINT_SET_H_ #define SPARSIFY_POINT_SET_H_ +#include <boost/version.hpp> +#if BOOST_VERSION < 106600 +# include <boost/function_output_iterator.hpp> +#else +# include <boost/iterator/function_output_iterator.hpp> +#endif + #include <gudhi/Kd_tree_search.h> #ifdef GUDHI_SUBSAMPLING_PROFILING #include <gudhi/Clock.h> @@ -27,7 +34,7 @@ namespace subsampling { * \ingroup subsampling * \brief Outputs a subset of the input points so that the * squared distance between any two points - * is greater than or equal to `min_squared_dist`. + * is greater than `min_squared_dist`. * * \tparam Kernel must be a model of the <a target="_blank" * href="http://doc.cgal.org/latest/Spatial_searching/classSearchTraits.html">SearchTraits</a> @@ -63,29 +70,15 @@ sparsify_point_set( // Parse the input points, and add them if they are not too close to // the other points std::size_t pt_idx = 0; - for (typename Point_range::const_iterator it_pt = input_pts.begin(); - it_pt != input_pts.end(); - ++it_pt, ++pt_idx) { - if (dropped_points[pt_idx]) + for (auto const& pt : input_pts) { + if (dropped_points[pt_idx++]) continue; - *output_it++ = *it_pt; - - auto ins_range = points_ds.incremental_nearest_neighbors(*it_pt); + *output_it++ = pt; // If another point Q is closer that min_squared_dist, mark Q to be dropped - for (auto const& neighbor : ins_range) { - std::size_t neighbor_point_idx = neighbor.first; - // If the neighbor is too close, we drop the neighbor - if (neighbor.second < min_squared_dist) { - // N.B.: If neighbor_point_idx < pt_idx, - // dropped_points[neighbor_point_idx] is already true but adding a - // test doesn't make things faster, so why bother? - dropped_points[neighbor_point_idx] = true; - } else { - break; - } - } + auto drop = [&dropped_points] (std::ptrdiff_t neighbor_point_idx) { dropped_points[neighbor_point_idx] = true; }; + points_ds.all_near_neighbors2(pt, min_squared_dist, min_squared_dist, boost::make_function_output_iterator(std::ref(drop))); } #ifdef GUDHI_SUBSAMPLING_PROFILING |