diff options
Diffstat (limited to 'src/Subsampling/include')
-rw-r--r-- | src/Subsampling/include/gudhi/choose_n_farthest_points.h | 121 | ||||
-rw-r--r-- | src/Subsampling/include/gudhi/pick_n_random_points.h | 74 | ||||
-rw-r--r-- | src/Subsampling/include/gudhi/sparsify_point_set.h | 101 |
3 files changed, 296 insertions, 0 deletions
diff --git a/src/Subsampling/include/gudhi/choose_n_farthest_points.h b/src/Subsampling/include/gudhi/choose_n_farthest_points.h new file mode 100644 index 00000000..66421a69 --- /dev/null +++ b/src/Subsampling/include/gudhi/choose_n_farthest_points.h @@ -0,0 +1,121 @@ +/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. + * Author(s): Siargey Kachanovich + * + * Copyright (C) 2016 Inria + * + * Modification(s): + * - YYYY/MM Author: Description of the modification + */ + +#ifndef CHOOSE_N_FARTHEST_POINTS_H_ +#define CHOOSE_N_FARTHEST_POINTS_H_ + +#include <boost/range.hpp> + +#include <gudhi/Null_output_iterator.h> + +#include <iterator> +#include <vector> +#include <random> +#include <limits> // for numeric_limits<> + +namespace Gudhi { + +namespace subsampling { + +/** + * \ingroup subsampling + */ +enum : std::size_t { +/** + * Argument for `choose_n_farthest_points` to indicate that the starting point should be picked randomly. + */ + random_starting_point = std::size_t(-1) +}; + +/** + * \ingroup subsampling + * \brief Subsample by a greedy strategy of iteratively adding the farthest point from the + * current chosen point set to the subsampling. + * The iteration starts with the landmark `starting point` or, if `starting point==random_starting_point`, with a random landmark. + * \tparam Kernel must provide a type Kernel::Squared_distance_d which is a model of the + * concept <a target="_blank" + * href="http://doc.cgal.org/latest/Kernel_d/classKernel__d_1_1Squared__distance__d.html">Kernel_d::Squared_distance_d</a> (despite the name, taken from CGAL, this can be any kind of metric or proximity measure). + * It must also contain a public member `squared_distance_d_object()` that returns an object of this type. + * \tparam Point_range Range whose value type is Kernel::Point_d. It must provide random-access + * via `operator[]` and the points should be stored contiguously in memory. + * \tparam PointOutputIterator Output iterator whose value type is Kernel::Point_d. + * \tparam DistanceOutputIterator Output iterator for distances. + * \details It chooses `final_size` points from a random access range + * `input_pts` and outputs them in the output iterator `output_it`. It also + * outputs the distance from each of those points to the set of previous + * points in `dist_it`. + * @param[in] k A kernel object. + * @param[in] input_pts Const reference to the input points. + * @param[in] final_size The size of the subsample to compute. + * @param[in] starting_point The seed in the farthest point algorithm. + * @param[out] output_it The output iterator for points. + * @param[out] dist_it The optional output iterator for distances. + * + */ +template < typename Kernel, +typename Point_range, +typename PointOutputIterator, +typename DistanceOutputIterator = Null_output_iterator> +void choose_n_farthest_points(Kernel const &k, + Point_range const &input_pts, + std::size_t final_size, + std::size_t starting_point, + PointOutputIterator output_it, + DistanceOutputIterator dist_it = {}) { + std::size_t nb_points = boost::size(input_pts); + if (final_size > nb_points) + final_size = nb_points; + + // Tests to the limit + if (final_size < 1) + return; + + if (starting_point == random_starting_point) { + // Choose randomly the first landmark + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<std::size_t> dis(0, nb_points - 1); + starting_point = dis(gen); + } + + typename Kernel::Squared_distance_d sqdist = k.squared_distance_d_object(); + + std::size_t current_number_of_landmarks = 0; // counter for landmarks + const double infty = std::numeric_limits<double>::infinity(); // infinity (see next entry) + std::vector< double > dist_to_L(nb_points, infty); // vector of current distances to L from input_pts + + std::size_t curr_max_w = starting_point; + + for (current_number_of_landmarks = 0; current_number_of_landmarks != final_size; current_number_of_landmarks++) { + // curr_max_w at this point is the next landmark + *output_it++ = input_pts[curr_max_w]; + *dist_it++ = dist_to_L[curr_max_w]; + std::size_t i = 0; + for (auto&& p : input_pts) { + double curr_dist = sqdist(p, *(std::begin(input_pts) + curr_max_w)); + if (curr_dist < dist_to_L[i]) + dist_to_L[i] = curr_dist; + ++i; + } + // choose the next curr_max_w + double curr_max_dist = 0; // used for defining the furhest point from L + for (i = 0; i < dist_to_L.size(); i++) + if (dist_to_L[i] > curr_max_dist) { + curr_max_dist = dist_to_L[i]; + curr_max_w = i; + } + } +} + +} // namespace subsampling + +} // namespace Gudhi + +#endif // CHOOSE_N_FARTHEST_POINTS_H_ diff --git a/src/Subsampling/include/gudhi/pick_n_random_points.h b/src/Subsampling/include/gudhi/pick_n_random_points.h new file mode 100644 index 00000000..a67b2b84 --- /dev/null +++ b/src/Subsampling/include/gudhi/pick_n_random_points.h @@ -0,0 +1,74 @@ +/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. + * Author(s): Siargey Kachanovich + * + * Copyright (C) 2016 Inria + * + * Modification(s): + * - YYYY/MM Author: Description of the modification + */ + +#ifndef PICK_N_RANDOM_POINTS_H_ +#define PICK_N_RANDOM_POINTS_H_ + +#include <gudhi/Clock.h> + +#include <boost/range/size.hpp> + +#include <cstddef> +#include <random> // random_device, mt19937 +#include <algorithm> // shuffle +#include <numeric> // iota +#include <iterator> +#include <vector> + + +namespace Gudhi { + +namespace subsampling { + +/** + * \ingroup subsampling + * \brief Subsample a point set by picking random vertices. + * + * \details It chooses `final_size` distinct points from a random access range `points` + * and outputs them to the output iterator `output_it`. + * Point_container::iterator should be ValueSwappable and RandomAccessIterator. + */ +template <typename Point_container, +typename OutputIterator> +void pick_n_random_points(Point_container const &points, + std::size_t final_size, + OutputIterator output_it) { +#ifdef GUDHI_SUBSAMPLING_PROFILING + Gudhi::Clock t; +#endif + + std::size_t nbP = boost::size(points); + if (final_size > nbP) + final_size = nbP; + + std::vector<int> landmarks(nbP); + std::iota(landmarks.begin(), landmarks.end(), 0); + + std::random_device rd; + std::mt19937 g(rd()); + + std::shuffle(landmarks.begin(), landmarks.end(), g); + landmarks.resize(final_size); + + for (int l : landmarks) + *output_it++ = points[l]; + +#ifdef GUDHI_SUBSAMPLING_PROFILING + t.end(); + std::cerr << "Random landmark choice took " << t.num_seconds() + << " seconds." << std::endl; +#endif +} + +} // namespace subsampling + +} // namespace Gudhi + +#endif // PICK_N_RANDOM_POINTS_H_ diff --git a/src/Subsampling/include/gudhi/sparsify_point_set.h b/src/Subsampling/include/gudhi/sparsify_point_set.h new file mode 100644 index 00000000..b30cec80 --- /dev/null +++ b/src/Subsampling/include/gudhi/sparsify_point_set.h @@ -0,0 +1,101 @@ +/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. + * Author(s): Clement Jamin + * + * Copyright (C) 2016 Inria + * + * Modification(s): + * - YYYY/MM Author: Description of the modification + */ + +#ifndef SPARSIFY_POINT_SET_H_ +#define SPARSIFY_POINT_SET_H_ + +#include <gudhi/Kd_tree_search.h> +#ifdef GUDHI_SUBSAMPLING_PROFILING +#include <gudhi/Clock.h> +#endif + +#include <cstddef> +#include <vector> + +namespace Gudhi { + +namespace subsampling { + +/** + * \ingroup subsampling + * \brief Outputs a subset of the input points so that the + * squared distance between any two points + * is greater than or equal to `min_squared_dist`. + * + * \tparam Kernel must be a model of the <a target="_blank" + * href="http://doc.cgal.org/latest/Spatial_searching/classSearchTraits.html">SearchTraits</a> + * concept, such as the <a target="_blank" + * href="http://doc.cgal.org/latest/Kernel_d/classCGAL_1_1Epick__d.html">CGAL::Epick_d</a> class, which + * can be static if you know the ambiant dimension at compile-time, or dynamic if you don't. + * \tparam Point_range Range whose value type is Kernel::Point_d. It must provide random-access + * via `operator[]` and the points should be stored contiguously in memory. + * \tparam OutputIterator Output iterator whose value type is Kernel::Point_d. + * + * @param[in] k A kernel object. + * @param[in] input_pts Const reference to the input points. + * @param[in] min_squared_dist Minimum squared distance separating the output points. + * @param[out] output_it The output iterator. + */ +template <typename Kernel, typename Point_range, typename OutputIterator> +void +sparsify_point_set( + const Kernel &k, Point_range const& input_pts, + typename Kernel::FT min_squared_dist, + OutputIterator output_it) { + typedef typename Gudhi::spatial_searching::Kd_tree_search< + Kernel, Point_range> Points_ds; + +#ifdef GUDHI_SUBSAMPLING_PROFILING + Gudhi::Clock t; +#endif + + Points_ds points_ds(input_pts); + + std::vector<bool> dropped_points(input_pts.size(), false); + + // Parse the input points, and add them if they are not too close to + // the other points + std::size_t pt_idx = 0; + for (typename Point_range::const_iterator it_pt = input_pts.begin(); + it_pt != input_pts.end(); + ++it_pt, ++pt_idx) { + if (dropped_points[pt_idx]) + continue; + + *output_it++ = *it_pt; + + auto ins_range = points_ds.incremental_nearest_neighbors(*it_pt); + + // If another point Q is closer that min_squared_dist, mark Q to be dropped + for (auto const& neighbor : ins_range) { + std::size_t neighbor_point_idx = neighbor.first; + // If the neighbor is too close, we drop the neighbor + if (neighbor.second < min_squared_dist) { + // N.B.: If neighbor_point_idx < pt_idx, + // dropped_points[neighbor_point_idx] is already true but adding a + // test doesn't make things faster, so why bother? + dropped_points[neighbor_point_idx] = true; + } else { + break; + } + } + } + +#ifdef GUDHI_SUBSAMPLING_PROFILING + t.end(); + std::cerr << "Point set sparsified in " << t.num_seconds() + << " seconds." << std::endl; +#endif +} + +} // namespace subsampling +} // namespace Gudhi + +#endif // SPARSIFY_POINT_SET_H_ |