diff options
Diffstat (limited to 'src/Subsampling')
8 files changed, 115 insertions, 125 deletions
diff --git a/src/Subsampling/example/CMakeLists.txt b/src/Subsampling/example/CMakeLists.txt index 28aab103..f4a23d22 100644 --- a/src/Subsampling/example/CMakeLists.txt +++ b/src/Subsampling/example/CMakeLists.txt @@ -3,7 +3,6 @@ project(Subsampling_examples) if(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) add_executable(Subsampling_example_pick_n_random_points example_pick_n_random_points.cpp) add_executable(Subsampling_example_choose_n_farthest_points example_choose_n_farthest_points.cpp) - add_executable(Subsampling_example_custom_kernel example_custom_kernel.cpp) add_executable(Subsampling_example_sparsify_point_set example_sparsify_point_set.cpp) target_link_libraries(Subsampling_example_sparsify_point_set ${CGAL_LIBRARY}) @@ -13,10 +12,6 @@ if(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) COMMAND $<TARGET_FILE:Subsampling_example_choose_n_farthest_points>) add_test(NAME Subsampling_example_sparsify_point_set COMMAND $<TARGET_FILE:Subsampling_example_sparsify_point_set>) - - install(TARGETS Subsampling_example_pick_n_random_points DESTINATION bin) - install(TARGETS Subsampling_example_choose_n_farthest_points DESTINATION bin) - install(TARGETS Subsampling_example_custom_kernel DESTINATION bin) - install(TARGETS Subsampling_example_sparsify_point_set DESTINATION bin) - endif(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) + +add_executable(Subsampling_example_custom_distance example_custom_distance.cpp) diff --git a/src/Subsampling/example/example_choose_n_farthest_points.cpp b/src/Subsampling/example/example_choose_n_farthest_points.cpp index 27cf5d4e..e8b3ce2d 100644 --- a/src/Subsampling/example/example_choose_n_farthest_points.cpp +++ b/src/Subsampling/example/example_choose_n_farthest_points.cpp @@ -20,7 +20,7 @@ int main(void) { K k; std::vector<Point_d> results; - Gudhi::subsampling::choose_n_farthest_points(k, points, 100, + Gudhi::subsampling::choose_n_farthest_points(k.squared_distance_d_object(), points, 100, Gudhi::subsampling::random_starting_point, std::back_inserter(results)); std::clog << "Before sparsification: " << points.size() << " points.\n"; diff --git a/src/Subsampling/example/example_custom_distance.cpp b/src/Subsampling/example/example_custom_distance.cpp new file mode 100644 index 00000000..3325b12d --- /dev/null +++ b/src/Subsampling/example/example_custom_distance.cpp @@ -0,0 +1,44 @@ +#include <gudhi/choose_n_farthest_points.h> + +#include <iostream> +#include <vector> +#include <iterator> + + +typedef unsigned Point; + +/* The class Distance contains a distance function defined on the set of points {0, 1, 2, 3} + * and computes a distance according to the matrix: + * 0 1 2 4 + * 1 0 4 2 + * 2 4 0 1 + * 4 2 1 0 + */ +class Distance { + private: + std::vector<std::vector<double>> matrix_; + + public: + Distance() { + matrix_.push_back({0, 1, 2, 4}); + matrix_.push_back({1, 0, 4, 2}); + matrix_.push_back({2, 4, 0, 1}); + matrix_.push_back({4, 2, 1, 0}); + } + + double operator()(Point p1, Point p2) const { + return matrix_[p1][p2]; + } +}; + +int main(void) { + std::vector<Point> points = {0, 1, 2, 3}; + std::vector<Point> results; + + Gudhi::subsampling::choose_n_farthest_points(Distance(), points, 2, + Gudhi::subsampling::random_starting_point, + std::back_inserter(results)); + std::clog << "Before sparsification: " << points.size() << " points.\n"; + std::clog << "After sparsification: " << results.size() << " points.\n"; + std::clog << "Result table: {" << results[0] << "," << results[1] << "}\n"; +} diff --git a/src/Subsampling/example/example_custom_kernel.cpp b/src/Subsampling/example/example_custom_kernel.cpp deleted file mode 100644 index 535bf42a..00000000 --- a/src/Subsampling/example/example_custom_kernel.cpp +++ /dev/null @@ -1,63 +0,0 @@ -#include <gudhi/choose_n_farthest_points.h> - -#include <iostream> -#include <vector> -#include <iterator> - - -/* The class Kernel contains a distance function defined on the set of points {0, 1, 2, 3} - * and computes a distance according to the matrix: - * 0 1 2 4 - * 1 0 4 2 - * 2 4 0 1 - * 4 2 1 0 - */ -class Kernel { - public: - typedef double FT; - typedef unsigned Point_d; - - // Class Squared_distance_d - class Squared_distance_d { - private: - std::vector<std::vector<FT>> matrix_; - - public: - Squared_distance_d() { - matrix_.push_back(std::vector<FT>({0, 1, 2, 4})); - matrix_.push_back(std::vector<FT>({1, 0, 4, 2})); - matrix_.push_back(std::vector<FT>({2, 4, 0, 1})); - matrix_.push_back(std::vector<FT>({4, 2, 1, 0})); - } - - FT operator()(Point_d p1, Point_d p2) { - return matrix_[p1][p2]; - } - }; - - // Constructor - Kernel() {} - - // Object of type Squared_distance_d - Squared_distance_d squared_distance_d_object() const { - return Squared_distance_d(); - } -}; - -int main(void) { - typedef Kernel K; - typedef typename K::Point_d Point_d; - - K k; - std::vector<Point_d> points = {0, 1, 2, 3}; - std::vector<Point_d> results; - - Gudhi::subsampling::choose_n_farthest_points(k, points, 2, - Gudhi::subsampling::random_starting_point, - std::back_inserter(results)); - std::clog << "Before sparsification: " << points.size() << " points.\n"; - std::clog << "After sparsification: " << results.size() << " points.\n"; - std::clog << "Result table: {" << results[0] << "," << results[1] << "}\n"; - - return 0; -} diff --git a/src/Subsampling/include/gudhi/choose_n_farthest_points.h b/src/Subsampling/include/gudhi/choose_n_farthest_points.h index 66421a69..e6347d96 100644 --- a/src/Subsampling/include/gudhi/choose_n_farthest_points.h +++ b/src/Subsampling/include/gudhi/choose_n_farthest_points.h @@ -38,32 +38,35 @@ enum : std::size_t { * \ingroup subsampling * \brief Subsample by a greedy strategy of iteratively adding the farthest point from the * current chosen point set to the subsampling. - * The iteration starts with the landmark `starting point` or, if `starting point==random_starting_point`, with a random landmark. - * \tparam Kernel must provide a type Kernel::Squared_distance_d which is a model of the - * concept <a target="_blank" - * href="http://doc.cgal.org/latest/Kernel_d/classKernel__d_1_1Squared__distance__d.html">Kernel_d::Squared_distance_d</a> (despite the name, taken from CGAL, this can be any kind of metric or proximity measure). - * It must also contain a public member `squared_distance_d_object()` that returns an object of this type. - * \tparam Point_range Range whose value type is Kernel::Point_d. It must provide random-access - * via `operator[]` and the points should be stored contiguously in memory. - * \tparam PointOutputIterator Output iterator whose value type is Kernel::Point_d. - * \tparam DistanceOutputIterator Output iterator for distances. - * \details It chooses `final_size` points from a random access range - * `input_pts` and outputs them in the output iterator `output_it`. It also + * \details + * The iteration starts with the landmark `starting point` or, if `starting point==random_starting_point`, + * with a random landmark. + * It chooses `final_size` points from a random access range + * `input_pts` (or the number of distinct points if `final_size` is larger) + * and outputs them in the output iterator `output_it`. It also * outputs the distance from each of those points to the set of previous * points in `dist_it`. - * @param[in] k A kernel object. - * @param[in] input_pts Const reference to the input points. + * \tparam Distance must provide an operator() that takes 2 points (value type of the range) + * and returns their distance (or some more general proximity measure) as a `double`. + * \tparam Point_range Random access range of points. + * \tparam PointOutputIterator Output iterator whose value type is the point type. + * \tparam DistanceOutputIterator Output iterator for distances. + * @param[in] dist A distance function. + * @param[in] input_pts The input points. * @param[in] final_size The size of the subsample to compute. * @param[in] starting_point The seed in the farthest point algorithm. * @param[out] output_it The output iterator for points. * @param[out] dist_it The optional output iterator for distances. + * + * \warning Older versions of this function took a CGAL kernel as argument. Users need to replace `k` with + * `k.squared_distance_d_object()` in the first argument of every call to `choose_n_farthest_points`. * */ -template < typename Kernel, +template < typename Distance, typename Point_range, typename PointOutputIterator, typename DistanceOutputIterator = Null_output_iterator> -void choose_n_farthest_points(Kernel const &k, +void choose_n_farthest_points(Distance dist, Point_range const &input_pts, std::size_t final_size, std::size_t starting_point, @@ -85,9 +88,9 @@ void choose_n_farthest_points(Kernel const &k, starting_point = dis(gen); } - typename Kernel::Squared_distance_d sqdist = k.squared_distance_d_object(); - std::size_t current_number_of_landmarks = 0; // counter for landmarks + static_assert(std::numeric_limits<double>::has_infinity, "the number type needs to support infinity()"); + // FIXME: don't hard-code the type as double. For Epeck_d, we also want to handle types that do not have an infinity. const double infty = std::numeric_limits<double>::infinity(); // infinity (see next entry) std::vector< double > dist_to_L(nb_points, infty); // vector of current distances to L from input_pts @@ -99,7 +102,7 @@ void choose_n_farthest_points(Kernel const &k, *dist_it++ = dist_to_L[curr_max_w]; std::size_t i = 0; for (auto&& p : input_pts) { - double curr_dist = sqdist(p, *(std::begin(input_pts) + curr_max_w)); + double curr_dist = dist(p, input_pts[curr_max_w]); if (curr_dist < dist_to_L[i]) dist_to_L[i] = curr_dist; ++i; @@ -111,6 +114,8 @@ void choose_n_farthest_points(Kernel const &k, curr_max_dist = dist_to_L[i]; curr_max_w = i; } + // If all that remains are duplicates of points already taken, stop. + if (curr_max_dist == 0) break; } } diff --git a/src/Subsampling/include/gudhi/pick_n_random_points.h b/src/Subsampling/include/gudhi/pick_n_random_points.h index a67b2b84..e4246c29 100644 --- a/src/Subsampling/include/gudhi/pick_n_random_points.h +++ b/src/Subsampling/include/gudhi/pick_n_random_points.h @@ -11,7 +11,9 @@ #ifndef PICK_N_RANDOM_POINTS_H_ #define PICK_N_RANDOM_POINTS_H_ -#include <gudhi/Clock.h> +#ifdef GUDHI_SUBSAMPLING_PROFILING +# include <gudhi/Clock.h> +#endif #include <boost/range/size.hpp> @@ -44,6 +46,12 @@ void pick_n_random_points(Point_container const &points, Gudhi::Clock t; #endif + std::random_device rd; + std::mt19937 g(rd()); + +#if __cplusplus >= 201703L + std::sample(std::begin(points), std::end(points), output_it, final_size, g); +#else std::size_t nbP = boost::size(points); if (final_size > nbP) final_size = nbP; @@ -51,14 +59,12 @@ void pick_n_random_points(Point_container const &points, std::vector<int> landmarks(nbP); std::iota(landmarks.begin(), landmarks.end(), 0); - std::random_device rd; - std::mt19937 g(rd()); - std::shuffle(landmarks.begin(), landmarks.end(), g); landmarks.resize(final_size); for (int l : landmarks) *output_it++ = points[l]; +#endif #ifdef GUDHI_SUBSAMPLING_PROFILING t.end(); diff --git a/src/Subsampling/include/gudhi/sparsify_point_set.h b/src/Subsampling/include/gudhi/sparsify_point_set.h index b30cec80..4571b8f3 100644 --- a/src/Subsampling/include/gudhi/sparsify_point_set.h +++ b/src/Subsampling/include/gudhi/sparsify_point_set.h @@ -11,6 +11,13 @@ #ifndef SPARSIFY_POINT_SET_H_ #define SPARSIFY_POINT_SET_H_ +#include <boost/version.hpp> +#if BOOST_VERSION < 106600 +# include <boost/function_output_iterator.hpp> +#else +# include <boost/iterator/function_output_iterator.hpp> +#endif + #include <gudhi/Kd_tree_search.h> #ifdef GUDHI_SUBSAMPLING_PROFILING #include <gudhi/Clock.h> @@ -27,7 +34,7 @@ namespace subsampling { * \ingroup subsampling * \brief Outputs a subset of the input points so that the * squared distance between any two points - * is greater than or equal to `min_squared_dist`. + * is greater than `min_squared_dist`. * * \tparam Kernel must be a model of the <a target="_blank" * href="http://doc.cgal.org/latest/Spatial_searching/classSearchTraits.html">SearchTraits</a> @@ -63,29 +70,15 @@ sparsify_point_set( // Parse the input points, and add them if they are not too close to // the other points std::size_t pt_idx = 0; - for (typename Point_range::const_iterator it_pt = input_pts.begin(); - it_pt != input_pts.end(); - ++it_pt, ++pt_idx) { - if (dropped_points[pt_idx]) + for (auto const& pt : input_pts) { + if (dropped_points[pt_idx++]) continue; - *output_it++ = *it_pt; - - auto ins_range = points_ds.incremental_nearest_neighbors(*it_pt); + *output_it++ = pt; // If another point Q is closer that min_squared_dist, mark Q to be dropped - for (auto const& neighbor : ins_range) { - std::size_t neighbor_point_idx = neighbor.first; - // If the neighbor is too close, we drop the neighbor - if (neighbor.second < min_squared_dist) { - // N.B.: If neighbor_point_idx < pt_idx, - // dropped_points[neighbor_point_idx] is already true but adding a - // test doesn't make things faster, so why bother? - dropped_points[neighbor_point_idx] = true; - } else { - break; - } - } + auto drop = [&dropped_points] (std::ptrdiff_t neighbor_point_idx) { dropped_points[neighbor_point_idx] = true; }; + points_ds.all_near_neighbors2(pt, min_squared_dist, min_squared_dist, boost::make_function_output_iterator(std::ref(drop))); } #ifdef GUDHI_SUBSAMPLING_PROFILING diff --git a/src/Subsampling/test/test_choose_n_farthest_points.cpp b/src/Subsampling/test/test_choose_n_farthest_points.cpp index 5c4bd4cb..94793295 100644 --- a/src/Subsampling/test/test_choose_n_farthest_points.cpp +++ b/src/Subsampling/test/test_choose_n_farthest_points.cpp @@ -39,12 +39,13 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(test_choose_farthest_point, Kernel, list_of_tested for (FT k = 0; k < 5; k += 1.0) for (FT l = 0; l < 5; l += 1.0) { std::vector<FT> point({i, j, k, l}); - points.push_back(Point_d(point.begin(), point.end())); + points.emplace_back(point.begin(), point.end()); } landmarks.clear(); Kernel k; - Gudhi::subsampling::choose_n_farthest_points(k, points, 100, Gudhi::subsampling::random_starting_point, std::back_inserter(landmarks)); + auto d = k.squared_distance_d_object(); + Gudhi::subsampling::choose_n_farthest_points(d, points, 100, Gudhi::subsampling::random_starting_point, std::back_inserter(landmarks)); BOOST_CHECK(landmarks.size() == 100); for (auto landmark : landmarks) @@ -61,40 +62,49 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(test_choose_farthest_point_limits, Kernel, list_of std::vector< FT > distances; landmarks.clear(); Kernel k; + auto d = k.squared_distance_d_object(); // Choose -1 farthest points in an empty point cloud - Gudhi::subsampling::choose_n_farthest_points(k, points, -1, -1, std::back_inserter(landmarks), std::back_inserter(distances)); + Gudhi::subsampling::choose_n_farthest_points(d, points, -1, -1, std::back_inserter(landmarks), std::back_inserter(distances)); BOOST_CHECK(landmarks.size() == 0); landmarks.clear(); distances.clear(); // Choose 0 farthest points in an empty point cloud - Gudhi::subsampling::choose_n_farthest_points(k, points, 0, -1, std::back_inserter(landmarks), std::back_inserter(distances)); + Gudhi::subsampling::choose_n_farthest_points(d, points, 0, -1, std::back_inserter(landmarks), std::back_inserter(distances)); BOOST_CHECK(landmarks.size() == 0); landmarks.clear(); distances.clear(); // Choose 1 farthest points in an empty point cloud - Gudhi::subsampling::choose_n_farthest_points(k, points, 1, -1, std::back_inserter(landmarks), std::back_inserter(distances)); + Gudhi::subsampling::choose_n_farthest_points(d, points, 1, -1, std::back_inserter(landmarks), std::back_inserter(distances)); BOOST_CHECK(landmarks.size() == 0); landmarks.clear(); distances.clear(); std::vector<FT> point({0.0, 0.0, 0.0, 0.0}); - points.push_back(Point_d(point.begin(), point.end())); + points.emplace_back(point.begin(), point.end()); // Choose -1 farthest points in a one point cloud - Gudhi::subsampling::choose_n_farthest_points(k, points, -1, -1, std::back_inserter(landmarks), std::back_inserter(distances)); + Gudhi::subsampling::choose_n_farthest_points(d, points, -1, -1, std::back_inserter(landmarks), std::back_inserter(distances)); BOOST_CHECK(landmarks.size() == 1 && distances.size() == 1); BOOST_CHECK(distances[0] == std::numeric_limits<FT>::infinity()); landmarks.clear(); distances.clear(); // Choose 0 farthest points in a one point cloud - Gudhi::subsampling::choose_n_farthest_points(k, points, 0, -1, std::back_inserter(landmarks), std::back_inserter(distances)); + Gudhi::subsampling::choose_n_farthest_points(d, points, 0, -1, std::back_inserter(landmarks), std::back_inserter(distances)); BOOST_CHECK(landmarks.size() == 0 && distances.size() == 0); landmarks.clear(); distances.clear(); // Choose 1 farthest points in a one point cloud - Gudhi::subsampling::choose_n_farthest_points(k, points, 1, -1, std::back_inserter(landmarks), std::back_inserter(distances)); + Gudhi::subsampling::choose_n_farthest_points(d, points, 1, -1, std::back_inserter(landmarks), std::back_inserter(distances)); BOOST_CHECK(landmarks.size() == 1 && distances.size() == 1); BOOST_CHECK(distances[0] == std::numeric_limits<FT>::infinity()); landmarks.clear(); distances.clear(); std::vector<FT> point2({1.0, 0.0, 0.0, 0.0}); - points.push_back(Point_d(point2.begin(), point2.end())); - // Choose all farthest points in a one point cloud - Gudhi::subsampling::choose_n_farthest_points(k, points, -1, -1, std::back_inserter(landmarks), std::back_inserter(distances)); + points.emplace_back(point2.begin(), point2.end()); + // Choose all farthest points among 2 points + Gudhi::subsampling::choose_n_farthest_points(d, points, -1, -1, std::back_inserter(landmarks), std::back_inserter(distances)); + BOOST_CHECK(landmarks.size() == 2 && distances.size() == 2); + BOOST_CHECK(distances[0] == std::numeric_limits<FT>::infinity()); + BOOST_CHECK(distances[1] == 1); + landmarks.clear(); distances.clear(); + + // Ignore duplicated points + points.emplace_back(point.begin(), point.end()); + Gudhi::subsampling::choose_n_farthest_points(d, points, -1, -1, std::back_inserter(landmarks), std::back_inserter(distances)); BOOST_CHECK(landmarks.size() == 2 && distances.size() == 2); BOOST_CHECK(distances[0] == std::numeric_limits<FT>::infinity()); BOOST_CHECK(distances[1] == 1); |