From c311cace889373c141dc9c9319501d7ff6888b14 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 31 Oct 2020 22:28:36 +0100 Subject: Clean-ups in subsampling --- .../include/gudhi/pick_n_random_points.h | 10 ++++++--- src/Subsampling/include/gudhi/sparsify_point_set.h | 26 ++++++---------------- 2 files changed, 14 insertions(+), 22 deletions(-) diff --git a/src/Subsampling/include/gudhi/pick_n_random_points.h b/src/Subsampling/include/gudhi/pick_n_random_points.h index a67b2b84..c2c71f83 100644 --- a/src/Subsampling/include/gudhi/pick_n_random_points.h +++ b/src/Subsampling/include/gudhi/pick_n_random_points.h @@ -44,6 +44,12 @@ void pick_n_random_points(Point_container const &points, Gudhi::Clock t; #endif + std::random_device rd; + std::mt19937 g(rd()); + +#if __cplusplus >= 201703L + std::sample(std::begin(points), std::end(points), output_it, final_size, g); +#else std::size_t nbP = boost::size(points); if (final_size > nbP) final_size = nbP; @@ -51,14 +57,12 @@ void pick_n_random_points(Point_container const &points, std::vector landmarks(nbP); std::iota(landmarks.begin(), landmarks.end(), 0); - std::random_device rd; - std::mt19937 g(rd()); - std::shuffle(landmarks.begin(), landmarks.end(), g); landmarks.resize(final_size); for (int l : landmarks) *output_it++ = points[l]; +#endif #ifdef GUDHI_SUBSAMPLING_PROFILING t.end(); diff --git a/src/Subsampling/include/gudhi/sparsify_point_set.h b/src/Subsampling/include/gudhi/sparsify_point_set.h index b30cec80..78e0da4a 100644 --- a/src/Subsampling/include/gudhi/sparsify_point_set.h +++ b/src/Subsampling/include/gudhi/sparsify_point_set.h @@ -11,6 +11,8 @@ #ifndef SPARSIFY_POINT_SET_H_ #define SPARSIFY_POINT_SET_H_ +#include + #include #ifdef GUDHI_SUBSAMPLING_PROFILING #include @@ -63,29 +65,15 @@ sparsify_point_set( // Parse the input points, and add them if they are not too close to // the other points std::size_t pt_idx = 0; - for (typename Point_range::const_iterator it_pt = input_pts.begin(); - it_pt != input_pts.end(); - ++it_pt, ++pt_idx) { - if (dropped_points[pt_idx]) + for (auto const& pt : input_pts) { + if (dropped_points[pt_idx++]) continue; - *output_it++ = *it_pt; - - auto ins_range = points_ds.incremental_nearest_neighbors(*it_pt); + *output_it++ = pt; // If another point Q is closer that min_squared_dist, mark Q to be dropped - for (auto const& neighbor : ins_range) { - std::size_t neighbor_point_idx = neighbor.first; - // If the neighbor is too close, we drop the neighbor - if (neighbor.second < min_squared_dist) { - // N.B.: If neighbor_point_idx < pt_idx, - // dropped_points[neighbor_point_idx] is already true but adding a - // test doesn't make things faster, so why bother? - dropped_points[neighbor_point_idx] = true; - } else { - break; - } - } + auto drop = [&dropped_points] (std::ptrdiff_t neighbor_point_idx) { dropped_points[neighbor_point_idx] = true; }; + points_ds.all_near_neighbors(pt, min_squared_dist, boost::make_function_output_iterator(std::ref(drop))); } #ifdef GUDHI_SUBSAMPLING_PROFILING -- cgit v1.2.3 From 0a071114ad08d2ce149f8b484dd8ff1b96b61fb1 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 9 Nov 2020 22:55:00 +0100 Subject: Don't test the equality case in sparsify_point_set. sqrt. --- src/Subsampling/include/gudhi/sparsify_point_set.h | 6 ++++-- src/python/gudhi/subsampling.pyx | 2 +- src/python/test/test_subsampling.py | 16 ++++++++++------ 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/Subsampling/include/gudhi/sparsify_point_set.h b/src/Subsampling/include/gudhi/sparsify_point_set.h index 78e0da4a..afa6d45a 100644 --- a/src/Subsampling/include/gudhi/sparsify_point_set.h +++ b/src/Subsampling/include/gudhi/sparsify_point_set.h @@ -29,7 +29,7 @@ namespace subsampling { * \ingroup subsampling * \brief Outputs a subset of the input points so that the * squared distance between any two points - * is greater than or equal to `min_squared_dist`. + * is greater than `min_squared_dist`. * * \tparam Kernel must be a model of the SearchTraits @@ -53,6 +53,7 @@ sparsify_point_set( OutputIterator output_it) { typedef typename Gudhi::spatial_searching::Kd_tree_search< Kernel, Point_range> Points_ds; + using std::sqrt; #ifdef GUDHI_SUBSAMPLING_PROFILING Gudhi::Clock t; @@ -73,7 +74,8 @@ sparsify_point_set( // If another point Q is closer that min_squared_dist, mark Q to be dropped auto drop = [&dropped_points] (std::ptrdiff_t neighbor_point_idx) { dropped_points[neighbor_point_idx] = true; }; - points_ds.all_near_neighbors(pt, min_squared_dist, boost::make_function_output_iterator(std::ref(drop))); + // FIXME: what if FT does not support sqrt? + points_ds.all_near_neighbors(pt, sqrt(min_squared_dist), boost::make_function_output_iterator(std::ref(drop))); } #ifdef GUDHI_SUBSAMPLING_PROFILING diff --git a/src/python/gudhi/subsampling.pyx b/src/python/gudhi/subsampling.pyx index b11d07e5..46f32335 100644 --- a/src/python/gudhi/subsampling.pyx +++ b/src/python/gudhi/subsampling.pyx @@ -105,7 +105,7 @@ def pick_n_random_points(points=None, off_file='', nb_points=0): def sparsify_point_set(points=None, off_file='', min_squared_dist=0.0): """Outputs a subset of the input points so that the squared distance - between any two points is greater than or equal to min_squared_dist. + between any two points is greater than min_squared_dist. :param points: The input point set. :type points: Iterable[Iterable[float]] diff --git a/src/python/test/test_subsampling.py b/src/python/test/test_subsampling.py index 31f64e32..4019852e 100755 --- a/src/python/test/test_subsampling.py +++ b/src/python/test/test_subsampling.py @@ -141,12 +141,16 @@ def test_simple_sparsify_points(): # assert gudhi.sparsify_point_set(points = [], min_squared_dist = 0.0) == [] # assert gudhi.sparsify_point_set(points = [], min_squared_dist = 10.0) == [] assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=0.0) == point_set - assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=1.0) == point_set - assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=2.0) == [ + assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=0.999) == point_set + assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=1.001) == [ [0, 1], [1, 0], ] - assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=2.01) == [[0, 1]] + assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=1.999) == [ + [0, 1], + [1, 0], + ] + assert gudhi.sparsify_point_set(points=point_set, min_squared_dist=2.001) == [[0, 1]] assert ( len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=0.0)) @@ -157,11 +161,11 @@ def test_simple_sparsify_points(): == 5 ) assert ( - len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=40.0)) + len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=40.1)) == 4 ) assert ( - len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=90.0)) + len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=89.9)) == 3 ) assert ( @@ -169,7 +173,7 @@ def test_simple_sparsify_points(): == 2 ) assert ( - len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=325.0)) + len(gudhi.sparsify_point_set(off_file="subsample.off", min_squared_dist=324.9)) == 2 ) assert ( -- cgit v1.2.3 From 4a34c0b7b8be8b8e275b13823da31127bbd5f3b2 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sun, 22 Nov 2020 23:37:18 +0100 Subject: Handle squared radius Make it work without a breaking change, we can always make a change later in a separate PR. --- .../include/gudhi/Kd_tree_search.h | 98 ++++++++++++++++++++-- .../include/gudhi/pick_n_random_points.h | 4 +- src/Subsampling/include/gudhi/sparsify_point_set.h | 3 +- 3 files changed, 96 insertions(+), 9 deletions(-) diff --git a/src/Spatial_searching/include/gudhi/Kd_tree_search.h b/src/Spatial_searching/include/gudhi/Kd_tree_search.h index 87969dd9..a50a8537 100644 --- a/src/Spatial_searching/include/gudhi/Kd_tree_search.h +++ b/src/Spatial_searching/include/gudhi/Kd_tree_search.h @@ -12,11 +12,12 @@ #ifndef KD_TREE_SEARCH_H_ #define KD_TREE_SEARCH_H_ +#include + #include #include #include #include -#include #include #include // for CGAL_VERSION_NR @@ -40,7 +41,6 @@ namespace Gudhi { namespace spatial_searching { - /** * \class Kd_tree_search Kd_tree_search.h gudhi/Kd_tree_search.h * \brief Spatial tree data structure to perform (approximate) nearest and furthest neighbor search. @@ -83,7 +83,8 @@ class Kd_tree_search { typedef CGAL::Search_traits< FT, Point, typename Traits::Cartesian_const_iterator_d, - typename Traits::Construct_cartesian_const_iterator_d> Traits_base; + typename Traits::Construct_cartesian_const_iterator_d, + typename Traits::Dimension> Traits_base; typedef CGAL::Search_traits_adapter< std::ptrdiff_t, @@ -110,7 +111,76 @@ class Kd_tree_search { /// of a point P and `second` is the squared distance between P and the query point. typedef Incremental_neighbor_search INS_range; - typedef CGAL::Fuzzy_sphere Fuzzy_sphere; + // Because CGAL::Fuzzy_sphere takes the radius and not its square + struct Sphere_for_kdtree_search + { + typedef typename Traits::Point_d Point_d; + typedef typename Traits::FT FT; + typedef typename Traits::Dimension D; + typedef D Dimension; + + private: + STraits traits; + Point_d c; + FT sqradmin, sqradmax; + bool use_max; + + public: + // `prefer_max` means that we prefer outputting more points at squared distance between r2min and r2max, + // while `!prefer_max` means we prefer fewer. + Sphere_for_kdtree_search(Point_d const& c_, FT const& r2min, FT const& r2max, bool prefer_max=true, STraits const& traits_ = {}) + : traits(traits_), c(c_), sqradmin(r2min), sqradmax(r2max), use_max(prefer_max) + { GUDHI_CHECK(r2min >= 0 && r2max >= r2min, "0 <= r2min <= r2max"); } + + bool contains(std::ptrdiff_t i) const { + const Point_d& p = get(traits.point_property_map(), i); + auto ccci = traits.construct_cartesian_const_iterator_d_object(); + return contains_point_given_as_coordinates(ccci(p), ccci(p, 0)); + } + + template + bool contains_point_given_as_coordinates(Coord_iterator pi, Coord_iterator CGAL_UNUSED) const { + FT distance = 0; + auto ccci = traits.construct_cartesian_const_iterator_d_object(); + auto ci = ccci(c); + auto ce = ccci(c, 0); + FT const& limit = use_max ? sqradmax : sqradmin; + while (ci != ce) { + distance += CGAL::square(*pi++ - *ci++); + // I think Clément advised to check the distance at every step instead of + // just at the end, especially when the dimension becomes large. Distance + // isn't part of the concept anyway. + if (distance > limit) return false; + } + return true; + } + + bool inner_range_intersects(CGAL::Kd_tree_rectangle const& rect) const { + auto ccci = traits.construct_cartesian_const_iterator_d_object(); + FT distance = 0; + auto ci = ccci(c); + auto ce = ccci(c, 0); + for (int i = 0; ci != ce; ++i, ++ci) { + distance += CGAL::square(CGAL::max(CGAL::max(*ci - rect.max_coord(i), rect.min_coord(i) - *ci), 0 )); + if (distance > sqradmin) return false; + } + return true; + } + + + bool outer_range_contains(CGAL::Kd_tree_rectangle const& rect) const { + auto ccci = traits.construct_cartesian_const_iterator_d_object(); + FT distance = 0; + auto ci = ccci(c); + auto ce = ccci(c, 0); + for (int i = 0; ci != ce; ++i, ++ci) { + distance += CGAL::square(CGAL::max(*ci - rect.min_coord(i), rect.max_coord(i) - *ci)); + if (distance > sqradmax) return false; + } + return true; + } + }; + /// \brief Constructor /// @param[in] points Const reference to the point range. This range /// is not copied, so it should not be destroyed or modified afterwards. @@ -266,10 +336,26 @@ class Kd_tree_search { /// @param[in] eps Approximation factor. template void all_near_neighbors(Point const& p, - FT radius, + FT const& radius, OutputIterator it, FT eps = FT(0)) const { - m_tree.search(it, Fuzzy_sphere(p, radius, eps, m_tree.traits())); + all_near_neighbors2(p, CGAL::square(radius - eps), CGAL::square(radius + eps), it); + } + + /// \brief Search for all the neighbors in a ball. This is similar to `all_near_neighbors` but takes directly + /// the square of the minimum distance below which points must be considered neighbors and square of the + /// maximum distance above which they cannot be. + /// @param[in] p The query point. + /// @param[in] sq_radius_min The square of the minimum search radius + /// @param[in] sq_radius_max The square of the maximum search radius + /// @param[out] it The points that lie inside the sphere of center `p` and squared radius `sq_radius`. + /// Note: `it` is used this way: `*it++ = each_point`. + template + void all_near_neighbors2(Point const& p, + FT const& sq_radius_min, + FT const& sq_radius_max, + OutputIterator it) const { + m_tree.search(it, Sphere_for_kdtree_search(p, sq_radius_min, sq_radius_max, true, m_tree.traits())); } int tree_depth() const { diff --git a/src/Subsampling/include/gudhi/pick_n_random_points.h b/src/Subsampling/include/gudhi/pick_n_random_points.h index c2c71f83..e4246c29 100644 --- a/src/Subsampling/include/gudhi/pick_n_random_points.h +++ b/src/Subsampling/include/gudhi/pick_n_random_points.h @@ -11,7 +11,9 @@ #ifndef PICK_N_RANDOM_POINTS_H_ #define PICK_N_RANDOM_POINTS_H_ -#include +#ifdef GUDHI_SUBSAMPLING_PROFILING +# include +#endif #include diff --git a/src/Subsampling/include/gudhi/sparsify_point_set.h b/src/Subsampling/include/gudhi/sparsify_point_set.h index afa6d45a..71e8917b 100644 --- a/src/Subsampling/include/gudhi/sparsify_point_set.h +++ b/src/Subsampling/include/gudhi/sparsify_point_set.h @@ -74,8 +74,7 @@ sparsify_point_set( // If another point Q is closer that min_squared_dist, mark Q to be dropped auto drop = [&dropped_points] (std::ptrdiff_t neighbor_point_idx) { dropped_points[neighbor_point_idx] = true; }; - // FIXME: what if FT does not support sqrt? - points_ds.all_near_neighbors(pt, sqrt(min_squared_dist), boost::make_function_output_iterator(std::ref(drop))); + points_ds.all_near_neighbors2(pt, min_squared_dist, min_squared_dist, boost::make_function_output_iterator(std::ref(drop))); } #ifdef GUDHI_SUBSAMPLING_PROFILING -- cgit v1.2.3 From 23cd685d7a46cc5d601c9b937f6a849c8753fa32 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 26 Nov 2020 22:20:53 +0100 Subject: handle old boost --- src/Subsampling/include/gudhi/sparsify_point_set.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Subsampling/include/gudhi/sparsify_point_set.h b/src/Subsampling/include/gudhi/sparsify_point_set.h index 71e8917b..4571b8f3 100644 --- a/src/Subsampling/include/gudhi/sparsify_point_set.h +++ b/src/Subsampling/include/gudhi/sparsify_point_set.h @@ -11,7 +11,12 @@ #ifndef SPARSIFY_POINT_SET_H_ #define SPARSIFY_POINT_SET_H_ -#include +#include +#if BOOST_VERSION < 106600 +# include +#else +# include +#endif #include #ifdef GUDHI_SUBSAMPLING_PROFILING @@ -53,7 +58,6 @@ sparsify_point_set( OutputIterator output_it) { typedef typename Gudhi::spatial_searching::Kd_tree_search< Kernel, Point_range> Points_ds; - using std::sqrt; #ifdef GUDHI_SUBSAMPLING_PROFILING Gudhi::Clock t; -- cgit v1.2.3