From c66b9126429e1ff18f9ca69b27c5f357f071a697 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 19 Oct 2020 01:05:04 +0200 Subject: Handle duplicated points --- src/Subsampling/include/gudhi/choose_n_farthest_points.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Subsampling/include/gudhi/choose_n_farthest_points.h b/src/Subsampling/include/gudhi/choose_n_farthest_points.h index 66421a69..38c3a76b 100644 --- a/src/Subsampling/include/gudhi/choose_n_farthest_points.h +++ b/src/Subsampling/include/gudhi/choose_n_farthest_points.h @@ -111,6 +111,8 @@ void choose_n_farthest_points(Kernel const &k, curr_max_dist = dist_to_L[i]; curr_max_w = i; } + // If all that remains are duplicates of points already taken, stop. + if (curr_max_dist == 0) break; } } -- cgit v1.2.3 From dda7885005c343601c6630796eb56bdcf91a559f Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 22 Oct 2020 22:23:28 +0200 Subject: Document the change It would be possible to emit the duplicate points instead of stopping, but the current implementation makes that inconvenient. --- .../include/gudhi/choose_n_farthest_points.h | 3 ++- src/python/gudhi/subsampling.pyx | 21 +++++++++++---------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/Subsampling/include/gudhi/choose_n_farthest_points.h b/src/Subsampling/include/gudhi/choose_n_farthest_points.h index 38c3a76b..0e13fc5a 100644 --- a/src/Subsampling/include/gudhi/choose_n_farthest_points.h +++ b/src/Subsampling/include/gudhi/choose_n_farthest_points.h @@ -48,7 +48,8 @@ enum : std::size_t { * \tparam PointOutputIterator Output iterator whose value type is Kernel::Point_d. * \tparam DistanceOutputIterator Output iterator for distances. * \details It chooses `final_size` points from a random access range - * `input_pts` and outputs them in the output iterator `output_it`. It also + * `input_pts` (or the number of distinct points if `final_size` is larger) + * and outputs them in the output iterator `output_it`. It also * outputs the distance from each of those points to the set of previous * points in `dist_it`. * @param[in] k A kernel object. diff --git a/src/python/gudhi/subsampling.pyx b/src/python/gudhi/subsampling.pyx index f77c6f75..b11d07e5 100644 --- a/src/python/gudhi/subsampling.pyx +++ b/src/python/gudhi/subsampling.pyx @@ -33,7 +33,7 @@ def choose_n_farthest_points(points=None, off_file='', nb_points=0, starting_poi The iteration starts with the landmark `starting point`. :param points: The input point set. - :type points: Iterable[Iterable[float]]. + :type points: Iterable[Iterable[float]] Or @@ -42,14 +42,15 @@ def choose_n_farthest_points(points=None, off_file='', nb_points=0, starting_poi And in both cases - :param nb_points: Number of points of the subsample. - :type nb_points: unsigned. + :param nb_points: Number of points of the subsample (the subsample may be \ + smaller if there are fewer than nb_points distinct input points) + :type nb_points: int :param starting_point: The iteration starts with the landmark `starting \ - point`,which is the index of the point to start with. If not set, this \ + point`, which is the index of the point to start with. If not set, this \ index is chosen randomly. - :type starting_point: unsigned. + :type starting_point: int :returns: The subsample point set. - :rtype: List[List[float]]. + :rtype: List[List[float]] """ if off_file: if os.path.isfile(off_file): @@ -76,7 +77,7 @@ def pick_n_random_points(points=None, off_file='', nb_points=0): """Subsample a point set by picking random vertices. :param points: The input point set. - :type points: Iterable[Iterable[float]]. + :type points: Iterable[Iterable[float]] Or @@ -86,7 +87,7 @@ def pick_n_random_points(points=None, off_file='', nb_points=0): And in both cases :param nb_points: Number of points of the subsample. - :type nb_points: unsigned. + :type nb_points: int :returns: The subsample point set. :rtype: List[List[float]] """ @@ -107,7 +108,7 @@ def sparsify_point_set(points=None, off_file='', min_squared_dist=0.0): between any two points is greater than or equal to min_squared_dist. :param points: The input point set. - :type points: Iterable[Iterable[float]]. + :type points: Iterable[Iterable[float]] Or @@ -118,7 +119,7 @@ def sparsify_point_set(points=None, off_file='', min_squared_dist=0.0): :param min_squared_dist: Minimum squared distance separating the output \ points. - :type min_squared_dist: float. + :type min_squared_dist: float :returns: The subsample point set. :rtype: List[List[float]] """ -- cgit v1.2.3 From 8aea376ed0b3c9066fb7e649f1cd66ffbed99a8d Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 22 Oct 2020 22:27:10 +0200 Subject: Simplify strange iterator use the syntax with [] is already used a few lines above --- src/Subsampling/include/gudhi/choose_n_farthest_points.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Subsampling/include/gudhi/choose_n_farthest_points.h b/src/Subsampling/include/gudhi/choose_n_farthest_points.h index 0e13fc5a..b70af8a0 100644 --- a/src/Subsampling/include/gudhi/choose_n_farthest_points.h +++ b/src/Subsampling/include/gudhi/choose_n_farthest_points.h @@ -100,7 +100,7 @@ void choose_n_farthest_points(Kernel const &k, *dist_it++ = dist_to_L[curr_max_w]; std::size_t i = 0; for (auto&& p : input_pts) { - double curr_dist = sqdist(p, *(std::begin(input_pts) + curr_max_w)); + double curr_dist = sqdist(p, input_pts[curr_max_w]); if (curr_dist < dist_to_L[i]) dist_to_L[i] = curr_dist; ++i; -- cgit v1.2.3 From 3ae0bc89f6ef853c1c52fc609bfe08097d3594db Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 22 Oct 2020 22:49:23 +0200 Subject: test with duplicated point --- src/Subsampling/test/test_choose_n_farthest_points.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Subsampling/test/test_choose_n_farthest_points.cpp b/src/Subsampling/test/test_choose_n_farthest_points.cpp index 5c4bd4cb..08b82d61 100644 --- a/src/Subsampling/test/test_choose_n_farthest_points.cpp +++ b/src/Subsampling/test/test_choose_n_farthest_points.cpp @@ -93,7 +93,15 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(test_choose_farthest_point_limits, Kernel, list_of std::vector point2({1.0, 0.0, 0.0, 0.0}); points.push_back(Point_d(point2.begin(), point2.end())); - // Choose all farthest points in a one point cloud + // Choose all farthest points among 2 points + Gudhi::subsampling::choose_n_farthest_points(k, points, -1, -1, std::back_inserter(landmarks), std::back_inserter(distances)); + BOOST_CHECK(landmarks.size() == 2 && distances.size() == 2); + BOOST_CHECK(distances[0] == std::numeric_limits::infinity()); + BOOST_CHECK(distances[1] == 1); + landmarks.clear(); distances.clear(); + + // Ignore duplicated points + points.push_back(Point_d(point.begin(), point.end())); Gudhi::subsampling::choose_n_farthest_points(k, points, -1, -1, std::back_inserter(landmarks), std::back_inserter(distances)); BOOST_CHECK(landmarks.size() == 2 && distances.size() == 2); BOOST_CHECK(distances[0] == std::numeric_limits::infinity()); -- cgit v1.2.3 From 705aa3b7ddc0a2bbbcc31c4b45e19792bd4ce9a5 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 28 Oct 2020 00:04:37 +0100 Subject: emplace_back --- src/Subsampling/test/test_choose_n_farthest_points.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Subsampling/test/test_choose_n_farthest_points.cpp b/src/Subsampling/test/test_choose_n_farthest_points.cpp index 08b82d61..b318d58e 100644 --- a/src/Subsampling/test/test_choose_n_farthest_points.cpp +++ b/src/Subsampling/test/test_choose_n_farthest_points.cpp @@ -39,7 +39,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(test_choose_farthest_point, Kernel, list_of_tested for (FT k = 0; k < 5; k += 1.0) for (FT l = 0; l < 5; l += 1.0) { std::vector point({i, j, k, l}); - points.push_back(Point_d(point.begin(), point.end())); + points.emplace_back(point.begin(), point.end()); } landmarks.clear(); @@ -75,7 +75,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(test_choose_farthest_point_limits, Kernel, list_of landmarks.clear(); distances.clear(); std::vector point({0.0, 0.0, 0.0, 0.0}); - points.push_back(Point_d(point.begin(), point.end())); + points.emplace_back(point.begin(), point.end()); // Choose -1 farthest points in a one point cloud Gudhi::subsampling::choose_n_farthest_points(k, points, -1, -1, std::back_inserter(landmarks), std::back_inserter(distances)); BOOST_CHECK(landmarks.size() == 1 && distances.size() == 1); @@ -92,7 +92,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(test_choose_farthest_point_limits, Kernel, list_of landmarks.clear(); distances.clear(); std::vector point2({1.0, 0.0, 0.0, 0.0}); - points.push_back(Point_d(point2.begin(), point2.end())); + points.emplace_back(point2.begin(), point2.end()); // Choose all farthest points among 2 points Gudhi::subsampling::choose_n_farthest_points(k, points, -1, -1, std::back_inserter(landmarks), std::back_inserter(distances)); BOOST_CHECK(landmarks.size() == 2 && distances.size() == 2); @@ -101,7 +101,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(test_choose_farthest_point_limits, Kernel, list_of landmarks.clear(); distances.clear(); // Ignore duplicated points - points.push_back(Point_d(point.begin(), point.end())); + points.emplace_back(point.begin(), point.end()); Gudhi::subsampling::choose_n_farthest_points(k, points, -1, -1, std::back_inserter(landmarks), std::back_inserter(distances)); BOOST_CHECK(landmarks.size() == 2 && distances.size() == 2); BOOST_CHECK(distances[0] == std::numeric_limits::infinity()); -- cgit v1.2.3