diff options
author | Marc Glisse <marc.glisse@inria.fr> | 2021-04-16 22:49:55 +0200 |
---|---|---|
committer | Marc Glisse <marc.glisse@inria.fr> | 2021-04-17 18:52:37 +0200 |
commit | 9616e2b8f616366393bf0b74a76029ae8a95d77a (patch) | |
tree | 1a257a9d4986c844a5f3a099fb7f602a048eac19 /src | |
parent | 7e05e915adc1be285e04eb00d3ab7ba1b797f38d (diff) |
Rewrite choose_n_farthest_points
Introduce an indirection to help with points with multiplicity.
Limit the use of magic values like 0 and infinity.
Diffstat (limited to 'src')
-rw-r--r-- | src/Subsampling/include/gudhi/choose_n_farthest_points.h | 55 | ||||
-rw-r--r-- | src/Subsampling/test/test_choose_n_farthest_points.cpp | 5 |
2 files changed, 42 insertions, 18 deletions
diff --git a/src/Subsampling/include/gudhi/choose_n_farthest_points.h b/src/Subsampling/include/gudhi/choose_n_farthest_points.h index e6347d96..44c02df1 100644 --- a/src/Subsampling/include/gudhi/choose_n_farthest_points.h +++ b/src/Subsampling/include/gudhi/choose_n_farthest_points.h @@ -42,7 +42,7 @@ enum : std::size_t { * The iteration starts with the landmark `starting point` or, if `starting point==random_starting_point`, * with a random landmark. * It chooses `final_size` points from a random access range - * `input_pts` (or the number of distinct points if `final_size` is larger) + * `input_pts` (or the number of input points if `final_size` is larger) * and outputs them in the output iterator `output_it`. It also * outputs the distance from each of those points to the set of previous * points in `dist_it`. @@ -88,34 +88,57 @@ void choose_n_farthest_points(Distance dist, starting_point = dis(gen); } - std::size_t current_number_of_landmarks = 0; // counter for landmarks - static_assert(std::numeric_limits<double>::has_infinity, "the number type needs to support infinity()"); // FIXME: don't hard-code the type as double. For Epeck_d, we also want to handle types that do not have an infinity. - const double infty = std::numeric_limits<double>::infinity(); // infinity (see next entry) - std::vector< double > dist_to_L(nb_points, infty); // vector of current distances to L from input_pts + static_assert(std::numeric_limits<double>::has_infinity, "the number type needs to support infinity()"); + + *output_it++ = input_pts[starting_point]; + *dist_it++ = std::numeric_limits<double>::infinity(); + if (final_size == 1) return; + + std::vector<std::size_t> points(nb_points); // map from remaining points to indexes in input_pts + std::vector< double > dist_to_L(nb_points); // vector of current distances to L from points + for(std::size_t i = 0; i < nb_points; ++i) { + points[i] = i; + dist_to_L[i] = dist(input_pts[i], input_pts[starting_point]); + } + // The indirection through points makes the program a bit slower. Some alternatives: + // - the original code never removed points and counted on them not + // reappearing because of a self-distance of 0. This causes unnecessary + // computations when final_size is large. It also causes trouble if there are + // input points at distance 0 from each other. + // - copy input_pts and update the local copy when removing points. std::size_t curr_max_w = starting_point; - for (current_number_of_landmarks = 0; current_number_of_landmarks != final_size; current_number_of_landmarks++) { - // curr_max_w at this point is the next landmark - *output_it++ = input_pts[curr_max_w]; - *dist_it++ = dist_to_L[curr_max_w]; + for (std::size_t current_number_of_landmarks = 1; current_number_of_landmarks != final_size; current_number_of_landmarks++) { + std::size_t latest_landmark = points[curr_max_w]; + // To remove the latest landmark at index curr_max_w, replace it + // with the last point and reduce the length of the vector. + std::size_t last = points.size() - 1; + if (curr_max_w != last) { + points[curr_max_w] = points[last]; + dist_to_L[curr_max_w] = dist_to_L[last]; + } + points.pop_back(); + + // Update distances to L. std::size_t i = 0; - for (auto&& p : input_pts) { - double curr_dist = dist(p, input_pts[curr_max_w]); + for (auto p : points) { + double curr_dist = dist(input_pts[p], input_pts[latest_landmark]); if (curr_dist < dist_to_L[i]) dist_to_L[i] = curr_dist; ++i; } - // choose the next curr_max_w - double curr_max_dist = 0; // used for defining the furhest point from L - for (i = 0; i < dist_to_L.size(); i++) + // choose the next landmark + curr_max_w = 0; + double curr_max_dist = dist_to_L[curr_max_w]; // used for defining the furthest point from L + for (i = 1; i < points.size(); i++) if (dist_to_L[i] > curr_max_dist) { curr_max_dist = dist_to_L[i]; curr_max_w = i; } - // If all that remains are duplicates of points already taken, stop. - if (curr_max_dist == 0) break; + *output_it++ = input_pts[points[curr_max_w]]; + *dist_it++ = dist_to_L[curr_max_w]; } } diff --git a/src/Subsampling/test/test_choose_n_farthest_points.cpp b/src/Subsampling/test/test_choose_n_farthest_points.cpp index 94793295..c384c61b 100644 --- a/src/Subsampling/test/test_choose_n_farthest_points.cpp +++ b/src/Subsampling/test/test_choose_n_farthest_points.cpp @@ -102,11 +102,12 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(test_choose_farthest_point_limits, Kernel, list_of BOOST_CHECK(distances[1] == 1); landmarks.clear(); distances.clear(); - // Ignore duplicated points + // Accept duplicated points points.emplace_back(point.begin(), point.end()); Gudhi::subsampling::choose_n_farthest_points(d, points, -1, -1, std::back_inserter(landmarks), std::back_inserter(distances)); - BOOST_CHECK(landmarks.size() == 2 && distances.size() == 2); + BOOST_CHECK(landmarks.size() == 3 && distances.size() == 3); BOOST_CHECK(distances[0] == std::numeric_limits<FT>::infinity()); BOOST_CHECK(distances[1] == 1); + BOOST_CHECK(distances[2] == 0); landmarks.clear(); distances.clear(); } |