From c66b9126429e1ff18f9ca69b27c5f357f071a697 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 19 Oct 2020 01:05:04 +0200 Subject: Handle duplicated points --- src/Subsampling/include/gudhi/choose_n_farthest_points.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/Subsampling/include/gudhi/choose_n_farthest_points.h') diff --git a/src/Subsampling/include/gudhi/choose_n_farthest_points.h b/src/Subsampling/include/gudhi/choose_n_farthest_points.h index 66421a69..38c3a76b 100644 --- a/src/Subsampling/include/gudhi/choose_n_farthest_points.h +++ b/src/Subsampling/include/gudhi/choose_n_farthest_points.h @@ -111,6 +111,8 @@ void choose_n_farthest_points(Kernel const &k, curr_max_dist = dist_to_L[i]; curr_max_w = i; } + // If all that remains are duplicates of points already taken, stop. + if (curr_max_dist == 0) break; } } -- cgit v1.2.3 From dda7885005c343601c6630796eb56bdcf91a559f Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 22 Oct 2020 22:23:28 +0200 Subject: Document the change It would be possible to emit the duplicate points instead of stopping, but the current implementation makes that inconvenient. --- .../include/gudhi/choose_n_farthest_points.h | 3 ++- src/python/gudhi/subsampling.pyx | 21 +++++++++++---------- 2 files changed, 13 insertions(+), 11 deletions(-) (limited to 'src/Subsampling/include/gudhi/choose_n_farthest_points.h') diff --git a/src/Subsampling/include/gudhi/choose_n_farthest_points.h b/src/Subsampling/include/gudhi/choose_n_farthest_points.h index 38c3a76b..0e13fc5a 100644 --- a/src/Subsampling/include/gudhi/choose_n_farthest_points.h +++ b/src/Subsampling/include/gudhi/choose_n_farthest_points.h @@ -48,7 +48,8 @@ enum : std::size_t { * \tparam PointOutputIterator Output iterator whose value type is Kernel::Point_d. * \tparam DistanceOutputIterator Output iterator for distances. * \details It chooses `final_size` points from a random access range - * `input_pts` and outputs them in the output iterator `output_it`. It also + * `input_pts` (or the number of distinct points if `final_size` is larger) + * and outputs them in the output iterator `output_it`. It also * outputs the distance from each of those points to the set of previous * points in `dist_it`. * @param[in] k A kernel object. diff --git a/src/python/gudhi/subsampling.pyx b/src/python/gudhi/subsampling.pyx index f77c6f75..b11d07e5 100644 --- a/src/python/gudhi/subsampling.pyx +++ b/src/python/gudhi/subsampling.pyx @@ -33,7 +33,7 @@ def choose_n_farthest_points(points=None, off_file='', nb_points=0, starting_poi The iteration starts with the landmark `starting point`. :param points: The input point set. - :type points: Iterable[Iterable[float]]. + :type points: Iterable[Iterable[float]] Or @@ -42,14 +42,15 @@ def choose_n_farthest_points(points=None, off_file='', nb_points=0, starting_poi And in both cases - :param nb_points: Number of points of the subsample. - :type nb_points: unsigned. + :param nb_points: Number of points of the subsample (the subsample may be \ + smaller if there are fewer than nb_points distinct input points) + :type nb_points: int :param starting_point: The iteration starts with the landmark `starting \ - point`,which is the index of the point to start with. If not set, this \ + point`, which is the index of the point to start with. If not set, this \ index is chosen randomly. - :type starting_point: unsigned. + :type starting_point: int :returns: The subsample point set. - :rtype: List[List[float]]. + :rtype: List[List[float]] """ if off_file: if os.path.isfile(off_file): @@ -76,7 +77,7 @@ def pick_n_random_points(points=None, off_file='', nb_points=0): """Subsample a point set by picking random vertices. :param points: The input point set. - :type points: Iterable[Iterable[float]]. + :type points: Iterable[Iterable[float]] Or @@ -86,7 +87,7 @@ def pick_n_random_points(points=None, off_file='', nb_points=0): And in both cases :param nb_points: Number of points of the subsample. - :type nb_points: unsigned. + :type nb_points: int :returns: The subsample point set. :rtype: List[List[float]] """ @@ -107,7 +108,7 @@ def sparsify_point_set(points=None, off_file='', min_squared_dist=0.0): between any two points is greater than or equal to min_squared_dist. :param points: The input point set. - :type points: Iterable[Iterable[float]]. + :type points: Iterable[Iterable[float]] Or @@ -118,7 +119,7 @@ def sparsify_point_set(points=None, off_file='', min_squared_dist=0.0): :param min_squared_dist: Minimum squared distance separating the output \ points. - :type min_squared_dist: float. + :type min_squared_dist: float :returns: The subsample point set. :rtype: List[List[float]] """ -- cgit v1.2.3 From 8aea376ed0b3c9066fb7e649f1cd66ffbed99a8d Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 22 Oct 2020 22:27:10 +0200 Subject: Simplify strange iterator use the syntax with [] is already used a few lines above --- src/Subsampling/include/gudhi/choose_n_farthest_points.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Subsampling/include/gudhi/choose_n_farthest_points.h') diff --git a/src/Subsampling/include/gudhi/choose_n_farthest_points.h b/src/Subsampling/include/gudhi/choose_n_farthest_points.h index 0e13fc5a..b70af8a0 100644 --- a/src/Subsampling/include/gudhi/choose_n_farthest_points.h +++ b/src/Subsampling/include/gudhi/choose_n_farthest_points.h @@ -100,7 +100,7 @@ void choose_n_farthest_points(Kernel const &k, *dist_it++ = dist_to_L[curr_max_w]; std::size_t i = 0; for (auto&& p : input_pts) { - double curr_dist = sqdist(p, *(std::begin(input_pts) + curr_max_w)); + double curr_dist = sqdist(p, input_pts[curr_max_w]); if (curr_dist < dist_to_L[i]) dist_to_L[i] = curr_dist; ++i; -- cgit v1.2.3