summaryrefslogtreecommitdiff
path: root/src/Subsampling/include/gudhi/choose_n_farthest_points.h
blob: ea387bf9a19e89f2e9e00600237c0523552a2233 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
/*    This file is part of the Gudhi Library. The Gudhi library
 *    (Geometric Understanding in Higher Dimensions) is a generic C++
 *    library for computational topology.
 *
 *    Author(s):       Siargey Kachanovich
 *
 *    Copyright (C) 2016 INRIA
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef CHOOSE_N_FARTHEST_POINTS_H_
#define CHOOSE_N_FARTHEST_POINTS_H_

#include <boost/range.hpp>

#include <gudhi/Kd_tree_search.h>

#include <gudhi/Clock.h>

#include <CGAL/Search_traits.h>
#include <CGAL/Search_traits_adapter.h>
#include <CGAL/Fuzzy_sphere.h>

#include <iterator>
#include <algorithm>  // for sort
#include <vector>
#include <random>
#include <limits>  // for numeric_limits<>

namespace Gudhi {

namespace subsampling {

/** 
 *  \ingroup subsampling
 *  \brief Subsample by a greedy strategy of iteratively adding the farthest point from the
 *  current chosen point set to the subsampling. 
 *  The iteration starts with the landmark `starting point`.
 *  \tparam Kernel must provide a type Kernel::Squared_distance_d which is a model of the 
 *          concept <a target="_blank"
 *   href="http://doc.cgal.org/latest/Kernel_d/classKernel__d_1_1Squared__distance__d.html">Kernel_d::Squared_distance_d</a>
 *   concept.
 *  It must also contain a public member 'squared_distance_d_object' of this type.
 *  \tparam Point_range Range whose value type is Kernel::Point_d.  It must provide random-access 
 *         via `operator[]` and the points should be stored contiguously in memory.
 *  \tparam OutputIterator Output iterator whose value type is Kernel::Point_d.
 *  \details It chooses `final_size` points from a random access range `input_pts` and
 *  outputs it in the output iterator `output_it`.
 * @param[in] k A kernel object.
 * @param[in] input_pts Const reference to the input points.
 * @param[in] final_size The size of the subsample to compute.
 * @param[in] starting_point The seed in the farthest point algorithm.
 * @param[out] output_it The output iterator.
 *  
 */
template < typename Kernel,
typename Point_range,
typename OutputIterator>
void choose_n_farthest_points(Kernel const &k,
                              Point_range const &input_pts,
                              std::size_t final_size,
                              std::size_t starting_point,
                              OutputIterator output_it) {
  std::size_t nb_points = boost::size(input_pts);
  if (final_size > nb_points)
    final_size = nb_points;

  // Tests to the limit
  if (final_size < 1)
    return;

  typename Kernel::Squared_distance_d sqdist = k.squared_distance_d_object();

  std::size_t current_number_of_landmarks = 0;  // counter for landmarks
  const double infty = std::numeric_limits<double>::infinity();  // infinity (see next entry)
  std::vector< double > dist_to_L(nb_points, infty);  // vector of current distances to L from input_pts

  std::size_t curr_max_w = starting_point;

  for (current_number_of_landmarks = 0; current_number_of_landmarks != final_size; current_number_of_landmarks++) {
    // curr_max_w at this point is the next landmark
    *output_it++ = input_pts[curr_max_w];
    std::size_t i = 0;
    for (auto& p : input_pts) {
      double curr_dist = sqdist(p, *(std::begin(input_pts) + curr_max_w));
      if (curr_dist < dist_to_L[i])
        dist_to_L[i] = curr_dist;
      ++i;
    }
    // choose the next curr_max_w
    double curr_max_dist = 0;  // used for defining the furhest point from L
    for (i = 0; i < dist_to_L.size(); i++)
      if (dist_to_L[i] > curr_max_dist) {
        curr_max_dist = dist_to_L[i];
        curr_max_w = i;
      }
  }
}

/** 
 *  \ingroup subsampling
 *  \brief Subsample by a greedy strategy of iteratively adding the farthest point from the
 *  current chosen point set to the subsampling. 
 *  The iteration starts with a random landmark.
 *  \tparam Kernel must provide a type Kernel::Squared_distance_d which is a model of the 
 *          concept <a target="_blank"
 *   href="http://doc.cgal.org/latest/Kernel_d/classKernel__d_1_1Squared__distance__d.html">Kernel_d::Squared_distance_d</a>
 *   concept.
 *  It must also contain a public member 'squared_distance_d_object' of this type.
 *  \tparam Point_range Range whose value type is Kernel::Point_d.  It must provide random-access 
 *         via `operator[]` and the points should be stored contiguously in memory.
 *  \tparam OutputIterator Output iterator whose value type is Kernel::Point_d.
 *  \details It chooses `final_size` points from a random access range `input_pts` and
 *  outputs it in the output iterator `output_it`.
 * @param[in] k A kernel object.
 * @param[in] input_pts Const reference to the input points.
 * @param[in] final_size The size of the subsample to compute.
 * @param[out] output_it The output iterator.
 *  
 */
template < typename Kernel,
typename Point_range,
typename OutputIterator>
void choose_n_farthest_points(Kernel const& k,
                              Point_range const &input_pts,
                              unsigned final_size,
                              OutputIterator output_it) {
  // Tests to the limit
  if ((final_size < 1) || (input_pts.size() == 0))
    return;

  // Choose randomly the first landmark
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_int_distribution<> dis(0, (input_pts.size() - 1));
  int starting_point = dis(gen);
  choose_n_farthest_points(k, input_pts, final_size, starting_point, output_it);
}

}  // namespace subsampling

}  // namespace Gudhi

#endif  // CHOOSE_N_FARTHEST_POINTS_H_