summaryrefslogtreecommitdiff
path: root/include/gudhi/choose_n_farthest_points.h
blob: 8390b4c9ea17e58bc203f51097b0a6c8b9b1f839 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/*    This file is part of the Gudhi Library. The Gudhi library
 *    (Geometric Understanding in Higher Dimensions) is a generic C++
 *    library for computational topology.
 *
 *    Author(s):       Siargey Kachanovich
 *
 *    Copyright (C) 2016 INRIA
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef CHOOSE_N_FARTHEST_POINTS_H_
#define CHOOSE_N_FARTHEST_POINTS_H_

#include <boost/range.hpp>

#include <gudhi/Null_output_iterator.h>

#include <iterator>
#include <vector>
#include <random>
#include <limits>  // for numeric_limits<>

namespace Gudhi {

namespace subsampling {

/**
 *  \ingroup subsampling
 *  \brief Special values accepted by the `starting_point` argument of `choose_n_farthest_points`.
 */
enum : std::size_t {
/**
 *  Sentinel telling `choose_n_farthest_points` to draw its first landmark at random
 *  instead of using a caller-supplied index. Its value is the largest `std::size_t`.
 */
  random_starting_point = static_cast<std::size_t>(-1)
};

/** 
 *  \ingroup subsampling
 *  \brief Subsample by a greedy strategy of iteratively adding the farthest point from the
 *  current chosen point set to the subsampling. 
 *  The iteration starts with the landmark `starting_point` or, if `starting_point==random_starting_point`,
 *  with a random landmark.
 *  \tparam Kernel must provide a type Kernel::Squared_distance_d which is a model of the 
 *          concept <a target="_blank"
 *   href="http://doc.cgal.org/latest/Kernel_d/classKernel__d_1_1Squared__distance__d.html">Kernel_d::Squared_distance_d</a> (despite the name, taken from CGAL, this can be any kind of metric or proximity measure).
 *  It must also contain a public member `squared_distance_d_object()` that returns an object of this type.
 *  The value returned by the distance functor must be convertible to `double`.
 *  \tparam Point_range Range whose value type is Kernel::Point_d.  It must provide random-access 
 *         via `operator[]` and the points should be stored contiguously in memory.
 *  \tparam PointOutputIterator Output iterator whose value type is Kernel::Point_d.
 *  \tparam DistanceOutputIterator Output iterator for distances.
 *  \details It chooses `final_size` points from a random access range
 *  `input_pts` and outputs them in the output iterator `output_it`. It also
 *  outputs the distance from each of those points to the set of previous
 *  points in `dist_it`. The distance written for the first point is infinity,
 *  since there is no previous point. The other values are those returned by the
 *  kernel's distance functor (i.e. squared distances for a Euclidean kernel).
 *
 *  If `final_size` exceeds the number of input points, it is reduced to that number;
 *  if the resulting size is 0, nothing is written. Every input point is selected at most
 *  once: when several input points coincide, each of them may still be output as a
 *  separate landmark (with distance 0), but no single input point is output twice.
 * @param[in] k A kernel object.
 * @param[in] input_pts Const reference to the input points.
 * @param[in] final_size The size of the subsample to compute.
 * @param[in] starting_point The seed in the farthest point algorithm: either `random_starting_point`
 *            or the index of a point of `input_pts`. Any other value is a precondition violation
 *            (the index is not checked).
 * @param[out] output_it The output iterator for points.
 * @param[out] dist_it The optional output iterator for distances.
 *  
 */
template < typename Kernel,
typename Point_range,
typename PointOutputIterator,
typename DistanceOutputIterator = Null_output_iterator>
void choose_n_farthest_points(Kernel const &k,
                              Point_range const &input_pts,
                              std::size_t final_size,
                              std::size_t starting_point,
                              PointOutputIterator output_it,
                              DistanceOutputIterator dist_it = {}) {
  const std::size_t nb_points = boost::size(input_pts);
  if (final_size > nb_points)
    final_size = nb_points;

  // Nothing to output for an empty input range or an empty requested subsample
  if (final_size < 1)
    return;

  if (starting_point == random_starting_point) {
    // Choose randomly the first landmark
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<std::size_t> dis(0, nb_points - 1);
    starting_point = dis(gen);
  }

  typename Kernel::Squared_distance_d sqdist = k.squared_distance_d_object();

  const double infty = std::numeric_limits<double>::infinity();
  // dist_to_L[i] is the current distance from input_pts[i] to the set L of landmarks chosen so far.
  // It starts at infinity because L is initially empty.
  std::vector< double > dist_to_L(nb_points, infty);
  // is_landmark[i] is non-zero once input_pts[i] has been output. Without this bookkeeping, a
  // round where all remaining distances are 0 (duplicate points) would re-emit the previous landmark.
  std::vector< char > is_landmark(nb_points, 0);

  std::size_t curr_max_w = starting_point;  // index of the next landmark to output

  for (std::size_t current_number_of_landmarks = 0; ; ) {
    // curr_max_w at this point is the next landmark
    auto&& landmark = input_pts[curr_max_w];
    *output_it++ = landmark;
    *dist_it++ = dist_to_L[curr_max_w];
    is_landmark[curr_max_w] = 1;

    // final_size >= 1 here, so this is reached; it also avoids a useless distance pass after the last landmark
    if (++current_number_of_landmarks == final_size)
      break;

    // Update the distance to L of every input point, taking the new landmark into account
    std::size_t i = 0;
    for (auto&& p : input_pts) {
      const double curr_dist = sqdist(p, landmark);
      if (curr_dist < dist_to_L[i])
        dist_to_L[i] = curr_dist;
      ++i;
    }

    // Choose the next curr_max_w: the first not-yet-chosen point with the largest distance to L.
    // At least one such point exists because fewer than final_size <= nb_points points were chosen.
    bool candidate_found = false;
    double curr_max_dist = 0;  // distance to L of the farthest candidate seen so far
    for (i = 0; i < nb_points; ++i) {
      if (is_landmark[i])
        continue;
      if (!candidate_found || dist_to_L[i] > curr_max_dist) {
        candidate_found = true;
        curr_max_dist = dist_to_L[i];
        curr_max_w = i;
      }
    }
  }
}

}  // namespace subsampling

}  // namespace Gudhi

#endif  // CHOOSE_N_FARTHEST_POINTS_H_