1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
|
//----------------------------------------------------------------------
// File: bd_tree.cpp
// Programmer: David Mount
// Description: Basic methods for bd-trees.
// Last modified: 01/04/05 (Version 1.0)
//----------------------------------------------------------------------
// Copyright (c) 1997-2005 University of Maryland and Sunil Arya and
// David Mount. All Rights Reserved.
//
// This software and related documentation is part of the Approximate
// Nearest Neighbor Library (ANN). This software is provided under
// the provisions of the Lesser GNU Public License (LGPL). See the
// file ../ReadMe.txt for further information.
//
// The University of Maryland (U.M.) and the authors make no
// representations about the suitability or fitness of this software for
// any purpose. It is provided "as is" without express or implied
// warranty.
//----------------------------------------------------------------------
// History:
// Revision 0.1 03/04/98
// Initial release
// Revision l.0 04/01/05
// Fixed centroid shrink threshold condition to depend on the
// dimension.
// Moved dump routine to kd_dump.cpp.
//----------------------------------------------------------------------
#include "bd_tree.h" // bd-tree declarations
#include "kd_util.h" // kd-tree utilities
#include "kd_split.h" // kd-tree splitting rules
#include <ANN/ANNperf.h> // performance evaluation
#include "def_debug_bt.h"
namespace geom_bt {
//----------------------------------------------------------------------
// Printing a bd-tree
// These routines print a bd-tree. See the analogous procedure
// in kd_tree.cpp for more information.
//----------------------------------------------------------------------
void ANNbd_shrink::print( // print shrinking node
int level, // depth of node in tree
ostream &out) // output stream
{
#ifndef FOR_R_TDA
child[ANN_OUT]->print(level+1, out); // print out-child
out << " ";
for (int i = 0; i < level; i++) // print indentation
out << "..";
out << "Shrink";
for (int j = 0; j < n_bnds; j++) { // print sides, 2 per line
if (j % 2 == 0) {
out << "\n"; // newline and indentation
for (int i = 0; i < level+2; i++) out << " ";
}
out << " ([" << bnds[j].cd << "]"
<< (bnds[j].sd > 0 ? ">=" : "< ")
<< bnds[j].cv << ")";
}
out << "\n";
child[ANN_IN]->print(level+1, out); // print in-child
#endif
}
//----------------------------------------------------------------------
// kd_tree statistics utility (for performance evaluation)
// This routine computes various statistics information for
// shrinking nodes. See file kd_tree.cpp for more information.
//----------------------------------------------------------------------
void ANNbd_shrink::getStats( // get subtree statistics
int dim, // dimension of space
ANNkdStats &st, // stats (modified)
ANNorthRect &bnd_box) // bounding box
{
ANNkdStats ch_stats; // stats for children
ANNorthRect inner_box(dim); // inner box of shrink
annBnds2Box(bnd_box, // enclosing box
dim, // dimension
n_bnds, // number of bounds
bnds, // bounds array
inner_box); // inner box (modified)
// get stats for inner child
ch_stats.reset(); // reset
child[ANN_IN]->getStats(dim, ch_stats, inner_box);
st.merge(ch_stats); // merge them
// get stats for outer child
ch_stats.reset(); // reset
child[ANN_OUT]->getStats(dim, ch_stats, bnd_box);
st.merge(ch_stats); // merge them
st.depth++; // increment depth
st.n_shr++; // increment number of shrinks
}
//----------------------------------------------------------------------
// bd-tree constructor
// This is the main constructor for bd-trees given a set of points.
// It first builds a skeleton kd-tree as a basis, then computes the
// bounding box of the data points, and then invokes rbd_tree() to
// actually build the tree, passing it the appropriate splitting
// and shrinking information.
//----------------------------------------------------------------------
ANNkd_ptr rbd_tree( // recursive construction of bd-tree
ANNpointArray pa, // point array
ANNidxArray pidx, // point indices to store in subtree
int n, // number of points
int dim, // dimension of space
int bsp, // bucket space
ANNorthRect &bnd_box, // bounding box for current node
ANNkd_splitter splitter, // splitting routine
ANNshrinkRule shrink); // shrinking rule
ANNbd_tree::ANNbd_tree( // construct from point array
ANNpointArray pa, // point array (with at least n pts)
int n, // number of points
int dd, // dimension
int bs, // bucket size
ANNsplitRule split, // splitting rule
ANNshrinkRule shrink) // shrinking rule
: ANNkd_tree(n, dd, bs) // build skeleton base tree
{
pts = pa; // where the points are
if (n == 0) return; // no points--no sweat
ANNorthRect bnd_box(dd); // bounding box for points
// construct bounding rectangle
annEnclRect(pa, pidx, n, dd, bnd_box);
// copy to tree structure
bnd_box_lo = annCopyPt(dd, bnd_box.lo);
bnd_box_hi = annCopyPt(dd, bnd_box.hi);
switch (split) { // build by rule
case ANN_KD_STD: // standard kd-splitting rule
root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, kd_split, shrink);
break;
case ANN_KD_MIDPT: // midpoint split
root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, midpt_split, shrink);
break;
case ANN_KD_SUGGEST: // best (in our opinion)
case ANN_KD_SL_MIDPT: // sliding midpoint split
root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, sl_midpt_split, shrink);
break;
case ANN_KD_FAIR: // fair split
root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, fair_split, shrink);
break;
case ANN_KD_SL_FAIR: // sliding fair split
root = rbd_tree(pa, pidx, n, dd, bs,
bnd_box, sl_fair_split, shrink);
break;
default:
annError("Illegal splitting method", ANNabort);
}
}
//----------------------------------------------------------------------
// Shrinking rules
//----------------------------------------------------------------------
enum ANNdecomp {SPLIT, SHRINK}; // decomposition methods
//----------------------------------------------------------------------
// trySimpleShrink - Attempt a simple shrink
//
// We compute the tight bounding box of the points, and compute
// the 2*dim ``gaps'' between the sides of the tight box and the
// bounding box. If any of the gaps is large enough relative to
// the longest side of the tight bounding box, then we shrink
// all sides whose gaps are large enough. (The reason for
// comparing against the tight bounding box, is that after
// shrinking the longest box size will decrease, and if we use
// the standard bounding box, we may decide to shrink twice in
// a row. Since the tight box is fixed, we cannot shrink twice
// consecutively.)
//----------------------------------------------------------------------
const float BD_GAP_THRESH = 0.5; // gap threshold (must be < 1)
const int BD_CT_THRESH = 2; // min number of shrink sides
ANNdecomp trySimpleShrink( // try a simple shrink
ANNpointArray pa, // point array
ANNidxArray pidx, // point indices to store in subtree
int n, // number of points
int dim, // dimension of space
const ANNorthRect &bnd_box, // current bounding box
ANNorthRect &inner_box) // inner box if shrinking (returned)
{
int i;
// compute tight bounding box
annEnclRect(pa, pidx, n, dim, inner_box);
ANNcoord max_length = 0; // find longest box side
for (i = 0; i < dim; i++) {
ANNcoord length = inner_box.hi[i] - inner_box.lo[i];
if (length > max_length) {
max_length = length;
}
}
int shrink_ct = 0; // number of sides we shrunk
for (i = 0; i < dim; i++) { // select which sides to shrink
// gap between boxes
ANNcoord gap_hi = bnd_box.hi[i] - inner_box.hi[i];
// big enough gap to shrink?
if (gap_hi < max_length*BD_GAP_THRESH)
inner_box.hi[i] = bnd_box.hi[i]; // no - expand
else shrink_ct++; // yes - shrink this side
// repeat for high side
ANNcoord gap_lo = inner_box.lo[i] - bnd_box.lo[i];
if (gap_lo < max_length*BD_GAP_THRESH)
inner_box.lo[i] = bnd_box.lo[i]; // no - expand
else shrink_ct++; // yes - shrink this side
}
if (shrink_ct >= BD_CT_THRESH) // did we shrink enough sides?
return SHRINK;
else return SPLIT;
}
//----------------------------------------------------------------------
// tryCentroidShrink - Attempt a centroid shrink
//
// We repeatedly apply the splitting rule, always to the larger subset
// of points, until the number of points decreases by the constant
// fraction BD_FRACTION. If this takes more than dim*BD_MAX_SPLIT_FAC
// splits for this to happen, then we shrink to the final inner box
// Otherwise we split.
//----------------------------------------------------------------------
const float BD_MAX_SPLIT_FAC = 0.5; // maximum number of splits allowed
const float BD_FRACTION = 0.5; // ...to reduce points by this fraction
// ...This must be < 1.
ANNdecomp tryCentroidShrink( // try a centroid shrink
ANNpointArray pa, // point array
ANNidxArray pidx, // point indices to store in subtree
int n, // number of points
int dim, // dimension of space
const ANNorthRect &bnd_box, // current bounding box
ANNkd_splitter splitter, // splitting procedure
ANNorthRect &inner_box) // inner box if shrinking (returned)
{
int n_sub = n; // number of points in subset
int n_goal = (int) (n*BD_FRACTION); // number of point in goal
int n_splits = 0; // number of splits needed
// initialize inner box to bounding box
annAssignRect(dim, inner_box, bnd_box);
while (n_sub > n_goal) { // keep splitting until goal reached
int cd; // cut dim from splitter (ignored)
ANNcoord cv; // cut value from splitter (ignored)
int n_lo; // number of points on low side
// invoke splitting procedure
(*splitter)(pa, pidx, inner_box, n_sub, dim, cd, cv, n_lo);
n_splits++; // increment split count
if (n_lo >= n_sub/2) { // most points on low side
inner_box.hi[cd] = cv; // collapse high side
n_sub = n_lo; // recurse on lower points
}
else { // most points on high side
inner_box.lo[cd] = cv; // collapse low side
pidx += n_lo; // recurse on higher points
n_sub -= n_lo;
}
}
if (n_splits > dim*BD_MAX_SPLIT_FAC)// took too many splits
return SHRINK; // shrink to final subset
else
return SPLIT;
}
//----------------------------------------------------------------------
// selectDecomp - select which decomposition to use
//----------------------------------------------------------------------
ANNdecomp selectDecomp( // select decomposition method
ANNpointArray pa, // point array
ANNidxArray pidx, // point indices to store in subtree
int n, // number of points
int dim, // dimension of space
const ANNorthRect &bnd_box, // current bounding box
ANNkd_splitter splitter, // splitting procedure
ANNshrinkRule shrink, // shrinking rule
ANNorthRect &inner_box) // inner box if shrinking (returned)
{
ANNdecomp decomp = SPLIT; // decomposition
switch (shrink) { // check shrinking rule
case ANN_BD_NONE: // no shrinking allowed
decomp = SPLIT;
break;
case ANN_BD_SUGGEST: // author's suggestion
case ANN_BD_SIMPLE: // simple shrink
decomp = trySimpleShrink(
pa, pidx, // points and indices
n, dim, // number of points and dimension
bnd_box, // current bounding box
inner_box); // inner box if shrinking (returned)
break;
case ANN_BD_CENTROID: // centroid shrink
decomp = tryCentroidShrink(
pa, pidx, // points and indices
n, dim, // number of points and dimension
bnd_box, // current bounding box
splitter, // splitting procedure
inner_box); // inner box if shrinking (returned)
break;
default:
annError("Illegal shrinking rule", ANNabort);
}
return decomp;
}
//----------------------------------------------------------------------
// rbd_tree - recursive procedure to build a bd-tree
//
// This is analogous to rkd_tree, but for bd-trees. See the
// procedure rkd_tree() in kd_split.cpp for more information.
//
// If the number of points falls below the bucket size, then a
// leaf node is created for the points. Otherwise we invoke the
// procedure selectDecomp() which determines whether we are to
// split or shrink. If splitting is chosen, then we essentially
// do exactly as rkd_tree() would, and invoke the specified
// splitting procedure to the points. Otherwise, the selection
// procedure returns a bounding box, from which we extract the
// appropriate shrinking bounds, and create a shrinking node.
// Finally the points are subdivided, and the procedure is
// invoked recursively on the two subsets to form the children.
//----------------------------------------------------------------------
ANNkd_ptr rbd_tree( // recursive construction of bd-tree
ANNpointArray pa, // point array
ANNidxArray pidx, // point indices to store in subtree
int n, // number of points
int dim, // dimension of space
int bsp, // bucket space
ANNorthRect &bnd_box, // bounding box for current node
ANNkd_splitter splitter, // splitting routine
ANNshrinkRule shrink) // shrinking rule
{
ANNdecomp decomp; // decomposition method
ANNorthRect inner_box(dim); // inner box (if shrinking)
if (n <= bsp) { // n small, make a leaf node
if (n == 0) // empty leaf node
return KD_TRIVIAL; // return (canonical) empty leaf
else // construct the node and return
return new ANNkd_leaf(n, pidx);
}
decomp = selectDecomp( // select decomposition method
pa, pidx, // points and indices
n, dim, // number of points and dimension
bnd_box, // current bounding box
splitter, shrink, // splitting/shrinking methods
inner_box); // inner box if shrinking (returned)
if (decomp == SPLIT) { // split selected
int cd; // cutting dimension
ANNcoord cv; // cutting value
int n_lo; // number on low side of cut
// invoke splitting procedure
(*splitter)(pa, pidx, bnd_box, n, dim, cd, cv, n_lo);
ANNcoord lv = bnd_box.lo[cd]; // save bounds for cutting dimension
ANNcoord hv = bnd_box.hi[cd];
bnd_box.hi[cd] = cv; // modify bounds for left subtree
ANNkd_ptr lo = rbd_tree( // build left subtree
pa, pidx, n_lo, // ...from pidx[0..n_lo-1]
dim, bsp, bnd_box, splitter, shrink);
bnd_box.hi[cd] = hv; // restore bounds
bnd_box.lo[cd] = cv; // modify bounds for right subtree
ANNkd_ptr hi = rbd_tree( // build right subtree
pa, pidx + n_lo, n-n_lo,// ...from pidx[n_lo..n-1]
dim, bsp, bnd_box, splitter, shrink);
bnd_box.lo[cd] = lv; // restore bounds
// create the splitting node
return new ANNkd_split(cd, cv, lv, hv, lo, hi);
}
else { // shrink selected
int n_in; // number of points in box
int n_bnds; // number of bounding sides
annBoxSplit( // split points around inner box
pa, // points to split
pidx, // point indices
n, // number of points
dim, // dimension
inner_box, // inner box
n_in); // number of points inside (returned)
ANNkd_ptr in = rbd_tree( // build inner subtree pidx[0..n_in-1]
pa, pidx, n_in, dim, bsp, inner_box, splitter, shrink);
ANNkd_ptr out = rbd_tree( // build outer subtree pidx[n_in..n]
pa, pidx+n_in, n - n_in, dim, bsp, bnd_box, splitter, shrink);
ANNorthHSArray bnds = NULL; // bounds (alloc in Box2Bnds and
// ...freed in bd_shrink destroyer)
annBox2Bnds( // convert inner box to bounds
inner_box, // inner box
bnd_box, // enclosing box
dim, // dimension
n_bnds, // number of bounds (returned)
bnds); // bounds array (modified)
// return shrinking node
return new ANNbd_shrink(n_bnds, bnds, in, out);
}
}
}
|