summaryrefslogtreecommitdiff
path: root/geom_bottleneck/bottleneck/src/ann/bd_tree.cpp
blob: a5dd69cd58c1564a75ed80e6b3fabc8ce3bbf49f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
//----------------------------------------------------------------------
// File:			bd_tree.cpp
// Programmer:		David Mount
// Description:		Basic methods for bd-trees.
// Last modified:	01/04/05 (Version 1.0)
//----------------------------------------------------------------------
// Copyright (c) 1997-2005 University of Maryland and Sunil Arya and
// David Mount.  All Rights Reserved.
// 
// This software and related documentation is part of the Approximate
// Nearest Neighbor Library (ANN).  This software is provided under
// the provisions of the Lesser GNU Public License (LGPL).  See the
// file ../ReadMe.txt for further information.
// 
// The University of Maryland (U.M.) and the authors make no
// representations about the suitability or fitness of this software for
// any purpose.  It is provided "as is" without express or implied
// warranty.
//----------------------------------------------------------------------
// History:
//	Revision 0.1  03/04/98
//		Initial release
//	Revision l.0  04/01/05
//		Fixed centroid shrink threshold condition to depend on the
//			dimension.
//		Moved dump routine to kd_dump.cpp.
//----------------------------------------------------------------------

#include "bd_tree.h"					// bd-tree declarations
#include "kd_util.h"					// kd-tree utilities
#include "kd_split.h"					// kd-tree splitting rules

#include <ANN/ANNperf.h>				// performance evaluation
#include "def_debug_bt.h"

namespace geom_bt {
//----------------------------------------------------------------------
//	Printing a bd-tree 
//		These routines print a bd-tree.   See the analogous procedure
//		in kd_tree.cpp for more information.
//----------------------------------------------------------------------

void ANNbd_shrink::print(				// print shrinking node
		int level,						// depth of node in tree
		ostream &out)					// output stream
{
#ifndef FOR_R_TDA
	child[ANN_OUT]->print(level+1, out);		// print out-child

	out << "    ";
	for (int i = 0; i < level; i++)				// print indentation
		out << "..";
	out << "Shrink";
	for (int j = 0; j < n_bnds; j++) {			// print sides, 2 per line
		if (j % 2 == 0) {
			out << "\n";						// newline and indentation
			for (int i = 0; i < level+2; i++) out << "  ";
		}
		out << "  ([" << bnds[j].cd << "]"
			 << (bnds[j].sd > 0 ? ">=" : "< ")
			 << bnds[j].cv << ")";
	}
	out << "\n";

	child[ANN_IN]->print(level+1, out);			// print in-child
#endif
}

//----------------------------------------------------------------------
//	kd_tree statistics utility (for performance evaluation)
//		This routine computes various statistics information for
//		shrinking nodes.  See file kd_tree.cpp for more information.
//----------------------------------------------------------------------

void ANNbd_shrink::getStats(					// get subtree statistics
	int					dim,					// dimension of space
	ANNkdStats			&st,					// stats (modified)
	ANNorthRect			&bnd_box)				// bounding box
{
	ANNkdStats ch_stats;						// stats for children
	ANNorthRect inner_box(dim);					// inner box of shrink

	annBnds2Box(bnd_box,						// enclosing box
				dim,							// dimension
				n_bnds,							// number of bounds
				bnds,							// bounds array
				inner_box);						// inner box (modified)
												// get stats for inner child
	ch_stats.reset();							// reset
	child[ANN_IN]->getStats(dim, ch_stats, inner_box);
	st.merge(ch_stats);							// merge them
												// get stats for outer child
	ch_stats.reset();							// reset
	child[ANN_OUT]->getStats(dim, ch_stats, bnd_box);
	st.merge(ch_stats);							// merge them

	st.depth++;									// increment depth
	st.n_shr++;									// increment number of shrinks
}

//----------------------------------------------------------------------
// bd-tree constructor
//		This is the main constructor for bd-trees given a set of points.
//		It first builds a skeleton kd-tree as a basis, then computes the
//		bounding box of the data points, and then invokes rbd_tree() to
//		actually build the tree, passing it the appropriate splitting
//		and shrinking information.
//----------------------------------------------------------------------

ANNkd_ptr rbd_tree(						// recursive construction of bd-tree
	ANNpointArray		pa,				// point array
	ANNidxArray			pidx,			// point indices to store in subtree
	int					n,				// number of points
	int					dim,			// dimension of space
	int					bsp,			// bucket space
	ANNorthRect			&bnd_box,		// bounding box for current node
	ANNkd_splitter		splitter,		// splitting routine
	ANNshrinkRule		shrink);		// shrinking rule

ANNbd_tree::ANNbd_tree(					// construct from point array
	ANNpointArray		pa,				// point array (with at least n pts)
	int					n,				// number of points
	int					dd,				// dimension
	int					bs,				// bucket size
	ANNsplitRule		split,			// splitting rule
	ANNshrinkRule		shrink)			// shrinking rule
	: ANNkd_tree(n, dd, bs)				// build skeleton base tree
{
	pts = pa;							// where the points are
	if (n == 0) return;					// no points--no sweat

	ANNorthRect bnd_box(dd);			// bounding box for points
										// construct bounding rectangle
	annEnclRect(pa, pidx, n, dd, bnd_box);
										// copy to tree structure
	bnd_box_lo = annCopyPt(dd, bnd_box.lo);
	bnd_box_hi = annCopyPt(dd, bnd_box.hi);

	switch (split) {					// build by rule
	case ANN_KD_STD:					// standard kd-splitting rule
		root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, kd_split, shrink);
		break;
	case ANN_KD_MIDPT:					// midpoint split
		root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, midpt_split, shrink);
		break;
	case ANN_KD_SUGGEST:				// best (in our opinion)
	case ANN_KD_SL_MIDPT:				// sliding midpoint split
		root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, sl_midpt_split, shrink);
		break;
	case ANN_KD_FAIR:					// fair split
		root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, fair_split, shrink);
		break;
	case ANN_KD_SL_FAIR:				// sliding fair split
		root = rbd_tree(pa, pidx, n, dd, bs,
						bnd_box, sl_fair_split, shrink);
		break;
	default:
		annError("Illegal splitting method", ANNabort);
	}
}

//----------------------------------------------------------------------
//	Shrinking rules
//----------------------------------------------------------------------

enum ANNdecomp {SPLIT, SHRINK};			// decomposition methods

//----------------------------------------------------------------------
//	trySimpleShrink - Attempt a simple shrink
//
//		We compute the tight bounding box of the points, and compute
//		the 2*dim ``gaps'' between the sides of the tight box and the
//		bounding box.  If any of the gaps is large enough relative to
//		the longest side of the tight bounding box, then we shrink
//		all sides whose gaps are large enough.  (The reason for
//		comparing against the tight bounding box, is that after
//		shrinking the longest box size will decrease, and if we use
//		the standard bounding box, we may decide to shrink twice in
//		a row.  Since the tight box is fixed, we cannot shrink twice
//		consecutively.)
//----------------------------------------------------------------------
const float BD_GAP_THRESH = 0.5;		// gap threshold (must be < 1)
const int   BD_CT_THRESH  = 2;			// min number of shrink sides

ANNdecomp trySimpleShrink(				// try a simple shrink
	ANNpointArray		pa,				// point array
	ANNidxArray			pidx,			// point indices to store in subtree
	int					n,				// number of points
	int					dim,			// dimension of space
	const ANNorthRect	&bnd_box,		// current bounding box
	ANNorthRect			&inner_box)		// inner box if shrinking (returned)
{
	int i;
												// compute tight bounding box
	annEnclRect(pa, pidx, n, dim, inner_box);

	ANNcoord max_length = 0;					// find longest box side
	for (i = 0; i < dim; i++) {
		ANNcoord length = inner_box.hi[i] - inner_box.lo[i];
		if (length > max_length) {
			max_length = length;
		}
	}

	int shrink_ct = 0;							// number of sides we shrunk
	for (i = 0; i < dim; i++) {					// select which sides to shrink
												// gap between boxes
		ANNcoord gap_hi = bnd_box.hi[i] - inner_box.hi[i];
												// big enough gap to shrink?
		if (gap_hi < max_length*BD_GAP_THRESH)
			inner_box.hi[i] = bnd_box.hi[i];	// no - expand
		else shrink_ct++;						// yes - shrink this side

												// repeat for high side
		ANNcoord gap_lo = inner_box.lo[i] - bnd_box.lo[i];
		if (gap_lo < max_length*BD_GAP_THRESH)
			inner_box.lo[i] = bnd_box.lo[i];	// no - expand
		else shrink_ct++;						// yes - shrink this side
	}

	if (shrink_ct >= BD_CT_THRESH)				// did we shrink enough sides?
		 return SHRINK;
	else return SPLIT;
}

//----------------------------------------------------------------------
//	tryCentroidShrink - Attempt a centroid shrink
//
//	We repeatedly apply the splitting rule, always to the larger subset
//	of points, until the number of points decreases by the constant
//	fraction BD_FRACTION.  If this takes more than dim*BD_MAX_SPLIT_FAC
//	splits for this to happen, then we shrink to the final inner box
//	Otherwise we split.
//----------------------------------------------------------------------

const float	BD_MAX_SPLIT_FAC = 0.5;		// maximum number of splits allowed
const float	BD_FRACTION = 0.5;			// ...to reduce points by this fraction
										// ...This must be < 1.

ANNdecomp tryCentroidShrink(			// try a centroid shrink
	ANNpointArray		pa,				// point array
	ANNidxArray			pidx,			// point indices to store in subtree
	int					n,				// number of points
	int					dim,			// dimension of space
	const ANNorthRect	&bnd_box,		// current bounding box
	ANNkd_splitter		splitter,		// splitting procedure
	ANNorthRect			&inner_box)		// inner box if shrinking (returned)
{
	int n_sub = n;						// number of points in subset
	int n_goal = (int) (n*BD_FRACTION); // number of point in goal
	int n_splits = 0;					// number of splits needed
										// initialize inner box to bounding box
	annAssignRect(dim, inner_box, bnd_box);

	while (n_sub > n_goal) {			// keep splitting until goal reached
		int cd;							// cut dim from splitter (ignored)
		ANNcoord cv;					// cut value from splitter (ignored)
		int n_lo;						// number of points on low side
										// invoke splitting procedure
		(*splitter)(pa, pidx, inner_box, n_sub, dim, cd, cv, n_lo);
		n_splits++;						// increment split count

		if (n_lo >= n_sub/2) {			// most points on low side
			inner_box.hi[cd] = cv;		// collapse high side
			n_sub = n_lo;				// recurse on lower points
		}
		else {							// most points on high side
			inner_box.lo[cd] = cv;		// collapse low side
			pidx += n_lo;				// recurse on higher points
			n_sub -= n_lo;
		}
	}
    if (n_splits > dim*BD_MAX_SPLIT_FAC)// took too many splits
		return SHRINK;					// shrink to final subset
	else
		return SPLIT;
}

//----------------------------------------------------------------------
//	selectDecomp - select which decomposition to use
//----------------------------------------------------------------------

ANNdecomp selectDecomp(			// select decomposition method
	ANNpointArray		pa,				// point array
	ANNidxArray			pidx,			// point indices to store in subtree
	int					n,				// number of points
	int					dim,			// dimension of space
	const ANNorthRect	&bnd_box,		// current bounding box
	ANNkd_splitter		splitter,		// splitting procedure
	ANNshrinkRule		shrink,			// shrinking rule
	ANNorthRect			&inner_box)		// inner box if shrinking (returned)
{
	ANNdecomp decomp = SPLIT;			// decomposition

	switch (shrink) {					// check shrinking rule
	case ANN_BD_NONE:					// no shrinking allowed
		decomp = SPLIT;
		break;
	case ANN_BD_SUGGEST:				// author's suggestion
	case ANN_BD_SIMPLE:					// simple shrink
		decomp = trySimpleShrink(
				pa, pidx,				// points and indices
				n, dim,					// number of points and dimension
				bnd_box,				// current bounding box
				inner_box);				// inner box if shrinking (returned)
		break;
	case ANN_BD_CENTROID:				// centroid shrink
		decomp = tryCentroidShrink(
				pa, pidx,				// points and indices
				n, dim,					// number of points and dimension
				bnd_box,				// current bounding box
				splitter,				// splitting procedure
				inner_box);				// inner box if shrinking (returned)
		break;
	default:
		annError("Illegal shrinking rule", ANNabort);
	}
	return decomp;
}

//----------------------------------------------------------------------
//	rbd_tree - recursive procedure to build a bd-tree
//
//		This is analogous to rkd_tree, but for bd-trees.  See the
//		procedure rkd_tree() in kd_split.cpp for more information.
//
//		If the number of points falls below the bucket size, then a
//		leaf node is created for the points.  Otherwise we invoke the
//		procedure selectDecomp() which determines whether we are to
//		split or shrink.  If splitting is chosen, then we essentially
//		do exactly as rkd_tree() would, and invoke the specified
//		splitting procedure to the points.  Otherwise, the selection
//		procedure returns a bounding box, from which we extract the
//		appropriate shrinking bounds, and create a shrinking node.
//		Finally the points are subdivided, and the procedure is
//		invoked recursively on the two subsets to form the children.
//----------------------------------------------------------------------

ANNkd_ptr rbd_tree(				// recursive construction of bd-tree
	ANNpointArray		pa,				// point array
	ANNidxArray			pidx,			// point indices to store in subtree
	int					n,				// number of points
	int					dim,			// dimension of space
	int					bsp,			// bucket space
	ANNorthRect			&bnd_box,		// bounding box for current node
	ANNkd_splitter		splitter,		// splitting routine
	ANNshrinkRule		shrink)			// shrinking rule
{
	ANNdecomp decomp;					// decomposition method

	ANNorthRect inner_box(dim);			// inner box (if shrinking)

	if (n <= bsp) {						// n small, make a leaf node
		if (n == 0)						// empty leaf node
			return KD_TRIVIAL;			// return (canonical) empty leaf
		else							// construct the node and return
			return new ANNkd_leaf(n, pidx); 
	}
	
	decomp = selectDecomp(				// select decomposition method
				pa, pidx,				// points and indices
				n, dim,					// number of points and dimension
				bnd_box,				// current bounding box
				splitter, shrink,		// splitting/shrinking methods
				inner_box);				// inner box if shrinking (returned)
	
	if (decomp == SPLIT) {				// split selected
		int cd;							// cutting dimension
		ANNcoord cv;					// cutting value
		int n_lo;						// number on low side of cut
										// invoke splitting procedure
		(*splitter)(pa, pidx, bnd_box, n, dim, cd, cv, n_lo);

		ANNcoord lv = bnd_box.lo[cd];	// save bounds for cutting dimension
		ANNcoord hv = bnd_box.hi[cd];

		bnd_box.hi[cd] = cv;			// modify bounds for left subtree
		ANNkd_ptr lo = rbd_tree(		// build left subtree
				pa, pidx, n_lo,			// ...from pidx[0..n_lo-1]
				dim, bsp, bnd_box, splitter, shrink);
		bnd_box.hi[cd] = hv;			// restore bounds

		bnd_box.lo[cd] = cv;			// modify bounds for right subtree
		ANNkd_ptr hi = rbd_tree(		// build right subtree
				pa, pidx + n_lo, n-n_lo,// ...from pidx[n_lo..n-1]
				dim, bsp, bnd_box, splitter, shrink);
		bnd_box.lo[cd] = lv;			// restore bounds
										// create the splitting node
		return new ANNkd_split(cd, cv, lv, hv, lo, hi);
	}
	else {								// shrink selected
		int n_in;						// number of points in box
		int n_bnds;						// number of bounding sides

		annBoxSplit(					// split points around inner box
				pa,						// points to split
				pidx,					// point indices
				n,						// number of points
				dim,					// dimension
				inner_box,				// inner box
				n_in);					// number of points inside (returned)

		ANNkd_ptr in = rbd_tree(		// build inner subtree pidx[0..n_in-1]
				pa, pidx, n_in, dim, bsp, inner_box, splitter, shrink);
		ANNkd_ptr out = rbd_tree(		// build outer subtree pidx[n_in..n]
				pa, pidx+n_in, n - n_in, dim, bsp, bnd_box, splitter, shrink);

		ANNorthHSArray bnds = NULL;		// bounds (alloc in Box2Bnds and
										// ...freed in bd_shrink destroyer)

		annBox2Bnds(					// convert inner box to bounds
				inner_box,				// inner box
				bnd_box,				// enclosing box
				dim,					// dimension
				n_bnds,					// number of bounds (returned)
				bnds);					// bounds array (modified)

										// return shrinking node
		return new ANNbd_shrink(n_bnds, bnds, in, out);
	}
} 
}