summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjan.reininghaus <jan.reininghaus@8e3bb3c2-eed4-f18f-5264-0b6c94e6926d>2014-04-29 09:13:12 +0000
committerjan.reininghaus <jan.reininghaus@8e3bb3c2-eed4-f18f-5264-0b6c94e6926d>2014-04-29 09:13:12 +0000
commit7d7a7fbca61da76c423d2fb73857ad8f1c7abd56 (patch)
tree2de902949f8f59724d415a7ee9a9733d040427ad
parent2d0856665629001e6aa3600556131838cd709c99 (diff)
performance improvements for chunk_reduction.h
git-svn-id: https://phat.googlecode.com/svn/trunk@161 8e3bb3c2-eed4-f18f-5264-0b6c94e6926d
-rw-r--r--include/phat/algorithms/chunk_reduction.h15
1 file changed, 6 insertions, 9 deletions
diff --git a/include/phat/algorithms/chunk_reduction.h b/include/phat/algorithms/chunk_reduction.h
index 378fd4a..51460c2 100644
--- a/include/phat/algorithms/chunk_reduction.h
+++ b/include/phat/algorithms/chunk_reduction.h
@@ -39,26 +39,23 @@ namespace phat {
std::vector < column_type > column_type( nr_columns, GLOBAL );
std::vector< char > is_active( nr_columns, false );
- //const index chunk_size = (index) sqrt( (float)nr_columns );
- const index chunk_size = nr_columns / omp_get_max_threads( );
+ const index chunk_size = (index) sqrt( (double)nr_columns );
+ //const index chunk_size = nr_columns / omp_get_max_threads( );
- index cur_boundary = 0;
- std::vector<index> chunk_boundaries;
- for( cur_boundary = 0; cur_boundary < nr_columns; cur_boundary += chunk_size )
+ std::vector< index > chunk_boundaries;
+ for( index cur_boundary = 0; cur_boundary < nr_columns; cur_boundary += chunk_size )
chunk_boundaries.push_back( cur_boundary );
chunk_boundaries.push_back( nr_columns );
- // Phase 1: Reduce chunks locally -- 1st pass
for( dimension cur_dim = max_dim; cur_dim >= 1; cur_dim-- ) {
+ // Phase 1: Reduce chunks locally -- 1st pass
#pragma omp parallel for schedule( guided, 1 )
for( index chunk_id = 0; chunk_id < (index)chunk_boundaries.size() - 1; chunk_id++ )
_local_chunk_reduction( boundary_matrix, lowest_one_lookup, column_type, cur_dim,
chunk_boundaries[ chunk_id ], chunk_boundaries[ chunk_id + 1 ], chunk_boundaries[ chunk_id ] );
boundary_matrix.sync();
- }
- // Phase 1: Reduce chunks locally -- 2nd pass
- for( dimension cur_dim = max_dim; cur_dim >= 1; cur_dim-- ) {
+ // Phase 1: Reduce chunks locally -- 2nd pass
#pragma omp parallel for schedule( guided, 1 )
for( index chunk_id = 1; chunk_id < (index)chunk_boundaries.size( ) - 1; chunk_id++ )
_local_chunk_reduction( boundary_matrix, lowest_one_lookup, column_type, cur_dim,