From a4cbfdacbce7d813d5e48298e8a6b31e12c2670c Mon Sep 17 00:00:00 2001 From: "hub.wag@gmail.com" Date: Tue, 2 Apr 2013 20:24:37 +0000 Subject: Added bit-tree pivot representation. It's available from the command line tool and included in self-test. git-svn-id: https://phat.googlecode.com/svn/branches/bit-tree@19 8e3bb3c2-eed4-f18f-5264-0b6c94e6926d --- .../phat/representations/bit_tree_pivot_column.h | 180 ++++++++++++ src/phat.cpp | 37 ++- src/self_test.cpp | 323 +++++++++++---------- 3 files changed, 372 insertions(+), 168 deletions(-) create mode 100644 include/phat/representations/bit_tree_pivot_column.h diff --git a/include/phat/representations/bit_tree_pivot_column.h b/include/phat/representations/bit_tree_pivot_column.h new file mode 100644 index 0000000..53956a4 --- /dev/null +++ b/include/phat/representations/bit_tree_pivot_column.h @@ -0,0 +1,180 @@ +/* Copyright 2013 IST Austria + Contributed by: Ulrich Bauer, Michael Kerber, Jan Reininghaus, Hubert Wagner + + This file is part of PHAT. + + PHAT is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + PHAT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with PHAT. If not, see . */ + +#pragma once + +#include +#include + +namespace phat { + + // This is a bitset indexed with a 32-ary tree. Each node in the index + // has 32 bits; i-th bit says that the i-th subtree is non-empty. + // Supports practically O(1), inplace, zero-allocation: insert, remove, max_element + // and clear in O(number of ones in the bitset). + // 'add_index' is still the real bottleneck in practice. + class bit_tree_column + { + size_t offset; // present_data[i + offset] = ith block of the data-bitset + typedef uint64_t block_type; + std::vector present_data; + block_type * const present; // to prevent error checking in MS's vector... + + enum { block_size_in_bits = 64 }; + enum { block_shift = 6 }; + + // Some magic: http://graphics.stanford.edu/~seander/bithacks.html + // Gets the position of the rightmost bit of 'x'. First (-x)&x isolates the rightmost bit. + // This is much faster than calling log2i, and faster than using ScanBitForward/Reverse intrinsic, + // which should be one CPU instruction. + inline size_t rightmost_pos(const block_type value) const + { + static const size_t tab64[64] = { + 63, 0, 58, 1, 59, 47, 53, 2, + 60, 39, 48, 27, 54, 33, 42, 3, + 61, 51, 37, 40, 49, 18, 28, 20, + 55, 30, 34, 11, 43, 14, 22, 4, + 62, 57, 46, 52, 38, 26, 32, 41, + 50, 36, 17, 19, 29, 10, 13, 21, + 56, 45, 25, 31, 35, 16, 9, 12, + 44, 24, 15, 8, 23, 7, 6, 5}; + return 64 - 1 - tab64[((value & (-value))*0x07EDD5E59A4E28C2) >> 58]; + } + + public: + bit_tree_column() : present(0) + { + init(1); + } + + bit_tree_column(const bit_tree_column &other) : present(0) + { + if (this == &other) + return; + this->offset = other.offset; + this->present_data = other.present_data; + *const_cast(&present) = &present_data[0]; + } + + void init(index num_cols) + { + size_t n = 1; + size_t bottom_blocks_needed = (num_cols+block_size_in_bits-1)/block_size_in_bits; + size_t upper_blocks = 1; + + // How many blocks/nodes of index needed to index the whole bitset? + while(n * block_size_in_bits < bottom_blocks_needed) + { + n *= block_size_in_bits; + upper_blocks += n; + } + + offset = upper_blocks; + present_data.resize(upper_blocks + bottom_blocks_needed, 0); + + *const_cast(&present) = &present_data[0]; + } + + inline index max_index() const + { + if (!present[0]) + return -1; + + const size_t size = present_data.size(); + size_t n = 0; + + while(true) + { + const block_type val = present[n]; + const size_t index = rightmost_pos(val); + const size_t newn = (n << block_shift) + index + 1; + + if (newn >= size) + { + const size_t bottom_index = n - offset; + return (bottom_index << block_shift) + index; + } + + n = newn; + } + + return -1; + } + + inline bool empty() const + { + return present[0] == 0; + } + + inline void add_index(const size_t entry) + { + static const block_type ONE = 1; + static const block_type block_modulo_mask = ((ONE << block_shift) - 1); + size_t index_in_level = entry >> block_shift; + size_t address = index_in_level + offset; + size_t index_in_block = entry & block_modulo_mask; + + block_type mask = (ONE << (block_size_in_bits - index_in_block - 1)); + + while(true) + { + present[address]^=mask; + + // First we check if we reached the root. + // Also, if anyone else was in this block, we don't need to update the path up. + if (!address || (present[address] & ~mask)) + return; + + index_in_block = index_in_level & block_modulo_mask; + index_in_level >>= block_shift; + --address; + address >>= block_shift; + mask = (ONE << (block_size_in_bits - index_in_block - 1)); + } + } + + void get_column_and_clear(column &out) + { + out.clear(); + while(true) + { + index mx = this->max_index(); + if (mx == -1) + break; + out.push_back(mx); + add_index(mx); + } + + std::reverse(out.begin(), out.end()); + } + + void add_column(const column &col) + { + for (size_t i = 0; i < col.size(); ++i) + { + add_index(col[i]); + } + } + + inline bool empty() { + return !present[0]; + } + }; + + typedef abstract_pivot_column bit_tree_pivot_column; +} diff --git a/src/phat.cpp b/src/phat.cpp index a98827c..6757d3c 100644 --- a/src/phat.cpp +++ b/src/phat.cpp @@ -21,15 +21,16 @@ #include #include #include -#include +#include +#include #include -#include +#include #include #include -enum Representation_type {VEC_VEC, VEC_SET, SPARSE_PIVOT, FULL_PIVOT}; +enum Representation_type {VEC_VEC, VEC_SET, SPARSE_PIVOT, FULL_PIVOT, BIT_TREE_PIVOT}; enum Algorithm_type {STANDARD, TWIST, ROW, CHUNK }; void print_help() { @@ -42,23 +43,23 @@ void print_help() { std::cerr << "--help -- prints this screen" << std::endl; std::cerr << "--verbose -- verbose output" << std::endl; std::cerr << "--dualize -- use dualization approach" << std::endl; - std::cerr << "--vec-vec, --vec-set, --full-pivot, --sparse-pivot -- selects a representation data structure for boundary matrices (default is '--sparse-pivot')" << std::endl; + std::cerr << "--vec-vec, --vec-set, --full-pivot, --sparse-pivot, --bit-tree-pivot -- selects a representation data structure for boundary matrices (default is '--sparse-pivot')" << std::endl; std::cerr << "--standard, --twist, --chunk, --row -- selects a reduction algorithm (default is '--twist')" << std::endl; } void print_help_and_exit() { print_help(); exit( EXIT_FAILURE ); -} +} void parse_command_line( int argc, char** argv, bool& use_binary, Representation_type& rep_type, Algorithm_type& reduction, std::string& input_filename, std::string& output_filename, bool& verbose, bool& dualize ) { - + if( argc < 3 ) print_help_and_exit(); input_filename = argv[ argc - 2 ]; output_filename = argv[ argc - 1 ]; - + for( int idx = 1; idx < argc - 2; idx++ ) { const std::string option = argv[ idx ]; @@ -68,6 +69,7 @@ void parse_command_line( int argc, char** argv, bool& use_binary, Representation else if( option == "--vec-vec" ) rep_type = VEC_VEC; else if( option == "--vec-set" ) rep_type = VEC_SET; else if( option == "--full-pivot" ) rep_type = FULL_PIVOT; + else if( option == "--bit-tree-pivot" ) rep_type = BIT_TREE_PIVOT; else if( option == "--sparse-pivot" ) rep_type = SPARSE_PIVOT; else if( option == "--standard" ) reduction = STANDARD; else if( option == "--twist" ) reduction = TWIST; @@ -79,13 +81,13 @@ void parse_command_line( int argc, char** argv, bool& use_binary, Representation } } -#define LOG(msg) if( verbose ) std::cout << msg << std::endl; +#define LOG(msg) if( verbose ) std::cout << msg << std::endl; template void generic_compute_pairing( std::string input_filename, std::string output_filename, - bool use_binary, - bool verbose, + bool use_binary, + bool verbose, bool dualize ) { phat::boundary_matrix< Representation > matrix; @@ -93,7 +95,7 @@ void generic_compute_pairing( std::string input_filename, double read_timer = omp_get_wtime(); if( use_binary ) { - LOG( "Reading input file " << input_filename << " in binary mode" ) + LOG( "Reading input file " << input_filename << " in binary mode" ) read_successful = matrix.load_binary( input_filename ); } else { LOG( "Reading input file " << input_filename << " in ascii mode" ) @@ -111,7 +113,7 @@ void generic_compute_pairing( std::string input_filename, LOG( "Computing persistence pairs ..." ) if( dualize ) phat::compute_persistence_pairs_dualized< Algorithm > ( pairs, matrix ); - else + else phat::compute_persistence_pairs < Algorithm > ( pairs, matrix ); LOG( "Computing persistence pairs took " << omp_get_wtime() - pairs_timer <<"s" ) @@ -124,12 +126,12 @@ void generic_compute_pairing( std::string input_filename, pairs.save_ascii( output_filename ); } LOG( "Writing output file took " << omp_get_wtime() - write_timer <<"s" ) - + } #define CALL_GENERIC_CODE(rep,alg) generic_compute_pairing < rep, alg >( input_filename, output_filename, use_binary, verbose, dualize ); -int main( int argc, char** argv ) +int main( int argc, char** argv ) { bool use_binary = true; // interpret input as binary or ascii file Representation_type rep_type = SPARSE_PIVOT; // representation class @@ -163,6 +165,13 @@ int main( int argc, char** argv ) case CHUNK: CALL_GENERIC_CODE(phat::full_pivot_column, phat::chunk_reduction) break; } break; + case BIT_TREE_PIVOT: switch( reduction ) { + case STANDARD: CALL_GENERIC_CODE(phat::bit_tree_pivot_column, phat::standard_reduction) break; + case TWIST: CALL_GENERIC_CODE(phat::bit_tree_pivot_column, phat::twist_reduction) break; + case ROW: CALL_GENERIC_CODE(phat::bit_tree_pivot_column, phat::row_reduction) break; + case CHUNK: CALL_GENERIC_CODE(phat::bit_tree_pivot_column, phat::chunk_reduction) break; + } break; + case SPARSE_PIVOT: switch( reduction ) { case STANDARD: CALL_GENERIC_CODE(phat::sparse_pivot_column, phat::standard_reduction) break; case TWIST: CALL_GENERIC_CODE(phat::sparse_pivot_column, phat::twist_reduction) break; diff --git a/src/self_test.cpp b/src/self_test.cpp index 4764874..b3cddc2 100644 --- a/src/self_test.cpp +++ b/src/self_test.cpp @@ -1,154 +1,169 @@ -/* Copyright 2013 IST Austria - Contributed by: Ulrich Bauer, Michael Kerber, Jan Reininghaus - - This file is part of PHAT. - - PHAT is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - PHAT is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with PHAT. If not, see . */ - -#include - -#include -#include -#include -#include - -#include -#include -#include -#include - -int main( int argc, char** argv ) -{ - std::string test_data = argc > 1 ? argv[ 1 ] : "examples/torus.bin"; - - typedef phat::sparse_pivot_column Sparse; - typedef phat::full_pivot_column Full; - typedef phat::vector_vector Vec_vec; - typedef phat::vector_set Vec_set; - - std::cout << "Reading test data " << test_data << " in binary format ..." << std::endl; - phat::boundary_matrix< Full > boundary_matrix; - if( !boundary_matrix.load_binary( test_data ) ) { - std::cerr << "Error: test data " << test_data << " not found!" << std::endl; - return EXIT_FAILURE; - } - - bool error = false; - std::cout << "Comparing representations using Chunk algorithm ..." << std::endl; - { - std::cout << "Running Chunk - Sparse ..." << std::endl; - phat::persistence_pairs sparse_pairs; - phat::boundary_matrix< Sparse > sparse_boundary_matrix = boundary_matrix; - phat::compute_persistence_pairs< phat::chunk_reduction >( sparse_pairs, sparse_boundary_matrix ); - - std::cout << "Running Chunk - Full ..." << std::endl; - phat::persistence_pairs full_pairs; - phat::boundary_matrix< Full > full_boundary_matrix = boundary_matrix; - phat::compute_persistence_pairs< phat::chunk_reduction >( full_pairs, full_boundary_matrix ); - - std::cout << "Running Chunk - Vec_vec ..." << std::endl; - phat::persistence_pairs vec_vec_pairs; - phat::boundary_matrix< Vec_vec > vec_vec_boundary_matrix = boundary_matrix; - phat::compute_persistence_pairs< phat::chunk_reduction >( vec_vec_pairs, vec_vec_boundary_matrix ); - - std::cout << "Running Chunk - Vec_set ..." << std::endl; - phat::persistence_pairs vec_set_pairs; - phat::boundary_matrix< Vec_set > vec_set_boundary_matrix = boundary_matrix; - phat::compute_persistence_pairs< phat::chunk_reduction >( vec_set_pairs, vec_set_boundary_matrix ); - - if( sparse_pairs != full_pairs ) { - std::cerr << "Error: sparse and full differ!" << std::endl; - error = true; - } - if( full_pairs != vec_vec_pairs ) { - std::cerr << "Error: full and vec_vec differ!" << std::endl; - error = true; - } - if( vec_vec_pairs != vec_set_pairs ) { - std::cerr << "Error: vec_vec and vec_set differ!" << std::endl; - error = true; - } - if( vec_set_pairs != sparse_pairs ) { - std::cerr << "Error: vec_set and sparse differ!" << std::endl; - error = true; - } - - if( error ) return EXIT_FAILURE; - else std::cout << "All results are identical (as they should be)" << std::endl; - } - - std::cout << "Comparing algorithms using full_pivot_column representation ..." << std::endl; - { - std::cout << "Running Twist - Full ..." << std::endl; - phat::persistence_pairs twist_pairs; - phat::boundary_matrix< Full > twist_boundary_matrix = boundary_matrix; - phat::compute_persistence_pairs< phat::twist_reduction >( twist_pairs, twist_boundary_matrix ); - - std::cout << "Running Standard - Full ..." << std::endl; - phat::persistence_pairs std_pairs; - phat::boundary_matrix< Full > std_boundary_matrix = boundary_matrix; - phat::compute_persistence_pairs< phat::standard_reduction >( std_pairs, std_boundary_matrix ); - - std::cout << "Running Chunk - Full ..." << std::endl; - phat::persistence_pairs chunk_pairs; - phat::boundary_matrix< Full > chunk_boundary_matrix = boundary_matrix; - phat::compute_persistence_pairs< phat::chunk_reduction >( chunk_pairs, chunk_boundary_matrix ); - - std::cout << "Running Row - Full ..." << std::endl; - phat::persistence_pairs row_pairs; - phat::boundary_matrix< Full > row_boundary_matrix = boundary_matrix; - phat::compute_persistence_pairs< phat::row_reduction >( row_pairs, row_boundary_matrix ); - - if( twist_pairs != std_pairs ) { - std::cerr << "Error: twist and standard differ!" << std::endl; - error = true; - } - if( std_pairs != chunk_pairs ) { - std::cerr << "Error: standard and chunk differ!" << std::endl; - error = true; - } - if( chunk_pairs != row_pairs ) { - std::cerr << "Error: chunk and row differ!" << std::endl; - error = true; - } - if( row_pairs != twist_pairs ) { - std::cerr << "Error: row and twist differ!" << std::endl; - error = true; - } - - if( error ) return EXIT_FAILURE; - else std::cout << "All results are identical (as they should be)" << std::endl; - } - - std::cout << "Comparing primal and dual approach using Chunk - Full ..." << std::endl; - { - phat::persistence_pairs primal_pairs; - phat::boundary_matrix< Full > primal_boundary_matrix = boundary_matrix; - phat::compute_persistence_pairs< phat::chunk_reduction >( primal_pairs, primal_boundary_matrix ); - - phat::persistence_pairs dual_pairs; - phat::boundary_matrix< Full > dual_boundary_matrix = boundary_matrix; - phat::compute_persistence_pairs_dualized< phat::chunk_reduction >( dual_pairs, dual_boundary_matrix ); - - if( primal_pairs != dual_pairs ) { - std::cerr << "Error: primal and dual differ!" << std::endl; - error = true; - } - - if( error ) return EXIT_FAILURE; - else std::cout << "All results are identical (as they should be)" << std::endl; - } - - return EXIT_SUCCESS; -} +/* Copyright 2013 IST Austria + Contributed by: Ulrich Bauer, Michael Kerber, Jan Reininghaus + + This file is part of PHAT. + + PHAT is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + PHAT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with PHAT. If not, see . */ + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +int main( int argc, char** argv ) +{ + std::string test_data = argc > 1 ? argv[ 1 ] : "torus.bin"; + + typedef phat::sparse_pivot_column Sparse; + typedef phat::full_pivot_column Full; + typedef phat::bit_tree_pivot_column BitTree; + typedef phat::vector_vector Vec_vec; + typedef phat::vector_set Vec_set; + + std::cout << "Reading test data " << test_data << " in binary format ..." << std::endl; + phat::boundary_matrix< Full > boundary_matrix; + if( !boundary_matrix.load_binary( test_data ) ) { + std::cerr << "Error: test data " << test_data << " not found!" << std::endl; + return EXIT_FAILURE; + } + + bool error = false; + std::cout << "Comparing representations using Chunk algorithm ..." << std::endl; + { + std::cout << "Running Chunk - Sparse ..." << std::endl; + phat::persistence_pairs sparse_pairs; + phat::boundary_matrix< Sparse > sparse_boundary_matrix = boundary_matrix; + phat::compute_persistence_pairs< phat::chunk_reduction >( sparse_pairs, sparse_boundary_matrix ); + + std::cout << "Running Chunk - Full ..." << std::endl; + phat::persistence_pairs full_pairs; + phat::boundary_matrix< Full > full_boundary_matrix = boundary_matrix; + phat::compute_persistence_pairs< phat::chunk_reduction >( full_pairs, full_boundary_matrix ); + + std::cout << "Running Chunk - BitTree ..." << std::endl; + phat::persistence_pairs bit_tree_pairs; + phat::boundary_matrix< BitTree > bit_tree_boundary_matrix = boundary_matrix; + phat::compute_persistence_pairs< phat::chunk_reduction >( bit_tree_pairs, bit_tree_boundary_matrix ); + + std::cout << "Running Chunk - Vec_vec ..." << std::endl; + phat::persistence_pairs vec_vec_pairs; + phat::boundary_matrix< Vec_vec > vec_vec_boundary_matrix = boundary_matrix; + phat::compute_persistence_pairs< phat::chunk_reduction >( vec_vec_pairs, vec_vec_boundary_matrix ); + + std::cout << "Running Chunk - Vec_set ..." << std::endl; + phat::persistence_pairs vec_set_pairs; + phat::boundary_matrix< Vec_set > vec_set_boundary_matrix = boundary_matrix; + phat::compute_persistence_pairs< phat::chunk_reduction >( vec_set_pairs, vec_set_boundary_matrix ); + + if( sparse_pairs != full_pairs ) { + std::cerr << "Error: sparse and full differ!" << std::endl; + error = true; + } + if( full_pairs != vec_vec_pairs ) { + std::cerr << "Error: full and vec_vec differ!" << std::endl; + error = true; + } + if( vec_vec_pairs != vec_set_pairs ) { + std::cerr << "Error: vec_vec and vec_set differ!" << std::endl; + error = true; + } + if( vec_set_pairs != sparse_pairs ) { + std::cerr << "Error: vec_set and sparse differ!" << std::endl; + error = true; + } + if( bit_tree_pairs != sparse_pairs ) { + std::cerr << "Error: bit_tree and sparse differ!" << std::endl; + error = true; + } + if( bit_tree_pairs != vec_set_pairs ) { + std::cerr << "Error: bit_tree and vec_vec differ!" << std::endl; + error = true; + } + + if( error ) return EXIT_FAILURE; + else std::cout << "All results are identical (as they should be)" << std::endl; + } + + std::cout << "Comparing algorithms using full_pivot_column representation ..." << std::endl; + { + std::cout << "Running Twist - Full ..." << std::endl; + phat::persistence_pairs twist_pairs; + phat::boundary_matrix< Full > twist_boundary_matrix = boundary_matrix; + phat::compute_persistence_pairs< phat::twist_reduction >( twist_pairs, twist_boundary_matrix ); + + std::cout << "Running Standard - Full ..." << std::endl; + phat::persistence_pairs std_pairs; + phat::boundary_matrix< Full > std_boundary_matrix = boundary_matrix; + phat::compute_persistence_pairs< phat::standard_reduction >( std_pairs, std_boundary_matrix ); + + std::cout << "Running Chunk - Full ..." << std::endl; + phat::persistence_pairs chunk_pairs; + phat::boundary_matrix< Full > chunk_boundary_matrix = boundary_matrix; + phat::compute_persistence_pairs< phat::chunk_reduction >( chunk_pairs, chunk_boundary_matrix ); + + std::cout << "Running Row - Full ..." << std::endl; + phat::persistence_pairs row_pairs; + phat::boundary_matrix< Full > row_boundary_matrix = boundary_matrix; + phat::compute_persistence_pairs< phat::row_reduction >( row_pairs, row_boundary_matrix ); + + if( twist_pairs != std_pairs ) { + std::cerr << "Error: twist and standard differ!" << std::endl; + error = true; + } + if( std_pairs != chunk_pairs ) { + std::cerr << "Error: standard and chunk differ!" << std::endl; + error = true; + } + if( chunk_pairs != row_pairs ) { + std::cerr << "Error: chunk and row differ!" << std::endl; + error = true; + } + if( row_pairs != twist_pairs ) { + std::cerr << "Error: row and twist differ!" << std::endl; + error = true; + } + + if( error ) return EXIT_FAILURE; + else std::cout << "All results are identical (as they should be)" << std::endl; + } + + std::cout << "Comparing primal and dual approach using Chunk - Full ..." << std::endl; + { + phat::persistence_pairs primal_pairs; + phat::boundary_matrix< Full > primal_boundary_matrix = boundary_matrix; + phat::compute_persistence_pairs< phat::chunk_reduction >( primal_pairs, primal_boundary_matrix ); + + phat::persistence_pairs dual_pairs; + phat::boundary_matrix< Full > dual_boundary_matrix = boundary_matrix; + phat::compute_persistence_pairs_dualized< phat::chunk_reduction >( dual_pairs, dual_boundary_matrix ); + + if( primal_pairs != dual_pairs ) { + std::cerr << "Error: primal and dual differ!" << std::endl; + error = true; + } + + if( error ) return EXIT_FAILURE; + else std::cout << "All results are identical (as they should be)" << std::endl; + } + + return EXIT_SUCCESS; +} -- cgit v1.2.3