author    hub.wag@gmail.com <hub.wag@gmail.com@8e3bb3c2-eed4-f18f-5264-0b6c94e6926d>  2013-04-12 00:04:08 +0000
committer hub.wag@gmail.com <hub.wag@gmail.com@8e3bb3c2-eed4-f18f-5264-0b6c94e6926d>  2013-04-12 00:04:08 +0000
commit    b66953101a196371faf55effb37fd2928832e8e5 (patch)
tree      46e7427c8e06699618fb8e52bfcfd739c5d3998f
parent    0c0078181f915b9224de69fd52842a77c40ea96c (diff)
Updated the README and cleaned up the phat.cpp command-line options. The options now match the names in the README. Small changes to bit_tree_pivot_column.h.
git-svn-id: https://phat.googlecode.com/svn/trunk@30 8e3bb3c2-eed4-f18f-5264-0b6c94e6926d
-rw-r--r--  README                                                 |   7
-rw-r--r--  include/phat/representations/bit_tree_pivot_column.h  | 282
-rw-r--r--  src/phat.cpp                                           |  40
3 files changed, 166 insertions(+), 163 deletions(-)
diff --git a/README b/README
index 234b00a..d303271 100644
--- a/README
+++ b/README
@@ -1,5 +1,3 @@
-see phat.googlecode.com for the most current version of the file below:
-
=PHAT (Persistent Homology Algorithm Toolbox)=
Copyright 2013 IST Austria
@@ -16,8 +14,8 @@ contains code for several algorithmic variants:
* The "standard" algorithm (see `[1]`, p.153)
* The "row" algorithm from `[2]` (called pHrow in there)
- * The "twist" algorithm, as described in `[3]` (default algorithm)
- * The "chunk" algorithm presented in `[4]`
+ * The "twist" algorithm, as described in `[3]`
+ * The "chunk" algorithm presented in `[4]` (default algorithm, multi-threaded)
The last two algorithms exploit the special structure of the boundary matrix
to take shortcuts in the computation. The chunk algorithm makes use of multiple
@@ -37,6 +35,7 @@ algorithm. We provide the following choices of representation classes:
* {{{vector_set}}}: Each column is a {{{std::set}}} of integers, with the same meaning as before. The matrix is stored as a {{{std::vector}}} of such columns.
* {{{sparse_pivot_column}}} (default representation): The matrix is stored as in the vector_vector representation. However, when a column is manipulated, it is first converted into a {{{std::set}}}, using an extra data field called the "pivot column". When another column is manipulated later, the pivot column is converted back to the {{{std::vector}}} representation. This can lead to speed improvements when many columns are added to a given pivot column consecutively. In a multicore setup, there is one pivot column per core.
* {{{full_pivot_column}}}: The same idea as in the sparse version. However, instead of a {{{std::set}}}, the pivot column is expanded into a bit vector of size n (the dimension of the matrix). To avoid costly initializations, the class remembers which entries have been manipulated for a pivot column and updates only those entries when another column becomes the pivot.
+ * {{{bit_tree_pivot_column}}}: Default representation for all algorithms except the "row" algorithm. Similar to {{{full_pivot_column}}}, but with a more efficient implementation. Internally it is a bit-set with fast iteration over the present elements and fast access to the maximum element. The structure is initialized once before the reduction algorithm starts and is then reused.
There are two ways to interface with the library:
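To illustrate the representation choices listed above: a minimal sketch of the C++ library interface with the new default representation. This assumes PHAT's usual boundary_matrix API ({{{set_num_cols}}}, {{{set_dim}}}, {{{set_col}}}) and that the reduction classes are function objects applied to the matrix, as {{{generic_compute_pairing}}} in phat.cpp below suggests; the tiny example complex is illustrative only.

{{{
#include <vector>

#include <phat/boundary_matrix.h>
#include <phat/representations/bit_tree_pivot_column.h>
#include <phat/algorithms/chunk_reduction.h>

int main()
{
    // Boundary matrix of a single edge on two vertices; the template
    // parameter selects the column representation.
    phat::boundary_matrix< phat::bit_tree_pivot_column > matrix;
    matrix.set_num_cols( 3 );

    matrix.set_dim( 0, 0 ); // column 0: vertex
    matrix.set_dim( 1, 0 ); // column 1: vertex
    matrix.set_dim( 2, 1 ); // column 2: edge

    std::vector< phat::index > boundary;
    boundary.push_back( 0 );
    boundary.push_back( 1 );
    matrix.set_col( 2, boundary ); // boundary of the edge = vertices 0 and 1

    // Reduce in place with the (new default) chunk algorithm.
    phat::chunk_reduction reduce;
    reduce( matrix );

    return 0;
}
}}}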
diff --git a/include/phat/representations/bit_tree_pivot_column.h b/include/phat/representations/bit_tree_pivot_column.h
index 6bf6c3d..b33e786 100644
--- a/include/phat/representations/bit_tree_pivot_column.h
+++ b/include/phat/representations/bit_tree_pivot_column.h
@@ -1,5 +1,5 @@
/* Copyright 2013 IST Austria
- Contributed by: Hubert Wagner
+ Contributed by: Ulrich Bauer, Michael Kerber, Jan Reininghaus, Hubert Wagner
This file is part of PHAT.
@@ -23,143 +23,147 @@
namespace phat {
- // This is a bitset indexed with a 64-ary tree. Each node in the index
- // has 64 bits; i-th bit says that the i-th subtree is non-empty.
- // Supports practically O(1), inplace, zero-allocation: insert, remove, max_element
- // and clear in O(number of ones in the bitset).
- // 'add_index' is still the real bottleneck in practice.
- class bit_tree_column
- {
- size_t offset; // data[i + offset] = ith block of the data-bitset
- typedef uint64_t block_type;
- std::vector< block_type > data;
- size_t debrujin_magic_table[64];
-
- enum { block_size_in_bits = 64 };
- enum { block_shift = 6 };
-
- // Some magic: http://graphics.stanford.edu/~seander/bithacks.html
- // Gets the position of the rightmost bit of 'x'. 0 means the most significant bit.
- // (-x)&x isolates the rightmost bit.
- // The whole method is much faster than calling log2i, and very comparable to using ScanBitForward/Reverse intrinsic,
- // which should be one CPU instruction, but is not portable.
- size_t rightmost_pos(const block_type value) const
- {
- return 64 - 1 - debrujin_magic_table[((value & ((uint64_t)0 - value))*0x07EDD5E59A4E28C2) >> 58];
+ // This is a bitset indexed with a 64-ary tree. Each node in the index
+ // has 64 bits; i-th bit says that the i-th subtree is non-empty.
+ // Supports insert, remove and max_element in practically O(1) time,
+ // in place and with zero allocations; clear runs in O(number of ones in the bitset).
+ // 'add_index' is still the real bottleneck in practice.
+ class bit_tree_column
+ {
+ size_t offset; // data[i + offset] = ith block of the data-bitset
+ typedef uint64_t block_type;
+ std::vector< block_type > data;
+
+ // this static is not a problem with OpenMP: it is initialized at program startup, before any threads are spawned
+ static const size_t debrujin_magic_table[64];
+
+ enum { block_size_in_bits = 64 };
+ enum { block_shift = 6 };
+
+ // Some magic: http://graphics.stanford.edu/~seander/bithacks.html
+ // Gets the position of the rightmost bit of 'x'. 0 means the most significant bit.
+ // (-x)&x isolates the rightmost bit.
+ // The whole method is much faster than calling log2i, and very comparable to using ScanBitForward/Reverse intrinsic,
+ // which should be one CPU instruction, but is not portable.
+ inline size_t rightmost_pos(const block_type value) const
+ {
+ return 64 - 1 - debrujin_magic_table[((value & (-value))*0x07EDD5E59A4E28C2) >> 58];
+ }
+
+ public:
+
+ void init(index num_cols)
+ {
+ int64_t n = 1; // in case of overflow
+ size_t bottom_blocks_needed = (num_cols+block_size_in_bits-1)/block_size_in_bits;
+ size_t upper_blocks = 1;
+
+ // How many blocks/nodes of index needed to index the whole bitset?
+ while(n * block_size_in_bits < bottom_blocks_needed)
+ {
+ n *= block_size_in_bits;
+ upper_blocks += n;
+ }
+
+ offset = upper_blocks;
+ data.resize(upper_blocks + bottom_blocks_needed, 0);
+ }
+
+ inline index max_index() const
+ {
+ if (!data[0])
+ return -1;
+
+ const size_t size = data.size();
+ size_t n = 0;
+
+ while(true)
+ {
+ const block_type val = data[n];
+ const size_t index = rightmost_pos(val);
+ const size_t newn = (n << block_shift) + index + 1;
+
+ if (newn >= size)
+ {
+ const size_t bottom_index = n - offset;
+ return (bottom_index << block_shift) + index;
+ }
+
+ n = newn;
+ }
+
+ return -1;
+ }
+
+ inline bool empty() const
+ {
+ return data[0] == 0;
+ }
+
+ inline void add_index(const size_t entry)
+ {
+ const block_type ONE = 1;
+ const block_type block_modulo_mask = ((ONE << block_shift) - 1);
+ size_t index_in_level = entry >> block_shift;
+ size_t address = index_in_level + offset;
+ size_t index_in_block = entry & block_modulo_mask;
+
+ block_type mask = (ONE << (block_size_in_bits - index_in_block - 1));
+
+ while(true)
+ {
+ data[address]^=mask;
+
+ // First we check if we reached the root.
+ // Also, if anyone else was in this block, we don't need to update the path up.
+ if (!address || (data[address] & ~mask))
+ return;
+
+ index_in_block = index_in_level & block_modulo_mask;
+ index_in_level >>= block_shift;
+ --address;
+ address >>= block_shift;
+ mask = (ONE << (block_size_in_bits - index_in_block - 1));
+ }
+ }
+
+ void get_column_and_clear(column &out)
+ {
+ out.clear();
+ while(true)
+ {
+ index mx = this->max_index();
+ if (mx == -1)
+ break;
+ out.push_back(mx);
+ add_index(mx);
+ }
+
+ std::reverse(out.begin(), out.end());
+ }
+
+ void add_column(const column &col)
+ {
+ for (size_t i = 0; i < col.size(); ++i)
+ {
+ add_index(col[i]);
+ }
+ }
+
+ inline bool empty() {
+ return !data[0];
}
-
- public:
-
- void init(index num_cols)
- {
- const size_t debrujin_for_64_bit[64] = {
- 63, 0, 58, 1, 59, 47, 53, 2,
- 60, 39, 48, 27, 54, 33, 42, 3,
- 61, 51, 37, 40, 49, 18, 28, 20,
- 55, 30, 34, 11, 43, 14, 22, 4,
- 62, 57, 46, 52, 38, 26, 32, 41,
- 50, 36, 17, 19, 29, 10, 13, 21,
- 56, 45, 25, 31, 35, 16, 9, 12,
- 44, 24, 15, 8, 23, 7, 6, 5};
-
- std::copy(debrujin_for_64_bit, debrujin_for_64_bit+64, debrujin_magic_table);
-
- uint64_t n = 1; // in case of overflow
- size_t bottom_blocks_needed = (num_cols+block_size_in_bits-1)/block_size_in_bits;
- size_t upper_blocks = 1;
-
- // How many blocks/nodes of index needed to index the whole bitset?
- while(n * block_size_in_bits < bottom_blocks_needed)
- {
- n *= block_size_in_bits;
- upper_blocks += n;
- }
-
- offset = upper_blocks;
- data.resize(upper_blocks + bottom_blocks_needed, 0);
- }
-
- index max_index() const
- {
- if (!data[0])
- return -1;
-
- const size_t size = data.size();
- size_t n = 0;
-
- while(true)
- {
- const block_type val = data[n];
- const size_t index = rightmost_pos(val);
- const size_t newn = (n << block_shift) + index + 1;
-
- if (newn >= size)
- {
- const size_t bottom_index = n - offset;
- return (bottom_index << block_shift) + index;
- }
-
- n = newn;
- }
-
- return -1;
- }
-
- bool empty() const
- {
- return data[0] == 0;
- }
-
- void add_index(const size_t entry)
- {
- const block_type ONE = 1;
- const block_type block_modulo_mask = ((ONE << block_shift) - 1);
- size_t index_in_level = entry >> block_shift;
- size_t address = index_in_level + offset;
- size_t index_in_block = entry & block_modulo_mask;
-
- block_type mask = (ONE << (block_size_in_bits - index_in_block - 1));
-
- while(true)
- {
- data[address]^=mask;
-
- // First we check if we reached the root.
- // Also, if anyone else was in this block, we don't need to update the path up.
- if (!address || (data[address] & ~mask))
- return;
-
- index_in_block = index_in_level & block_modulo_mask;
- index_in_level >>= block_shift;
- --address;
- address >>= block_shift;
- mask = (ONE << (block_size_in_bits - index_in_block - 1));
- }
- }
-
- void get_column_and_clear(column &out)
- {
- out.clear();
- while(true)
- {
- index mx = this->max_index();
- if (mx == -1)
- break;
- out.push_back(mx);
- add_index(mx);
- }
-
- std::reverse(out.begin(), out.end());
- }
-
- void add_column(const column &col)
- {
- for (size_t i = 0; i < col.size(); ++i)
- {
- add_index(col[i]);
- }
- }
- };
-
- typedef abstract_pivot_column<bit_tree_column> bit_tree_pivot_column;
+ };
+
+ const size_t bit_tree_column::debrujin_magic_table[64] = {
+ 63, 0, 58, 1, 59, 47, 53, 2,
+ 60, 39, 48, 27, 54, 33, 42, 3,
+ 61, 51, 37, 40, 49, 18, 28, 20,
+ 55, 30, 34, 11, 43, 14, 22, 4,
+ 62, 57, 46, 52, 38, 26, 32, 41,
+ 50, 36, 17, 19, 29, 10, 13, 21,
+ 56, 45, 25, 31, 35, 16, 9, 12,
+ 44, 24, 15, 8, 23, 7, 6, 5};
+
+ typedef abstract_pivot_column<bit_tree_column> bit_tree_pivot_column;
}
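A note on the bit hack in rightmost_pos above: the multiplier 0x07EDD5E59A4E28C2 together with debrujin_magic_table is the standard 64-bit de Bruijn pair for locating the least significant set bit. Below is a self-contained sketch that checks this claim; the function names are illustrative, not part of PHAT.

{{{
#include <cstdint>
#include <cstdio>

// Same table and multiplier as in the header above; the lookup yields the
// index of the least significant set bit (bit 0 = LSB).
static const int lsb_table[64] = {
    63,  0, 58,  1, 59, 47, 53,  2,
    60, 39, 48, 27, 54, 33, 42,  3,
    61, 51, 37, 40, 49, 18, 28, 20,
    55, 30, 34, 11, 43, 14, 22,  4,
    62, 57, 46, 52, 38, 26, 32, 41,
    50, 36, 17, 19, 29, 10, 13, 21,
    56, 45, 25, 31, 35, 16,  9, 12,
    44, 24, 15,  8, 23,  7,  6,  5 };

// v must be non-zero. (v & (0 - v)) isolates the lowest set bit; the
// multiplication maps each of the 64 powers of two to a distinct
// top-six-bit pattern, which indexes the table.
static int debruijn_lsb( uint64_t v )
{
    return lsb_table[ ( ( v & ( 0 - v ) ) * 0x07EDD5E59A4E28C2ULL ) >> 58 ];
}

int main()
{
    for( int i = 0; i < 64; i++ ) {
        // lowest set bit at position i, plus a distracting higher bit
        uint64_t v = ( uint64_t( 1 ) << i ) | ( uint64_t( 1 ) << 63 );
        if( debruijn_lsb( v ) != i ) {
            std::printf( "mismatch at %d\n", i );
            return 1;
        }
    }
    // rightmost_pos in the header counts from the other end (0 = most
    // significant bit), i.e. it returns 63 - debruijn_lsb(value).
    std::printf( "ok\n" );
    return 0;
}
}}}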
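And a short usage sketch of bit_tree_column itself, which is normally wrapped by abstract_pivot_column rather than used directly. It assumes {{{phat::column}}} is PHAT's usual std::vector of indices, pulled in by the header; the values are illustrative. Since add_index toggles bits (XOR), add_column implements Z/2 column addition:

{{{
#include <iostream>

#include <phat/representations/bit_tree_pivot_column.h>

int main()
{
    phat::bit_tree_column pivot;
    pivot.init( 128 );                    // capacity for indices 0..127

    phat::column a, b, result;
    a.push_back( 3 );  a.push_back( 42 );
    b.push_back( 42 ); b.push_back( 100 );

    pivot.add_column( a );                // pivot = {3, 42}
    pivot.add_column( b );                // 42 toggles off: pivot = {3, 100}

    std::cout << pivot.max_index() << "\n";   // prints 100

    pivot.get_column_and_clear( result ); // result = {3, 100}; pivot cleared
    std::cout << pivot.empty() << "\n";       // prints 1
    return 0;
}
}}}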
diff --git a/src/phat.cpp b/src/phat.cpp
index 6fec0c8..43ebedc 100644
--- a/src/phat.cpp
+++ b/src/phat.cpp
@@ -30,7 +30,7 @@
#include <phat/algorithms/chunk_reduction.h>
-enum Representation_type {VEC_VEC, VEC_SET, SPARSE_PIVOT, FULL_PIVOT, BIT_TREE_PIVOT};
+enum Representation_type {VECTOR_VECTOR, VECTOR_SET, SPARSE_PIVOT_COLUMN, FULL_PIVOT_COLUMN, BIT_TREE_PIVOT_COLUMN};
enum Algorithm_type {STANDARD, TWIST, ROW, CHUNK };
void print_help() {
@@ -43,8 +43,8 @@ void print_help() {
std::cerr << "--help -- prints this screen" << std::endl;
std::cerr << "--verbose -- verbose output" << std::endl;
std::cerr << "--dualize -- use dualization approach" << std::endl;
- std::cerr << "--vec-vec, --vec-set, --full-pivot, --sparse-pivot, --bit-tree-pivot -- selects a representation data structure for boundary matrices (default is '--sparse-pivot')" << std::endl;
- std::cerr << "--standard, --twist, --chunk, --row -- selects a reduction algorithm (default is '--twist')" << std::endl;
+ std::cerr << "--vector_vector, --vector_set, --full_pivot_column, --sparse_pivot_column, --bit_tree_pivot_column -- selects a representation data structure for boundary matrices (default is '--bit_tree_pivot_column')" << std::endl;
+ std::cerr << "--standard, --twist, --chunk, --row -- selects a reduction algorithm (default is '--chunk')" << std::endl;
}
void print_help_and_exit() {
@@ -53,7 +53,7 @@ void print_help_and_exit() {
}
void parse_command_line( int argc, char** argv, bool& use_binary, Representation_type& rep_type, Algorithm_type& reduction,
- std::string& input_filename, std::string& output_filename, bool& verbose, bool& dualize ) {
+ std::string& input_filename, std::string& output_filename, bool& verbose, bool& dualize) {
if( argc < 3 ) print_help_and_exit();
@@ -66,11 +66,11 @@ void parse_command_line( int argc, char** argv, bool& use_binary, Representation
if( option == "--ascii" ) use_binary = false;
else if( option == "--binary" ) use_binary = true;
else if( option == "--dualize" ) dualize = true;
- else if( option == "--vec-vec" ) rep_type = VEC_VEC;
- else if( option == "--vec-set" ) rep_type = VEC_SET;
- else if( option == "--full-pivot" ) rep_type = FULL_PIVOT;
- else if( option == "--bit-tree-pivot" ) rep_type = BIT_TREE_PIVOT;
- else if( option == "--sparse-pivot" ) rep_type = SPARSE_PIVOT;
+ else if( option == "--vector_vector" ) rep_type = VECTOR_VECTOR;
+ else if( option == "--vector_set" ) rep_type = VECTOR_SET;
+ else if( option == "--full_pivot_column" ) rep_type = FULL_PIVOT_COLUMN;
+ else if( option == "--bit_tree_pivot_column" ) rep_type = BIT_TREE_PIVOT_COLUMN;
+ else if( option == "--sparse_pivot_column" ) rep_type = SPARSE_PIVOT_COLUMN;
else if( option == "--standard" ) reduction = STANDARD;
else if( option == "--twist" ) reduction = TWIST;
else if( option == "--row" ) reduction = ROW;
@@ -86,8 +86,8 @@ void parse_command_line( int argc, char** argv, bool& use_binary, Representation
template<typename Representation, typename Algorithm>
void generic_compute_pairing( std::string input_filename,
std::string output_filename,
- bool use_binary,
- bool verbose,
+ bool use_binary,
+ bool verbose,
bool dualize ) {
phat::boundary_matrix< Representation > matrix;
@@ -95,7 +95,7 @@ void generic_compute_pairing( std::string input_filename,
double read_timer = omp_get_wtime();
if( use_binary ) {
- LOG( "Reading input file " << input_filename << " in binary mode" )
+ LOG( "Reading input file " << input_filename << " in binary mode" )
read_successful = matrix.load_binary( input_filename );
} else {
LOG( "Reading input file " << input_filename << " in ascii mode" )
@@ -134,8 +134,8 @@ void generic_compute_pairing( std::string input_filename,
int main( int argc, char** argv )
{
bool use_binary = true; // interpret input as binary or ascii file
- Representation_type rep_type = SPARSE_PIVOT; // representation class
- Algorithm_type reduction = TWIST; // reduction algorithm
+ Representation_type rep_type = BIT_TREE_PIVOT_COLUMN; // representation class
+ Algorithm_type reduction = CHUNK; // reduction algorithm
std::string input_filename; // name of file that contains the boundary matrix
std::string output_filename; // name of file that will contain the persistence pairs
bool verbose = false; // print timings / info
@@ -144,35 +144,35 @@ int main( int argc, char** argv )
parse_command_line( argc, argv, use_binary, rep_type, reduction, input_filename, output_filename, verbose, dualize );
switch( rep_type ) {
- case VEC_VEC: switch( reduction ) {
+ case VECTOR_VECTOR: switch( reduction ) {
case STANDARD: CALL_GENERIC_CODE(phat::vector_vector, phat::standard_reduction) break;
case TWIST: CALL_GENERIC_CODE(phat::vector_vector, phat::twist_reduction) break;
case ROW: CALL_GENERIC_CODE(phat::vector_vector, phat::row_reduction) break;
case CHUNK: CALL_GENERIC_CODE(phat::vector_vector, phat::chunk_reduction) break;
} break;
- case VEC_SET: switch( reduction ) {
+ case VECTOR_SET: switch( reduction ) {
case STANDARD: CALL_GENERIC_CODE(phat::vector_set, phat::standard_reduction) break;
case TWIST: CALL_GENERIC_CODE(phat::vector_set, phat::twist_reduction) break;
case ROW: CALL_GENERIC_CODE(phat::vector_set, phat::row_reduction) break;
case CHUNK: CALL_GENERIC_CODE(phat::vector_set, phat::chunk_reduction) break;
} break;
- case FULL_PIVOT: switch( reduction ) {
+ case FULL_PIVOT_COLUMN: switch( reduction ) {
case STANDARD: CALL_GENERIC_CODE(phat::full_pivot_column, phat::standard_reduction) break;
case TWIST: CALL_GENERIC_CODE(phat::full_pivot_column, phat::twist_reduction) break;
case ROW: CALL_GENERIC_CODE(phat::full_pivot_column, phat::row_reduction) break;
case CHUNK: CALL_GENERIC_CODE(phat::full_pivot_column, phat::chunk_reduction) break;
} break;
- case BIT_TREE_PIVOT: switch( reduction ) {
- case STANDARD: CALL_GENERIC_CODE(phat::bit_tree_pivot_column, phat::standard_reduction) break;
+ case BIT_TREE_PIVOT_COLUMN: switch( reduction ) {
+ case STANDARD: CALL_GENERIC_CODE(phat::bit_tree_pivot_column, phat::standard_reduction) break;
case TWIST: CALL_GENERIC_CODE(phat::bit_tree_pivot_column, phat::twist_reduction) break;
case ROW: CALL_GENERIC_CODE(phat::bit_tree_pivot_column, phat::row_reduction) break;
case CHUNK: CALL_GENERIC_CODE(phat::bit_tree_pivot_column, phat::chunk_reduction) break;
} break;
- case SPARSE_PIVOT: switch( reduction ) {
+ case SPARSE_PIVOT_COLUMN: switch( reduction ) {
case STANDARD: CALL_GENERIC_CODE(phat::sparse_pivot_column, phat::standard_reduction) break;
case TWIST: CALL_GENERIC_CODE(phat::sparse_pivot_column, phat::twist_reduction) break;
case ROW: CALL_GENERIC_CODE(phat::sparse_pivot_column, phat::row_reduction) break;