From 816b21f9df89788669b15369c42b1c03eca39461 Mon Sep 17 00:00:00 2001 From: Bryn Keller Date: Thu, 10 Mar 2016 16:04:37 -0800 Subject: Simplified, more Pythonic API, with additional documentation --- python/_phat.cpp | 3 +- python/phat.py | 318 ++++++++++++++++++++++++++++++++++++------- python/src/self_test.py | 16 +-- python/src/simple_example.py | 42 +++--- 4 files changed, 294 insertions(+), 85 deletions(-) diff --git a/python/_phat.cpp b/python/_phat.cpp index c43a3f5..40a9421 100644 --- a/python/_phat.cpp +++ b/python/_phat.cpp @@ -119,8 +119,7 @@ void wrap_boundary_matrix(py::module &mod, const std::string &representation_suf //Note that boundary_matrix dimensions are not normal matrix dimensions, //They refer to the dimension of the simplex stored in the given column. .def("get_dim", &mat::get_dim, - "Get the dimension list for this boundary matrix. " - "The dimension list shows the dimension for each column in the matrix") + "Get the dimension for the requested column.") .def("set_dim", &mat::set_dim, "Set the dimension for a single column", py::arg("index"), py::arg("dimension")) //The `set_dims` method is an example of making the data structure easier to use diff --git a/python/phat.py b/python/phat.py index 7cc0ae2..6537b26 100644 --- a/python/phat.py +++ b/python/phat.py @@ -1,20 +1,75 @@ -import _phat -import enum +"""Bindings for the Persistent Homology Algorithm Toolbox -from _phat import persistence_pairs +PHAT is a tool for algebraic topology. It can be used via phat.py to compute +persistent (co)homology from boundary matrices, using various reduction +algorithms and column data representations. -__all__ = ['boundary_matrix', - 'persistence_pairs', - 'compute_persistence_pairs', - 'compute_persistence_pairs_dualized'] +Here is a simple example of usage. 
+We will build an ordered boundary matrix of this simplicial complex consisting of a single triangle: -"""Bindings for the Persistent Homology Algorithm Toolbox + 3 + |\\ + | \\ + | \\ + | \\ 4 + 5| \\ + | \\ + | 6 \\ + | \\ + |________\\ + 0 2 1 + +Now the code:: + import phat -Please see https://bitbucket.org/phat-code/phat for more information. + # define a boundary matrix with the chosen internal representation + boundary_matrix = phat.boundary_matrix(representation = phat.representations.vector_vector) + + # set the respective columns -- (dimension, boundary) pairs + boundary_matrix.columns = [ (0, []), + (0, []), + (1, [0,1]), + (0, []), + (1, [1,3]), + (1, [0,3]), + (2, [2,4,5])] + + # or equivalently, boundary_matrix = phat.boundary_matrix(representation = ..., columns = ...) + # would combine the creation of the matrix and the assignment of the columns + + # print some information of the boundary matrix: + print("\nThe boundary matrix has %d columns:" % len(boundary_matrix.columns)) + for col in boundary_matrix.columns: + s = "Column %d represents a cell of dimension %d." % (col.index, col.dimension) + if (col.boundary): + s = s + " Its boundary consists of the cells " + " ".join([str(c) for c in col.boundary]) + print(s) + print("Overall, the boundary matrix has %d entries." % len(boundary_matrix)) + + pairs = boundary_matrix.compute_persistence_pairs() + + pairs.sort() + + print("\nThere are %d persistence pairs: " % len(pairs)) + for pair in pairs: + print("Birth: %d, Death: %d" % pair) + +Please see https://bitbucket.org/phat-code/phat/python for more information. 
""" +import _phat +import enum + +from _phat import persistence_pairs + +#The public API for the module + +__all__ = ['boundary_matrix', + 'persistence_pairs', + 'representations', + 'reductions'] class representations(enum.Enum): """Available representations for internal storage of columns in @@ -29,60 +84,221 @@ class representations(enum.Enum): vector_list = 7 class reductions(enum.Enum): - "Available reduction algorithms" + """Available reduction algorithms""" twist_reduction = 1 chunk_reduction = 2 standard_reduction = 3 row_reduction = 4 spectral_sequence_reduction = 5 -def __short_name(name): - return "".join([n[0] for n in name.split("_")]) +class column: + """A view on one column of data in a boundary matrix""" + def __init__(self, matrix, index): + """INTERNAL. Columns are created automatically by boundary matrices. + There is no need to construct them directly""" + self._matrix = matrix + self._index = index -def convert(source, to_representation): - """Internal - function to convert from one `boundary_matrix` implementation to another""" - class_name = source.__class__.__name__ - source_rep_short_name = class_name[len('boundary_matrix_'):] - to_rep_short_name = __short_name(to_representation.name) - function = getattr(_phat, "convert_%s_to_%s" % (source_rep_short_name, to_rep_short_name)) - return function(source) + @property + def index(self): + """The 0-based index of this column in its boundary matrix""" + return self._index + + @property + def dimension(self): + """The dimension of the column (0 = point, 1 = line, 2 = triangle, etc.)""" + return self._matrix._matrix.get_dim(self._index) + + @dimension.setter + def dimension(self, value): + return self._matrix._matrix.set_dim(self._index, value) -def boundary_matrix(representation = representations.bit_tree_pivot_column, source = None): - """Returns an instance of a `boundary_matrix` class. - The boundary matrix will use the specified implementation for storing its column data. 
- If the `source` parameter is specified, it will be assumed to be another boundary matrix, - whose data should be copied into the new matrix. + @property + def boundary(self): + """The boundary values in this column, i.e. the other columns that this column is bounded by""" + return self._matrix._matrix.get_col(self._index) + + @boundary.setter + def boundary(self, values): + return self._matrix._matrix.set_col(self._index, values) + +class boundary_matrix: + """Boundary matrices that store the shape information of a cell complex. """ - if source: - return convert(source, representation) - else: - class_name = representation.name - short_name = __short_name(class_name) - function = getattr(_phat, "boundary_matrix_" + short_name) - return function() - -def compute_persistence_pairs(matrix, - reduction = reductions.twist_reduction): - """Computes persistence pairs (birth, death) for the given boundary matrix.""" - class_name = matrix.__class__.__name__ - representation_short_name = class_name[len('boundary_matrix_'):] - algo_name = reduction.name - algo_short_name = __short_name(algo_name) - function = getattr(_phat, "compute_persistence_pairs_" + representation_short_name + "_" + algo_short_name) - return function(matrix) - -def compute_persistence_pairs_dualized(matrix, - reduction = reductions.twist_reduction): - """Computes persistence pairs (birth, death) from the dualized form of the given boundary matrix.""" - class_name = matrix.__class__.__name__ - representation_short_name = class_name[len('boundary_matrix_'):] - algo_name = reduction.name - algo_short_name = __short_name(algo_name) - function = getattr(_phat, "compute_persistence_pairs_dualized_" + representation_short_name + "_" + algo_short_name) - return function(matrix) + def __init__(self, representation = representations.bit_tree_pivot_column, source = None, columns = None): + """ + The boundary matrix will use the specified implementation for storing its + column data. 
If the `source` parameter is specified, it will be assumed to + be another boundary matrix, whose data should be copied into the new + matrix. + + Parameters + ---------- + + representation : phat.representations, optional + The type of column storage to use in the requested boundary matrix. + source : phat.boundary_matrix, optional + If provided, creates the requested matrix as a copy of the data and dimensions + in `source`. + columns : column list, or list of (dimension, boundary) tuples, optional + If provided, loads these columns into the new boundary matrix. Note that + columns will be loaded in the order given, not according to their ``index`` properties. + Returns + ------- + matrix : boundary_matrix + """ + self._representation = representation + if source is not None: + self._matrix = _convert(source, representation) + else: + self._matrix = self.__matrix_for_representation(representation)() + if columns: + self.columns = columns + + @property + def columns(self): + """A collection of column objects""" + return [column(self, i) for i in range(self._matrix.get_num_cols())] + + @columns.setter + def columns(self, columns): + for col in columns: + if not (isinstance(col, column) or isinstance(col, tuple)): + raise TypeError("All columns must be column objects, or (dimension, values) tuples") + if len(columns) != len(self.dimensions): + self._matrix.set_dims([0] * len(columns)) + for i, col in enumerate(columns): + if isinstance(col, column): + self._matrix.set_dim(i, col.dimension) + self._matrix.set_col(i, col.boundary) + else: + dimension, values = col + self._matrix.set_dim(i, dimension) + self._matrix.set_col(i, values) + + @property + def dimensions(self): + """A collection of dimensions, equivalent to [c.dimension for c in self.columns]""" + return [self._matrix.get_dim(i) for i in range(self._matrix.get_num_cols())] + + @dimensions.setter + def dimensions(self, dimensions): + return self._matrix.set_dims(dimensions) + + def __matrix_for_representation(self, 
representation): + short_name = _short_name(representation.name) + return getattr(_phat, "boundary_matrix_" + short_name) + + def __eq__(self, other): + return self._matrix == other._matrix + + def __len__(self): + return self._matrix.get_num_entries() + + #Pickle support + def __getstate__(self): + (dimensions, columns) = self._matrix.get_vector_vector() + return (self._representation, dimensions, columns) + + #Pickle support + def __setstate__(self, state): + representation, dimensions, columns = state + self._representation = representation + self._matrix = self.__matrix_for_representation(representation)() + self._matrix.set_vector_vector(dimensions, columns) + + def load(self, file_name, mode = 'b'): + """Load this boundary matrix from a file + + Parameters + ---------- + + file_name : string + The file name to load + + mode : string, optional (defaults to 'b') + The mode ('b' for binary, 't' for text) to use for working with the file + + Returns + ------- + + success : bool + + """ + if mode == 'b': + return self._matrix.load_binary(file_name) + elif mode == 't': + return self._matrix.load_ascii(file_name) + else: + raise ValueError("Only 'b' - binary and 't' - text modes are supported") + + def save(self, file_name, mode = 'b'): + """Save this boundary matrix to a file + + Parameters + ---------- + + file_name : string + The file name to save to + + mode : string, optional (defaults to 'b') + The mode ('b' for binary, 't' for text) to use for working with the file + + Returns + ------- + + success : bool + + """ + if mode == 'b': + return self._matrix.save_binary(file_name) + elif mode == 't': + return self._matrix.save_ascii(file_name) + else: + raise ValueError("Only 'b' - binary and 't' - text modes are supported") + + def compute_persistence_pairs(self, + reduction = reductions.twist_reduction): + """Computes persistence pairs (birth, death) for the given boundary matrix.""" + representation_short_name = _short_name(self._representation.name) + algo_name = 
reduction.name + algo_short_name = _short_name(algo_name) + #Look up an implementation that matches the requested characteristics + #in the _phat module + function = getattr(_phat, "compute_persistence_pairs_" + representation_short_name + "_" + algo_short_name) + return function(self._matrix) + + def compute_persistence_pairs_dualized(self, + reduction = reductions.twist_reduction): + """Computes persistence pairs (birth, death) from the dualized form of the given boundary matrix.""" + representation_short_name = _short_name(self._representation.name) + algo_name = reduction.name + algo_short_name = _short_name(algo_name) + #Look up an implementation that matches the requested characteristics + #in the _phat module + function = getattr(_phat, "compute_persistence_pairs_dualized_" + representation_short_name + "_" + algo_short_name) + return function(self._matrix) + + def convert(self, representation): + """Copy this matrix to another with a different representation""" + return boundary_matrix(representation, self) + +def _short_name(name): + """An internal API that takes leading characters from words + For instance, 'bit_tree_pivot_column' becomes 'btpc' + """ + return "".join([n[0] for n in name.split("_")]) + +def _convert(source, to_representation): + """Internal - function to convert from one `boundary_matrix` implementation to another""" + class_name = source._representation.name + source_rep_short_name = _short_name(class_name) + to_rep_short_name = _short_name(to_representation.name) + function = getattr(_phat, "convert_%s_to_%s" % (source_rep_short_name, to_rep_short_name)) + return function(source._matrix) diff --git a/python/src/self_test.py b/python/src/self_test.py index c8174fb..3f85fc1 100644 --- a/python/src/self_test.py +++ b/python/src/self_test.py @@ -8,15 +8,14 @@ if __name__=='__main__': boundary_matrix = phat.boundary_matrix() # This is broken for some reason - if not boundary_matrix.load_binary(test_data): - # if not 
boundary_matrix.load_ascii(test_data): + if not boundary_matrix.load(test_data): print("Error: test data %s not found!" % test_data) sys.exit(1) error = False def compute_chunked(mat): - return phat.compute_persistence_pairs(mat, phat.reductions.chunk_reduction) + return mat.compute_persistence_pairs(phat.reductions.chunk_reduction) print("Comparing representations using Chunk algorithm ...") print("Running Chunk - Sparse ...") @@ -88,7 +87,8 @@ if __name__=='__main__': reps = phat.representations reds = phat.reductions - pairs = phat.compute_persistence_pairs + def pairs(mat, red): + return mat.compute_persistence_pairs(red) twist_boundary_matrix = bit_tree_mat() twist_pairs = pairs(twist_boundary_matrix, reds.twist_reduction) @@ -132,10 +132,10 @@ if __name__=='__main__': print("Comparing primal and dual approach using Chunk - Full ...") primal_boundary_matrix = phat.boundary_matrix(reps.full_pivot_column, boundary_matrix) - primal_pairs = phat.compute_persistence_pairs(primal_boundary_matrix, reds.chunk_reduction) + primal_pairs = primal_boundary_matrix.compute_persistence_pairs(reds.chunk_reduction) dual_boundary_matrix = phat.boundary_matrix(reps.full_pivot_column, boundary_matrix) - dual_pairs = phat.compute_persistence_pairs_dualized(dual_boundary_matrix) + dual_pairs = dual_boundary_matrix.compute_persistence_pairs_dualized() if primal_pairs != dual_pairs: print("Error: primal and dual differ!", file=sys.stderr) @@ -149,11 +149,9 @@ if __name__=='__main__': print("Testing vector interface ...") - (vector_vector_matrix, vector_dims) = boundary_matrix.get_vector_vector() - vector_vector_boundary_matrix = phat.boundary_matrix(phat.representations.bit_tree_pivot_column) - vector_vector_boundary_matrix.load_vector_vector(vector_vector_matrix, vector_dims) + vector_vector_boundary_matrix.columns = boundary_matrix.columns if vector_vector_boundary_matrix != boundary_matrix: print("Error: [load|save]_vector_vector bug", file=sys.stderr) diff --git 
a/python/src/simple_example.py b/python/src/simple_example.py index 82cf6be..955e213 100644 --- a/python/src/simple_example.py +++ b/python/src/simple_example.py @@ -21,39 +21,35 @@ if __name__ == "__main__": import phat - # set the dimension of the cell that each column represents: - dimensions = [0, 0, 1, 0, 1, 1, 2] - # define a boundary matrix with the chosen internal representation boundary_matrix = phat.boundary_matrix(representation = phat.representations.vector_vector) - # set the respective columns -- the columns entries have to be sorted - boundary_matrix.set_dims(dimensions) - boundary_matrix.set_col(0, []) - boundary_matrix.set_col(1, []) - boundary_matrix.set_col(2, [0,1]) - boundary_matrix.set_col(3, []) - boundary_matrix.set_col(4, [1,3]) - boundary_matrix.set_col(5, [0,3]) - boundary_matrix.set_col(6, [2,4,5]) + # set the respective columns -- (dimension, boundary) pairs + boundary_matrix.columns = [ (0, []), + (0, []), + (1, [0,1]), + (0, []), + (1, [1,3]), + (1, [0,3]), + (2, [2,4,5])] + + # or equivalently, boundary_matrix = phat.boundary_matrix(representation = ..., columns = ...) + # would combine the creation of the matrix and the assignment of the columns # print some information of the boundary matrix: - print() - print("The boundary matrix has %d columns:" % boundary_matrix.get_num_cols()) - for col_idx in range(boundary_matrix.get_num_cols()): - s = "Column %d represents a cell of dimension %d." % (col_idx, boundary_matrix.get_dim(col_idx)) - if (not boundary_matrix.is_empty(col_idx)): - s = s + " Its boundary consists of the cells " + " ".join([str(c) for c in boundary_matrix.get_col(col_idx)]) + print("\nThe boundary matrix has %d columns:" % len(boundary_matrix.columns)) + for col in boundary_matrix.columns: + s = "Column %d represents a cell of dimension %d." 
% (col.index, col.dimension) + if (col.boundary): + s = s + " Its boundary consists of the cells " + " ".join([str(c) for c in col.boundary]) print(s) - print("Overall, the boundary matrix has %d entries." % boundary_matrix.get_num_entries()) + print("Overall, the boundary matrix has %d entries." % len(boundary_matrix)) - pairs = phat.compute_persistence_pairs(boundary_matrix) + pairs = boundary_matrix.compute_persistence_pairs() pairs.sort() - print() - - print("There are %d persistence pairs: " % len(pairs)) + print("\nThere are %d persistence pairs: " % len(pairs)) for pair in pairs: print("Birth: %d, Death: %d" % pair) -- cgit v1.2.3