diff options
Diffstat (limited to 'python')
-rw-r--r-- | python/.gitignore | 7 | ||||
-rw-r--r-- | python/MANIFEST.in | 1 | ||||
-rw-r--r-- | python/README.rst | 142 | ||||
-rw-r--r-- | python/_phat.cpp | 291 | ||||
-rw-r--r-- | python/phat.py | 321 | ||||
-rw-r--r-- | python/setup.cfg | 5 | ||||
-rw-r--r-- | python/src/self_test.py | 166 | ||||
-rw-r--r-- | python/src/simple_example.py | 55 |
8 files changed, 988 insertions, 0 deletions
diff --git a/python/.gitignore b/python/.gitignore new file mode 100644 index 0000000..b576037 --- /dev/null +++ b/python/.gitignore @@ -0,0 +1,7 @@ +__pycache__ +*.so +build +dist +old +phat.egg-info +pybind11 diff --git a/python/MANIFEST.in b/python/MANIFEST.in new file mode 100644 index 0000000..dba0deb --- /dev/null +++ b/python/MANIFEST.in @@ -0,0 +1 @@ +recursive-include include *.h
\ No newline at end of file diff --git a/python/README.rst b/python/README.rst new file mode 100644 index 0000000..0c6c391 --- /dev/null +++ b/python/README.rst @@ -0,0 +1,142 @@ +Persistent Homology Algorithm Toolkit (PHAT) +============================================ + +This is a Python interface for the `Persistent Homology Algorithm Toolkit`_, a software library +that contains methods for computing the persistence pairs of a +filtered cell complex represented by an ordered boundary matrix with Z\ :sub:`2` coefficients. + +For an introduction to persistent homology, see the textbook [1]_. This software package +contains code for several algorithmic variants: + +* The "standard" algorithm (see [1]_, p.153) +* The "row" algorithm from [2]_ (called pHrow in that paper) +* The "twist" algorithm, as described in [3]_ (default algorithm) +* The "chunk" algorithm presented in [4]_ +* The "spectral sequence" algorithm (see [1]_, p.166) + +All but the standard algorithm exploit the special structure of the boundary matrix +to take shortcuts in the computation. The chunk and the spectral sequence algorithms +make use of multiple CPU cores if PHAT is compiled with OpenMP support. + +All algorithms are implemented as function objects that manipulate a given +``boundary_matrix`` (to be defined below) object to reduced form. +From this reduced form one can then easily extract the persistence pairs. +Alternatively, one can use the ``compute_persistence_pairs`` function which takes an +algorithm as a parameter, reduces the given ``boundary_matrix`` and stores the +resulting pairs in a given ``persistence_pairs`` object. + +The ``boundary_matrix`` class takes a "Representation" class as a parameter. +This representation defines how columns of the matrix are represented and how +low-level operations (e.g., column additions) are performed. The right choice of the +representation class can be as important for the performance of the program as choosing +the algorithm. 
We provide the following choices of representation classes: + +* ``vector_vector``: Each column is represented as a sorted ``std::vector`` of integers, containing the indices of the non-zero entries of the column. The matrix itself is a ``std::vector`` of such columns. +* ``vector_heap``: Each column is represented as a heapified ``std::vector`` of integers, containing the indices of the non-zero entries of the column. The matrix itself is a ``std::vector`` of such columns. +* ``vector_set``: Each column is a ``std::set`` of integers, with the same meaning as above. The matrix is stored as a ``std::vector`` of such columns. +* ``vector_list``: Each column is a sorted ``std::list`` of integers, with the same meaning as above. The matrix is stored as a ``std::vector`` of such columns. +* ``sparse_pivot_column``: The matrix is stored as in the vector_vector representation. However, when a column is manipulated, it is first converted into a ``std::set``, using an extra data field called the "pivot column". When another column is manipulated later, the pivot column is converted back to the ``std::vector`` representation. This can lead to significant speed improvements when many columns are added to a given pivot column consecutively. In a multicore setup, there is one pivot column per thread. +* ``heap_pivot_column``: The same idea as in the sparse version. Instead of a ``std::set``, the pivot column is represented by a ``std::priority_queue``. +* ``full_pivot_column``: The same idea as in the sparse version. However, instead of a ``std::set``, the pivot column is expanded into a bit vector of size n (the dimension of the matrix). To avoid costly initializations, the class remembers which entries have been manipulated for a pivot column and updates only those entries when another column becomes the pivot. +* ``bit_tree_pivot_column`` (default representation): Similar to the ``full_pivot_column`` but the implementation is more efficient. 
Internally it is a bit-set with fast iteration over nonzero elements, and fast access to the maximal element. + +Installation +------------ + +If you wish to use the released version of PHAT, you can simply install from PyPI:: + + pip install phat + +Installation from Source +------------------------ +Suppose you have checked out the PHAT repository at location $PHAT. Then you can:: + + cd $PHAT + + pip install . + +This will install PHAT for whatever Python installation your ``pip`` executable is associated with. +Please ensure you use the ``pip`` that comes from the same directory where your ``python`` executable lives! + +Currently, the PHAT Python bindings are known to work on: + +* Linux with Python 2.7 (tested on Ubuntu 14.04 with system Python) +* Linux with Python 3.5 (tested on Ubuntu 14.04 with Anaconda) +* Mac OS X with Python 2.7.12 (tested on Sierra with homebrew) +* Mac OS X with Python 3.5 (tested on Sierra with homebrew) + +Other configurations are untested. + +Please note that this package DOES NOT work with the Python 2.7.10 that ships with the operating +system in Mac OS X. These words of wisdom from `python.org`_ are worth heeding: + + The version of Python that ships with OS X is great for learning but it’s not good for development. + The version shipped with OS X may be out of date from the official current Python release, + which is considered the stable production version. + +We recommend installing Python on Mac OS X using either homebrew or Anaconda, according to your taste. + +Please let us know if there is a platform you'd like us to support, we will do so if we can. 
+ +Sample usage +------------ + +We will build an ordered boundary matrix of this simplicial complex consisting of a single triangle:: + + 3 + |\\ + | \\ + | \\ + | \\ 4 + 5| \\ + | \\ + | 6 \\ + | \\ + |________\\ + 0 2 1 + +Now the code:: + + import phat + + # define a boundary matrix with the chosen internal representation + boundary_matrix = phat.boundary_matrix(representation = phat.representations.vector_vector) + + # set the respective columns -- (dimension, boundary) pairs + boundary_matrix.columns = [ (0, []), + (0, []), + (1, [0,1]), + (0, []), + (1, [1,3]), + (1, [0,3]), + (2, [2,4,5])] + + # or equivalently, boundary_matrix = phat.boundary_matrix(representation = ..., columns = ...) + # would combine the creation of the matrix and the assignment of the columns + + # print some information of the boundary matrix: + print("\nThe boundary matrix has %d columns:" % len(boundary_matrix.columns)) + for col in boundary_matrix.columns: + s = "Column %d represents a cell of dimension %d." % (col.index, col.dimension) + if (col.boundary): + s = s + " Its boundary consists of the cells " + " ".join([str(c) for c in col.boundary]) + print(s) + print("Overall, the boundary matrix has %d entries." % len(boundary_matrix)) + + pairs = boundary_matrix.compute_persistence_pairs() + + pairs.sort() + + print("\nThere are %d persistence pairs: " % len(pairs)) + for pair in pairs: + print("Birth: %d, Death: %d" % pair) + +References: + +.. [1] H.Edelsbrunner, J.Harer: Computational Topology, An Introduction. American Mathematical Society, 2010, ISBN 0-8218-4925-5 +.. [2] V.de Silva, D.Morozov, M.Vejdemo-Johansson: Dualities in persistent (co)homology. Inverse Problems 27, 2011 +.. [3] C.Chen, M.Kerber: Persistent Homology Computation With a Twist. 27th European Workshop on Computational Geometry, 2011. +.. [4] U.Bauer, M.Kerber, J.Reininghaus: Clear and Compress: Computing Persistent Homology in Chunks. arXiv:1303.0477_ +.. 
_arXiv:1303.0477: http://arxiv.org/pdf/1303.0477.pdf +.. _`Persistent Homology Algorithm Toolkit`: https://bitbucket.org/phat/phat-code +.. _`python.org`: http://docs.python-guide.org/en/latest/starting/install/osx/ diff --git a/python/_phat.cpp b/python/_phat.cpp new file mode 100644 index 0000000..df7449d --- /dev/null +++ b/python/_phat.cpp @@ -0,0 +1,291 @@ +//Required header for using pybind11 +#include <pybind11/pybind11.h> + +//Automatic conversions of stl containers to Python ones +#include <pybind11/stl.h> + +//Additional support for operators and numpy +#include <pybind11/operators.h> +#include <pybind11/numpy.h> + +//All the things we're going to wrap +#include "phat/persistence_pairs.h" +#include "phat/compute_persistence_pairs.h" +#include "phat/boundary_matrix.h" +#include "phat/representations/abstract_pivot_column.h" +#include <phat/representations/vector_vector.h> +#include <phat/representations/vector_heap.h> +#include <phat/representations/vector_set.h> +#include <phat/representations/vector_list.h> +#include <phat/representations/sparse_pivot_column.h> +#include <phat/representations/heap_pivot_column.h> +#include <phat/representations/full_pivot_column.h> +#include <phat/representations/bit_tree_pivot_column.h> +#include <phat/algorithms/twist_reduction.h> +#include <phat/algorithms/standard_reduction.h> +#include <phat/algorithms/row_reduction.h> +#include <phat/algorithms/chunk_reduction.h> +#include <phat/algorithms/spectral_sequence_reduction.h> + +namespace py = pybind11; + +//## Some template functions we'll need later + +// This function defines two Python functions in the extension module, that are named +// `compute_persistence_pairs_${rep}_${reduction}` +// `compute_persistence_pairs_dualized_${rep}_${reduction}`. +// +// The Python user will never see these, since we will use (in phat.py) the type of the +// boundary matrix and the requested reduction to dispatch to the correct function +// required. 
+// +// These functions are the main operations of PHAT. In the Python version, they take +// a boundary matrix, and return a persistence_pairs object. +// +// `Reduction` will be an algorithm, `Representation` is a type that controls +// how the boundary matrix stores its internal state. +// +// We will be using this function to define these two functions for every combination +// of `Representation` and `Reduction` that PHAT supports. +template <typename Reduction, typename Representation> +void define_compute_persistence(py::module &mod, + const std::string &representation_suffix, + const std::string &reduction_suffix) { + + auto suffix = representation_suffix + std::string("_") + reduction_suffix; + + //We don't annotate these with doc strings or py::args because + //they are only used internally by code in phat.py + mod.def((std::string("compute_persistence_pairs_") + suffix).c_str(), + [](phat::boundary_matrix<Representation> &matrix){ + phat::persistence_pairs pairs; + phat::compute_persistence_pairs<Reduction>(pairs, matrix); + return pairs; + }); + mod.def((std::string("compute_persistence_pairs_dualized_") + suffix).c_str(), + [](phat::boundary_matrix<Representation> &matrix){ + phat::persistence_pairs pairs; + phat::compute_persistence_pairs_dualized<Reduction>(pairs, matrix); + return pairs; + }); +} + +// Define a function to convert a `boundary_matrix` with one internal representation to a +// `boundary_matrix` with a different internal representation. Like with define_compute_persistence, +// the user will never see this function, but it is used internally by phat.py. 
+template <typename SelfRep, typename OtherRep> +void define_converter(py::module &mod, const std::string &self_suffix, const std::string &other_suffix) { + //We don't annotate these with doc strings or py::args because + //they are only used internally by code in phat.py + mod.def((std::string("convert_") + other_suffix + "_to_" + self_suffix).c_str(), + [](phat::boundary_matrix<OtherRep> &other) { + return phat::boundary_matrix<SelfRep>(other); + }); +} + +// Creates a Python class for a `boundary_matrix<T>`. Boundary matrices are one of two important types +// used by PHAT. +template<class T> +void wrap_boundary_matrix(py::module &mod, const std::string &representation_suffix) { + + using mat = phat::boundary_matrix<T>; + + py::class_<mat>(mod, (std::string("boundary_matrix_") + representation_suffix).c_str()) + //Default no-args constructor + .def(py::init()) + //#### Loading and extracting data from Python lists + //Note we can use references to member functions (even templated ones) directly in many cases. 
+ .def("load_vector_vector", + &mat::template load_vector_vector<phat::index, phat::dimension>, + "Load this instance with the given columns and dimensions", + py::arg("columns"), py::arg("dimensions")) + .def("get_vector_vector", [](mat &m) { + std::vector< std::vector< int > > vector_vector_matrix; + std::vector< int > vector_dims; + m.save_vector_vector( vector_vector_matrix, vector_dims ); + return std::tuple<std::vector<std::vector<int>>, std::vector<int>>(vector_vector_matrix, vector_dims); + }, + "Extract the data in the boundary matrix into a list of columns, and a list of dimensions that correspond to the columns") + //#### Loading and saving files + .def("load_binary", &mat::load_binary, + "Load this instance with data from a binary file") + .def("save_binary", &mat::save_binary, + "Save this instance to a binary file") + .def("load_ascii", &mat::load_ascii, + "Load this instance with data from a text file") + .def("save_ascii", &mat::save_ascii, + "Save this instance to a text file") + //#### Getting and setting dimensions + //Note that boundary_matrix dimensions are not normal matrix dimensions, + //They refer to the dimension of the simplex stored in the given column. + .def("get_dim", &mat::get_dim, + "Get the dimension for the requested column.") + .def("set_dim", &mat::set_dim, "Set the dimension for a single column", + py::arg("index"), py::arg("dimension")) + //The `set_dims` method is an example of making the data structure easier to use + //from Python. This is a method that doesn't exist in the C++ class, but we add it + //using a C++ lambda. This ability to enhance the binding using lambdas + //is an *extremely* handy tool. 
+ .def("set_dims", [](mat &m, std::vector<phat::index> dims) { + m.set_num_cols(dims.size()); + for(size_t i = 0; i < dims.size(); i++) { + m.set_dim(i, dims[i]); + } + }, + "Set the dimension list for this boundary matrix", + py::arg("dimensions")) + + //#### \__eq__ + //The `boundary_matrix<T>`'s `operator==` is templated, which could make a Python wrapper + //very tricky indeed. Luckily, when we define multiple + //methods with the same name but different C++ types, pybind11 will create a Python method + //that chooses between them based on type tags that it manages. This is *also* extremely handy. + .def("__eq__", &mat::template operator==<phat::bit_tree_pivot_column>) + .def("__eq__", &mat::template operator==<phat::sparse_pivot_column>) + .def("__eq__", &mat::template operator==<phat::heap_pivot_column>) + .def("__eq__", &mat::template operator==<phat::full_pivot_column>) + .def("__eq__", &mat::template operator==<phat::vector_vector>) + .def("__eq__", &mat::template operator==<phat::vector_heap>) + .def("__eq__", &mat::template operator==<phat::vector_set>) + .def("__eq__", &mat::template operator==<phat::vector_list>) + + //Python 3.x can figure this out for itself, but Python 2.7 needs to be told: + .def("__ne__", &mat::template operator!=<phat::bit_tree_pivot_column>) + .def("__ne__", &mat::template operator!=<phat::sparse_pivot_column>) + .def("__ne__", &mat::template operator!=<phat::heap_pivot_column>) + .def("__ne__", &mat::template operator!=<phat::full_pivot_column>) + .def("__ne__", &mat::template operator!=<phat::vector_vector>) + .def("__ne__", &mat::template operator!=<phat::vector_heap>) + .def("__ne__", &mat::template operator!=<phat::vector_set>) + .def("__ne__", &mat::template operator!=<phat::vector_list>) + + //#### Data access + + // In `get_col`, since Python is garbage collected, the C++ idiom of passing in a collection + // to load doesn't make much sense. We can simply allocate an STL vector and + // return it. 
The pybind11 framework will take ownership and hook it into the + // Python reference counting system. + .def("get_col", [](mat &m, phat::index col_index) { + std::vector<phat::index> col; + m.get_col(col_index, col); + return col; + }, + "Extract a single column as a list", + py::arg("index")) + .def("set_col", &mat::set_col, + "Set the values for a given column", + py::arg("index"), py::arg("column")) + .def("get_num_cols", &mat::get_num_cols) + .def("is_empty", &mat::is_empty) + .def("get_num_entries", &mat::get_num_entries); + + //#### Compute persistence + // Define compute_persistence(_dualized) for all possible reductions. + define_compute_persistence<phat::standard_reduction, T>(mod, representation_suffix, std::string("sr")); + define_compute_persistence<phat::chunk_reduction, T>(mod, representation_suffix, std::string("cr")); + define_compute_persistence<phat::row_reduction, T>(mod, representation_suffix, std::string("rr")); + define_compute_persistence<phat::twist_reduction, T>(mod, representation_suffix, std::string("tr")); + define_compute_persistence<phat::spectral_sequence_reduction, T>(mod, representation_suffix, std::string("ssr")); + //#### Converters + //Define functions to convert from this kind of `boundary_matrix` to any of the other types + define_converter<T, phat::bit_tree_pivot_column>(mod, representation_suffix, std::string("btpc")); + define_converter<T, phat::sparse_pivot_column>(mod, representation_suffix, std::string("spc")); + define_converter<T, phat::heap_pivot_column>(mod, representation_suffix, std::string("hpc")); + define_converter<T, phat::full_pivot_column>(mod, representation_suffix, std::string("fpc")); + define_converter<T, phat::vector_vector>(mod, representation_suffix, std::string("vv")); + define_converter<T, phat::vector_heap>(mod, representation_suffix, std::string("vh")); + define_converter<T, phat::vector_set>(mod, representation_suffix, std:: string("vs")); + define_converter<T, phat::vector_list>(mod, 
representation_suffix, std::string("vl")); +} +//fix_index checks for out-of-bounds indexes, and converts negative indices to positive ones +//e.g. pairs[-1] => pairs[len(pairs) - 1] +//Throws py::index_error (surfaced to Python as IndexError) when the index is out of range. +phat::index fix_index(const phat::persistence_pairs &p, int index) { + //Note get_num_pairs returns type index, which is not unsigned, though it comes from + //std::vector.size, which is size_t. + phat::index pairs = p.get_num_pairs(); + //An empty collection is deliberately NOT asserted against here: with zero pairs every + //index fails the bounds check below and raises IndexError. That is also what ends + //Python's __len__/__getitem__-based iteration, so `for pair in pairs` on an empty + //collection must reach the throw rather than abort the process. + if (index < 0) { + index = pairs + index; + } + if ((index < 0) || static_cast<size_t>(index) >= static_cast<size_t>(pairs)) { + //pybind11 helpfully converts C++ exceptions into Python ones + throw py::index_error(); + } + return index; +} + +//Here we define the wrapper for the persistence_pairs class. Unlike `boundary_matrix`, this +//class is not templated, so is simpler to wrap. +void wrap_persistence_pairs(py::module &m) { + py::class_<phat::persistence_pairs>(m, "persistence_pairs") + //No-args constructor + .def(py::init()) + + //This is a method that takes two ints + .def("append_pair", + &phat::persistence_pairs::append_pair, + "Appends a single (birth, death) pair", + py::arg("birth"), py::arg("death")) + + //This is a method that takes three ints + .def("set_pair", + &phat::persistence_pairs::set_pair, + "Sets the (birth, death) pair at a given index", + py::arg("index"), py::arg("birth"), py::arg("death")) + + //#### Python collection support + .def("__len__", &phat::persistence_pairs::get_num_pairs) + // Unlike set_pair, this takes a Python 2-tuple + .def("__setitem__", + [](phat::persistence_pairs &p, int index, std::pair<phat::index,phat::index> pair) { + phat::index idx = fix_index(p, index); + p.set_pair(idx, pair.first, pair.second); + }) + // \__len\__ and \__getitem\__ together serve to make this a Python iterable + // so you can do `for i in pairs: blah`. A nicer way is to support \__iter\__, + // which we leave for future work. 
+ .def("__getitem__", [](const phat::persistence_pairs &p, int index) { + phat::index idx = fix_index(p, index); + return p.get_pair(idx); + }) + .def("clear", &phat::persistence_pairs::clear, "Empties the collection") + .def("sort", &phat::persistence_pairs::sort, "Sort in place") + .def("__eq__", &phat::persistence_pairs::operator==) + .def("__ne__", [](phat::persistence_pairs &p, phat::persistence_pairs &other) { + return p != other; + }) + //#### File operations + .def("load_ascii", &phat::persistence_pairs::load_ascii, + "Load the contents of a text file into this instance") + .def("save_ascii", &phat::persistence_pairs::save_ascii, + "Save this instance to a text file") + .def("save_binary", &phat::persistence_pairs::save_binary, + "Save the contents of this instance to a binary file") + .def("load_binary", &phat::persistence_pairs::load_binary, + "Load the contents of a binary file into this instance"); +} + +//## Define the module +//This is where we actually define the `_phat` module. We'll also have a `phat` module that's written +//in Python, which will use `_phat` as an implementation detail. +PYBIND11_PLUGIN(_phat) { + //Create the module object. First arg is the name, second is the module docstring. + py::module m("_phat", "C++ wrapper for PHAT. Please use the phat module, not the _phat module"); + + //Wrap the `persistence_pairs` class + wrap_persistence_pairs(m); + + //#### Generate all the different representations of `boundary_matrix` + wrap_boundary_matrix<phat::bit_tree_pivot_column>(m, "btpc"); + wrap_boundary_matrix<phat::sparse_pivot_column>(m, "spc"); + wrap_boundary_matrix<phat::heap_pivot_column>(m, "hpc"); + wrap_boundary_matrix<phat::full_pivot_column>(m, "fpc"); + wrap_boundary_matrix<phat::vector_vector>(m, "vv"); + wrap_boundary_matrix<phat::vector_heap>(m, "vh"); + wrap_boundary_matrix<phat::vector_set>(m, "vs"); + wrap_boundary_matrix<phat::vector_list>(m, "vl"); + + //We're done! 
+ return m.ptr(); + +} diff --git a/python/phat.py b/python/phat.py new file mode 100644 index 0000000..70f5b39 --- /dev/null +++ b/python/phat.py @@ -0,0 +1,321 @@ +"""Bindings for the Persistent Homology Algorithm Toolbox + +PHAT is a tool for algebraic topology. It can be used via phat.py to compute +persistent (co)homology from boundary matrices, using various reduction +algorithms and column data representations. + +Here is a simple example of usage. + +We will build an ordered boundary matrix of this simplicial complex consisting of a single triangle:: + + 3 + |\\ + | \\ + | \\ + | \\ 4 + 5| \\ + | \\ + | 6 \\ + | \\ + |________\\ + 0 2 1 + +Now the code:: + + import phat + + # define a boundary matrix with the chosen internal representation + boundary_matrix = phat.boundary_matrix(representation = phat.representations.vector_vector) + + # set the respective columns -- (dimension, boundary) pairs + boundary_matrix.columns = [ (0, []), + (0, []), + (1, [0,1]), + (0, []), + (1, [1,3]), + (1, [0,3]), + (2, [2,4,5])] + + # or equivalently, + # boundary_matrix = phat.boundary_matrix(representation = ..., + # columns = ...) + # would combine the creation of the matrix and + # the assignment of the columns + + # print some information of the boundary matrix: + print() + print("The boundary matrix has %d columns:" % len(boundary_matrix.columns)) + for col in boundary_matrix.columns: + s = "Column %d represents a cell of dimension %d." % (col.index, col.dimension) + if (col.boundary): + s = s + " Its boundary consists of the cells " + " ".join([str(c) for c in col.boundary]) + print(s) + print("Overall, the boundary matrix has %d entries." % len(boundary_matrix)) + + pairs = boundary_matrix.compute_persistence_pairs() + + pairs.sort() + + print() + print("There are %d persistence pairs: " % len(pairs)) + for pair in pairs: + print("Birth: %d, Death: %d" % pair) + + +Please see https://bitbucket.org/phat-code/phat/python for more information. 
+""" + +import _phat +import enum + +from _phat import persistence_pairs + +#The public API for the module + +__all__ = ['boundary_matrix', + 'persistence_pairs', + 'representations', + 'reductions'] + + +class representations(enum.Enum): + """Available representations for internal storage of columns in + a `boundary_matrix` + """ + bit_tree_pivot_column = 1 + sparse_pivot_column = 2 + full_pivot_column = 3 + vector_vector = 4 + vector_heap = 5 + vector_set = 6 + vector_list = 7 + + +class reductions(enum.Enum): + """Available reduction algorithms""" + twist_reduction = 1 + chunk_reduction = 2 + standard_reduction = 3 + row_reduction = 4 + spectral_sequence_reduction = 5 + + +class column(object): + """A view on one column of data in a boundary matrix""" + def __init__(self, matrix, index): + """INTERNAL. Columns are created automatically by boundary matrices. + There is no need to construct them directly""" + self._matrix = matrix + self._index = index + + @property + def index(self): + """The 0-based index of this column in its boundary matrix""" + return self._index + + @property + def dimension(self): + """The dimension of the column (0 = point, 1 = line, 2 = triangle, etc.)""" + return self._matrix._matrix.get_dim(self._index) + + @dimension.setter + def dimension(self, value): + return self._matrix._matrix.set_dim(self._index, value) + + @property + def boundary(self): + """The boundary values in this column, i.e. the other columns that this column is bounded by""" + return self._matrix._matrix.get_col(self._index) + + @boundary.setter + def boundary(self, values): + return self._matrix._matrix.set_col(self._index, values) + + def __str__(self): + return "(%d, %s)" % (self.dimension, self.boundary) + +class boundary_matrix(object): + """Boundary matrices that store the shape information of a cell complex. 
+ """ + + def __init__(self, representation = representations.bit_tree_pivot_column, source = None, columns = None): + """ + The boundary matrix will use the specified implementation for storing its + column data. If the `source` parameter is specified, it will be assumed to + be another boundary matrix, whose data should be copied into the new + matrix. + + Parameters + ---------- + + representation : phat.representation, optional + The type of column storage to use in the requested boundary matrix. + source : phat.boundary_matrix, optional + If provided, creates the requested matrix as a copy of the data and dimensions + in `source`. + columns : column list, or list of (dimension, boundary) tuples, optional + If provided, loads these columns into the new boundary matrix. Note that + columns will be loaded in the order given, not according to their ``index`` properties. + + Returns + ------- + + matrix : boundary_matrix + """ + self._representation = representation + if source: + self._matrix = _convert(source, representation) + else: + self._matrix = self.__matrix_for_representation(representation)() + if columns: + self.columns = columns + + @property + def columns(self): + """A collection of column objects""" + return [column(self, i) for i in range(self._matrix.get_num_cols())] + + @columns.setter + def columns(self, columns): + for col in columns: + if not (isinstance(col, column) or isinstance(col, tuple)): + raise TypeError("All columns must be column objects, or (dimension, values) tuples") + if len(columns) != len(self.dimensions): + self._matrix.set_dims([0] * len(columns)) + for i, col in enumerate(columns): + if isinstance(col, column): + self._matrix.set_dim(i, col.dimension) + self._matrix.set_col(i, col.boundary) + else: + dimension, values = col + self._matrix.set_dim(i, dimension) + self._matrix.set_col(i, values) + + @property + def dimensions(self): + """A collection of dimensions, equivalent to [c.dimension for c in self.columns]""" + return 
[self._matrix.get_dim(i) for i in range(self._matrix.get_num_cols())] + + @dimensions.setter + def dimensions(self, dimensions): + return self._matrix.set_dims(dimensions) + + def __matrix_for_representation(self, representation): + short_name = _short_name(representation.name) + return getattr(_phat, "boundary_matrix_" + short_name) + + def __eq__(self, other): + return self._matrix == other._matrix + + #Note Python 2.7 needs BOTH __eq__ and __ne__ otherwise you get things that + #are both equal and not equal + def __ne__(self, other): + return self._matrix != other._matrix + + def __len__(self): + return self._matrix.get_num_entries() + + #Pickle support + def __getstate__(self): + (dimensions, columns) = self._matrix.get_vector_vector() + return (self._representation, dimensions, columns) + + #Pickle support + def __setstate__(self, state): + presentation, dimensions, columns = state + self._representation = representation + self._matrix = self.__matrix_for_representation(representation) + self._matrix.set_vector_vector(dimensions, columns) + + def load(self, file_name, mode = 'b'): + """Load this boundary matrix from a file + + Parameters + ---------- + + file_name : string + The file name to load + + mode : string, optional (defaults to 'b') + The mode ('b' for binary, 't' for text) to use for working with the file + + Returns + ------- + + success : bool + + """ + if mode == 'b': + return self._matrix.load_binary(file_name) + elif mode == 't': + return self._matrix.load_ascii(file_name) + else: + raise ValueError("Only 'b' - binary and 't' - text modes are supported") + + def save(self, file_name, mode = 'b'): + """Save this boundary matrix to a file + + Parameters + ---------- + + file_name : string + The file name to load + + mode : string, optional (defaults to 'b') + The mode ('b' for binary, 't' for text) to use for working with the file + + Returns + ------- + + success : bool + + """ + if mode == 'b': + return self._matrix.save_binary(file_name) + elif 
mode == 't': + return self._matrix.save_ascii(file_name) + else: + raise ValueError("Only 'b' - binary and 't' - text modes are supported") + + def compute_persistence_pairs(self, + reduction = reductions.twist_reduction): + """Computes persistence pairs (birth, death) for the given boundary matrix.""" + representation_short_name = _short_name(self._representation.name) + algo_name = reduction.name + algo_short_name = _short_name(algo_name) + #Look up an implementation that matches the requested characteristics + #in the _phat module + function = getattr(_phat, "compute_persistence_pairs_" + representation_short_name + "_" + algo_short_name) + return function(self._matrix) + + def compute_persistence_pairs_dualized(self, + reduction = reductions.twist_reduction): + """Computes persistence pairs (birth, death) from the dualized form of the given boundary matrix.""" + representation_short_name = _short_name(self._representation.name) + algo_name = reduction.name + algo_short_name = _short_name(algo_name) + #Look up an implementation that matches the requested characteristics + #in the _phat module + function = getattr(_phat, "compute_persistence_pairs_dualized_" + representation_short_name + "_" + algo_short_name) + return function(self._matrix) + + def convert(self, representation): + """Copy this matrix to another with a different representation""" + return boundary_matrix(representation, self) + +def _short_name(name): + """An internal API that takes leading characters from words + For instance, 'bit_tree_pivot_column' becomes 'btpc' + """ + return "".join([n[0] for n in name.split("_")]) + +def _convert(source, to_representation): + """Internal - function to convert from one `boundary_matrix` implementation to another""" + class_name = source._representation.name + source_rep_short_name = _short_name(class_name) + to_rep_short_name = _short_name(to_representation.name) + function = getattr(_phat, "convert_%s_to_%s" % (source_rep_short_name, to_rep_short_name)) + 
return function(source._matrix) + + + diff --git a/python/setup.cfg b/python/setup.cfg new file mode 100644 index 0000000..458847f --- /dev/null +++ b/python/setup.cfg @@ -0,0 +1,5 @@ +# [bdist_wheel] +# This flag says that the code is written to work on both Python 2 and Python +# 3. If at all possible, it is good practice to do this. If you cannot, you +# will need to generate wheels for each Python version that you support. +# universal=1
\ No newline at end of file diff --git a/python/src/self_test.py b/python/src/self_test.py new file mode 100644 index 0000000..8017387 --- /dev/null +++ b/python/src/self_test.py @@ -0,0 +1,166 @@ +from __future__ import print_function +import sys +import phat + +if __name__=='__main__': + test_data = (sys.argv[1:] and sys.argv[1]) or "../../examples/torus.bin" + + print("Reading test data %s in binary format ..." % test_data) + + boundary_matrix = phat.boundary_matrix() + # This is broken for some reason + if not boundary_matrix.load(test_data): + print("Error: test data %s not found!" % test_data) + sys.exit(1) + + error = False + + def compute_chunked(mat): + return mat.compute_persistence_pairs(phat.reductions.chunk_reduction) + + print("Comparing representations using Chunk algorithm ...") + print("Running Chunk - Sparse ...") + sparse_boundary_matrix = phat.boundary_matrix(phat.representations.sparse_pivot_column, boundary_matrix) + sparse_pairs = compute_chunked(sparse_boundary_matrix) + + print("Running Chunk - Heap ...") + heap_boundary_matrix = phat.boundary_matrix(phat.representations.vector_heap, boundary_matrix) + heap_pairs = compute_chunked(heap_boundary_matrix) + + print("Running Chunk - Full ...") + full_boundary_matrix = phat.boundary_matrix(phat.representations.full_pivot_column, boundary_matrix) + full_pairs = compute_chunked(full_boundary_matrix) + + print("Running Chunk - BitTree ...") + bit_tree_boundary_matrix = phat.boundary_matrix(phat.representations.bit_tree_pivot_column, boundary_matrix) + bit_tree_pairs = compute_chunked(bit_tree_boundary_matrix) + + print("Running Chunk - Vec_vec ...") + vec_vec_boundary_matrix = phat.boundary_matrix(phat.representations.vector_vector, boundary_matrix) + vec_vec_pairs = compute_chunked(vec_vec_boundary_matrix) + + print("Running Chunk - Vec_heap ...") + vec_heap_boundary_matrix = phat.boundary_matrix(phat.representations.vector_heap, boundary_matrix) + vec_heap_pairs = 
compute_chunked(vec_heap_boundary_matrix) + + print("Running Chunk - Vec_set ...") + vec_set_boundary_matrix = phat.boundary_matrix(phat.representations.vector_set, boundary_matrix) + vec_set_pairs = compute_chunked(vec_set_boundary_matrix) + + print("Running Chunk - Vec_list ...") + vec_list_boundary_matrix = phat.boundary_matrix(phat.representations.vector_list, boundary_matrix) + vec_list_pairs = compute_chunked(vec_list_boundary_matrix) + + if sparse_pairs != heap_pairs: + print("Error: sparse and heap differ!", file=sys.stderr) + error = True + if heap_pairs != full_pairs: + print("Error: heap and full differ!", file=sys.stderr) + error = True + if full_pairs != vec_vec_pairs: + print("Error: full and vec_vec differ!", file=sys.stderr) + error = True + if vec_vec_pairs != vec_heap_pairs: + print("Error: vec_vec and vec_heap differ!", file=sys.stderr) + error = True + if vec_heap_pairs != vec_set_pairs: + print("Error: vec_heap and vec_set differ!", file=sys.stderr) + error = True + if vec_set_pairs != bit_tree_pairs: + print("Error: vec_set and bit_tree differ!", file=sys.stderr) + error = True + if bit_tree_pairs != vec_list_pairs: + print("Error: bit_tree and vec_list differ!", file=sys.stderr) + error = True + if vec_list_pairs != sparse_pairs: + print("Error: vec_list and sparse differ!", file=sys.stderr) + error = True + if error: + sys.exit(1) + else: + print("All results are identical (as they should be)") + + print("Comparing algorithms using BitTree representation ...") + print("Running Twist - BitTree ...") + + def bit_tree_mat(): + return phat.boundary_matrix(phat.representations.bit_tree_pivot_column, boundary_matrix) + + reps = phat.representations + reds = phat.reductions + def pairs(mat, red): + return mat.compute_persistence_pairs(red) + + twist_boundary_matrix = bit_tree_mat() + twist_pairs = pairs(twist_boundary_matrix, reds.twist_reduction) + + print("Running Standard - BitTree ...") + std_boundary_matrix = bit_tree_mat() + std_pairs = 
pairs(std_boundary_matrix, reds.standard_reduction) + + print("Running Chunk - BitTree ...") + chunk_boundary_matrix = bit_tree_mat() + chunk_pairs = pairs(chunk_boundary_matrix, reds.chunk_reduction) + + print("Running Row - BitTree ...") + row_boundary_matrix = bit_tree_mat() + row_pairs = pairs(row_boundary_matrix, reds.row_reduction) + + print("Running Spectral sequence - BitTree ...") + ss_boundary_matrix = bit_tree_mat() + ss_pairs = pairs(ss_boundary_matrix, reds.spectral_sequence_reduction) + + if twist_pairs != std_pairs: + print("Error: twist and standard differ!", file=sys.stderr) + error = True + if std_pairs != chunk_pairs: + print("Error: standard and chunk differ!", file=sys.stderr) + error = True + if chunk_pairs != row_pairs: + print("Error: chunk and row differ!", file=sys.stderr) + error = True + if row_pairs != ss_pairs: + print("Error: row and spectral sequence differ!", file=sys.stderr) + error = True + if ss_pairs != twist_pairs: + print("Error: spectral sequence and twist differ!", file=sys.stderr) + error = True + if error: + sys.exit(1) + else: + print("All results are identical (as they should be)") + + print("Comparing primal and dual approach using Chunk - Full ...") + + primal_boundary_matrix = phat.boundary_matrix(reps.full_pivot_column, boundary_matrix) + primal_pairs = primal_boundary_matrix.compute_persistence_pairs(reds.chunk_reduction) + + dual_boundary_matrix = phat.boundary_matrix(reps.full_pivot_column, boundary_matrix) + dual_pairs = dual_boundary_matrix.compute_persistence_pairs_dualized() + + if primal_pairs != dual_pairs: + print("Error: primal and dual differ!", file=sys.stderr) + error = True + + + if error: + sys.exit(1) + else: + print("All results are identical (as they should be)") + + print("Testing vector<vector> interface ...") + + vector_vector_boundary_matrix = phat.boundary_matrix(phat.representations.bit_tree_pivot_column) + + vector_vector_boundary_matrix.columns = boundary_matrix.columns + + if 
vector_vector_boundary_matrix != boundary_matrix: + print("Error: [load|save]_vector_vector bug", file=sys.stderr) + error = True + + if error: + sys.exit(1) + else: + print("Test passed!") + + diff --git a/python/src/simple_example.py b/python/src/simple_example.py new file mode 100644 index 0000000..955e213 --- /dev/null +++ b/python/src/simple_example.py @@ -0,0 +1,55 @@ +"""This file contains a simple example that demonstrates the usage of the library interface""" + +if __name__ == "__main__": + + print(""" + we will build an ordered boundary matrix of this simplicial complex consisting of a single triangle: + + 3 + |\\ + | \\ + | \\ + | \\ 4 + 5| \\ + | \\ + | 6 \\ + | \\ + |________\\ + 0 2 1 + +""") + + import phat + + # define a boundary matrix with the chosen internal representation + boundary_matrix = phat.boundary_matrix(representation = phat.representations.vector_vector) + + # set the respective columns -- (dimension, boundary) pairs + boundary_matrix.columns = [ (0, []), + (0, []), + (1, [0,1]), + (0, []), + (1, [1,3]), + (1, [0,3]), + (2, [2,4,5])] + + # or equivalently, boundary_matrix = phat.boundary_matrix(representation = ..., columns = ...) + # would combine the creation of the matrix and the assignment of the columns + + # print some information of the boundary matrix: + print("\nThe boundary matrix has %d columns:" % len(boundary_matrix.columns)) + for col in boundary_matrix.columns: + s = "Column %d represents a cell of dimension %d." % (col.index, col.dimension) + if (col.boundary): + s = s + " Its boundary consists of the cells " + " ".join([str(c) for c in col.boundary]) + print(s) + print("Overall, the boundary matrix has %d entries." % len(boundary_matrix)) + + pairs = boundary_matrix.compute_persistence_pairs() + + pairs.sort() + + print("\nThere are %d persistence pairs: " % len(pairs)) + for pair in pairs: + print("Birth: %d, Death: %d" % pair) + |