From f95c4cd479caabf54d425b3f62901f977098abe0 Mon Sep 17 00:00:00 2001 From: Gard Spreemann Date: Tue, 22 Mar 2016 16:14:10 +0100 Subject: Initial commit with basic stuff from messy scripts. --- README.md | 48 +++++++++ phstuff/__init__.py | 0 phstuff/barcode.py | 68 ++++++++++++ phstuff/diphawrapper.py | 281 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 397 insertions(+) create mode 100644 README.md create mode 100644 phstuff/__init__.py create mode 100644 phstuff/barcode.py create mode 100644 phstuff/diphawrapper.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..7bb5c22 --- /dev/null +++ b/README.md @@ -0,0 +1,48 @@ +Introduction +===== + +This package is a cleaned up subset of the hogdepodge of the Python +glue that I regularly use to massage data into and out of persistent +homology and other TDA computations. It does *not* compute anything +itself, and exists primarily to marshal data into and out of the +formats used by the excellent [DIPHA](https://github.com/DIPHA/dipha) +software, in order to make it easier to pre- and post-process data in +Python. + +Caveats +----- + +I decided to clean up and release a subset of these scripts to make +life slightly easier for those who compute persistent homology and +prefer to manipulate data in Python. The scripts come with the +following caveats: + +- The scripts were too messy to release as they were, and the cleaning + up process means that this is now essentially untested software + again. Beware. + +- There is nothing of substance here. This is just glue around + [DIPHA](https://github.com/DIPHA/dipha). + +- I use Python as modern Perl, and my experience with it is limited to + quickly manipulating data without too much thought for writing + structured software. Beware. + +- I make no attempt to accurately reflect the full capabilities of + [DIPHA](https://github.com/DIPHA/dipha). + +- Since Python will let you write `Interval("banana", 5)`, so will I, + and you and your persistence generator "from banana to 5" can go + solve problems down the road. + + +Installation +===== + +FIXME. + + +Use +===== + +FIXME. diff --git a/phstuff/__init__.py b/phstuff/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/phstuff/barcode.py b/phstuff/barcode.py new file mode 100644 index 0000000..9c7b05e --- /dev/null +++ b/phstuff/barcode.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- + +import numpy as np + +class Interval: + def __init__(self, birth, death = None): + if death is not None and death < birth: + raise ValueError("Death must be at least birth.") + self.birth = birth + self.death = death + + def is_finite(self): + return self.death is not None + + def __str__(self): + if self.is_finite(): + return "(%s, %s)" %(str(self.birth), str(self.death)) + else: + return "(%s, ∞)" %(str(self.birth)) + + def __repr__(self): + if self.is_finite(): + return "Interval(%s, %s)" %(repr(self.birth), repr(self.death)) + else: + return "Interval(%s)" %(repr(self.birth)) + + def size(self): + if self.is_finite(): + return self.death - self.birth + else: + return float("inf") + + +def finitize(barcode, max_scale): + ret = [] + for bar in barcode: + if bar.is_finite(): + ret.append(bar) + else: + ret.append(Interval(bar.birth, max_scale)) + return ret + +def betti_curve(barcode, min_scale, max_scale): + xs = [] + for interval in pd: + xs.append((interval.l, 1)) + if interval.is_finite(): + xs.append((interval.u, -1)) + + if not xs: + return np.zeros((2,1)) + + xs.sort() + scales = [min_scale] + bettis = [0] + + for (t, delta) in xs: + if scales[-1] == t: + bettis[-1] += delta + else: + scales.append(t) + bettis.append(bettis[-1] + delta) + + if scales[-1] < max_scale: + scales.append(max_scale) + bettis.append(bettis[-1]) + + return (np.array(scales), np.array(bettis, dtype=int)) diff --git a/phstuff/diphawrapper.py b/phstuff/diphawrapper.py new file mode 100644 index 0000000..ac3983f --- /dev/null +++ b/phstuff/diphawrapper.py @@ -0,0 +1,281 @@ +# -*- coding: utf-8 -*- + +import struct +import numpy as np +import multiprocessing # To get CPU count. +import os +import tempfile +import subprocess + +import phstuff.barcode as bc + +# FIXME, TODO: These magic numbers should really be read from the +# DIPHA header files. +DIPHA_MAGIC = 8067171840 +DIPHA_WEIGHTED_BOUNDARY_MATRIX = 0 +DIPHA_PERSISTENCE_DIAGRAM = 2 +DIPHA_DISTANCE_MATRIX = 7 +DIPHA_SPARSE_DISTANCE_MATRIX = 8 + + +def save_weight_matrix(fname, weights): + """Write a NumPy weight matrix to a DIPHA full distance matrix + file. Attention: DIPHA uses a convention wherein an edge's + filtration value is *half* its weight. This function compensates + by multiplying every weight by 2. Pay attention in case DIPHA's + behavior changes in the future! + + Parameters: + ----------- + + fname: Name of file to write. + + weights: NumPy array. Elements will be converted to IEEE-754 + doubles and used as weights. The entire array is passed on to + DIPHA, whose behvior is undefined if it's not symmetric. + + """ + + # override_dipha_half: DIPHA currently (commit 0081862) divides all + # entries by 2 when loading the file. If this argument is `True`, + # we compensate by multiplying all entries by 2. Be sure to pay + # attention if DIPHA's behavior changes! + + factor = 2.0 + + m = weights.shape[0] + if weights.shape[0] != weights.shape[1]: + raise ValueError("Matrix is not square.") + + with open(fname, "wb") as f: + f.write(struct.pack("