diff options
author | Gard Spreemann <gspreemann@gmail.com> | 2016-03-22 16:14:10 +0100 |
---|---|---|
committer | Gard Spreemann <gspreemann@gmail.com> | 2016-03-22 16:14:10 +0100 |
commit | f95c4cd479caabf54d425b3f62901f977098abe0 (patch) | |
tree | 5e8ccae84112b5194fe6ef2ac9a831682e737463 |
Initial commit with basic stuff from messy scripts.
-rw-r--r-- | README.md | 48 | ||||
-rw-r--r-- | phstuff/__init__.py | 0 | ||||
-rw-r--r-- | phstuff/barcode.py | 68 | ||||
-rw-r--r-- | phstuff/diphawrapper.py | 281 |
4 files changed, 397 insertions, 0 deletions
diff --git a/README.md b/README.md new file mode 100644 index 0000000..7bb5c22 --- /dev/null +++ b/README.md @@ -0,0 +1,48 @@ +Introduction +===== + +This package is a cleaned up subset of the hogdepodge of the Python +glue that I regularly use to massage data into and out of persistent +homology and other TDA computations. It does *not* compute anything +itself, and exists primarily to marshal data into and out of the +formats used by the excellent [DIPHA](https://github.com/DIPHA/dipha) +software, in order to make it easier to pre- and post-process data in +Python. + +Caveats +----- + +I decided to clean up and release a subset of these scripts to make +life slightly easier for those who compute persistent homology and +prefer to manipulate data in Python. The scripts come with the +following caveats: + +- The scripts were too messy to release as they were, and the cleaning + up process means that this is now essentially untested software + again. Beware. + +- There is nothing of substance here. This is just glue around + [DIPHA](https://github.com/DIPHA/dipha). + +- I use Python as modern Perl, and my experience with it is limited to + quickly manipulating data without too much thought for writing + structured software. Beware. + +- I make no attempt to accurately reflect the full capabilities of + [DIPHA](https://github.com/DIPHA/dipha). + +- Since Python will let you write `Interval("banana", 5)`, so will I, + and you and your persistence generator "from banana to 5" can go + solve problems down the road. + + +Installation +===== + +FIXME. + + +Use +===== + +FIXME. diff --git a/phstuff/__init__.py b/phstuff/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/phstuff/__init__.py diff --git a/phstuff/barcode.py b/phstuff/barcode.py new file mode 100644 index 0000000..9c7b05e --- /dev/null +++ b/phstuff/barcode.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- + +import numpy as np + +class Interval: + def __init__(self, birth, death = None): + if death is not None and death < birth: + raise ValueError("Death must be at least birth.") + self.birth = birth + self.death = death + + def is_finite(self): + return self.death is not None + + def __str__(self): + if self.is_finite(): + return "(%s, %s)" %(str(self.birth), str(self.death)) + else: + return "(%s, ∞)" %(str(self.birth)) + + def __repr__(self): + if self.is_finite(): + return "Interval(%s, %s)" %(repr(self.birth), repr(self.death)) + else: + return "Interval(%s)" %(repr(self.birth)) + + def size(self): + if self.is_finite(): + return self.death - self.birth + else: + return float("inf") + + +def finitize(barcode, max_scale): + ret = [] + for bar in barcode: + if bar.is_finite(): + ret.append(bar) + else: + ret.append(Interval(bar.birth, max_scale)) + return ret + +def betti_curve(barcode, min_scale, max_scale): + xs = [] + for interval in pd: + xs.append((interval.l, 1)) + if interval.is_finite(): + xs.append((interval.u, -1)) + + if not xs: + return np.zeros((2,1)) + + xs.sort() + scales = [min_scale] + bettis = [0] + + for (t, delta) in xs: + if scales[-1] == t: + bettis[-1] += delta + else: + scales.append(t) + bettis.append(bettis[-1] + delta) + + if scales[-1] < max_scale: + scales.append(max_scale) + bettis.append(bettis[-1]) + + return (np.array(scales), np.array(bettis, dtype=int)) diff --git a/phstuff/diphawrapper.py b/phstuff/diphawrapper.py new file mode 100644 index 0000000..ac3983f --- /dev/null +++ b/phstuff/diphawrapper.py @@ -0,0 +1,281 @@ +# -*- coding: utf-8 -*- + +import struct +import numpy as np +import multiprocessing # To get CPU count. +import os +import tempfile +import subprocess + +import phstuff.barcode as bc + +# FIXME, TODO: These magic numbers should really be read from the +# DIPHA header files. +DIPHA_MAGIC = 8067171840 +DIPHA_WEIGHTED_BOUNDARY_MATRIX = 0 +DIPHA_PERSISTENCE_DIAGRAM = 2 +DIPHA_DISTANCE_MATRIX = 7 +DIPHA_SPARSE_DISTANCE_MATRIX = 8 + + +def save_weight_matrix(fname, weights): + """Write a NumPy weight matrix to a DIPHA full distance matrix + file. Attention: DIPHA uses a convention wherein an edge's + filtration value is *half* its weight. This function compensates + by multiplying every weight by 2. Pay attention in case DIPHA's + behavior changes in the future! + + Parameters: + ----------- + + fname: Name of file to write. + + weights: NumPy array. Elements will be converted to IEEE-754 + doubles and used as weights. The entire array is passed on to + DIPHA, whose behvior is undefined if it's not symmetric. + + """ + + # override_dipha_half: DIPHA currently (commit 0081862) divides all + # entries by 2 when loading the file. If this argument is `True`, + # we compensate by multiplying all entries by 2. Be sure to pay + # attention if DIPHA's behavior changes! + + factor = 2.0 + + m = weights.shape[0] + if weights.shape[0] != weights.shape[1]: + raise ValueError("Matrix is not square.") + + with open(fname, "wb") as f: + f.write(struct.pack("<q", DIPHA_MAGIC)) + f.write(struct.pack("<q", DIPHA_DISTANCE_MATRIX)) + f.write(struct.pack("<q", m)) + + # Why write in a loop? I heard rumors that struct.pack will + # allocate a lot of temporary heap space if given lots of + # data. I haven't actually tested. + for i in range(0, m): + for j in range(0, m): + f.write(struct.pack("<d", factor*weights[i,j])) + +def save_masked_weight_matrix(fname, weights): + """Write a masked NumPy weight matrix to a DIPHA sparse distance + matrix file, keeping edges only for unmasked entries. Attention: + DIPHA uses a convention wherein an edge's filtration value is + *half* its weight. This function compensates by multiplying every + weight by 2. Pay attention in case DIPHA's behavior changes in the + future! + + Parameters: + ----------- + + fname: Name of file to write. + + weights: Masked NumPy array. Unmasked elements will be converted + to IEEE-754 and used as weights. Must be symmetric (not + checked). + + """ + + factor = 2.0 + + n = weights.shape[1] + + with open(fname, "wb") as f: + f.write(struct.pack("<q", DIPHA_MAGIC)) + f.write(struct.pack("<q", DIPHA_SPARSE_DISTANCE_MATRIX)) + f.write(struct.pack("<q", n)) + + for i in range(0, n): + f.write(struct.pack('<q', n - np.sum(weights.mask[i, :]))) + for i in range(0, n): + for j in np.arange(0, n)[-(weights.mask[i, :])]: + f.write(struct.pack('<q', j)) + for i in range(0, n): + for j in np.arange(0, n)[-(weights.mask[i, :])]: + f.write(struct.pack('<d', factor*weights[i, j])) + + + +def save_edge_list(fname, edge_list): + """Write a possibly non-complete weighted graph represented as an edge + list to a DIPHA sparse distance file. Attention: DIPHA uses a + convention wherein an edge's filtration value is *half* its + weight. This function compensates by multiplying every weight by + 2. Pay attention in case DIPHA's behavior changes in the future! + + Parameters: + ----------- + + fname: Name of file to write. + + edge_list: A list of the edges and weights of each node, in node + order. If node `i` has edges to nodes `a_{i,0}, …¸ a_{i,k_i}` + with weights `w_{i,0}, …, w_{i,k_i}`, then this argument should + be + + `[[(a_{0,0}, w_{0,0}), …, (a_{0,k_0}, w_{0,k_0})], + [(a_{1,0}, w_{1,0}), …, (a_{1,k_1}, w_{1,k_1})], + …, + [(a_{n,0}, w_{n,0}), …, (a_{n,k_n}, w_{n,k_n})]]` + + Remember that an isolated node corresponds to an empty (inner) + list. + + """ + + factor = 2.0 + + n = len(edge_list) + + with open(fname, "wb") as f: + f.write(struct.pack("<q", DIPHA_MAGIC)) + f.write(struct.pack("<q", DIPHA_SPARSE_DISTANCE_MATRIX)) + f.write(struct.pack("<q", n)) + + for i in range(0, n): + f.write(struct.pack('<q', len(edge_list[i]))) + for i in range(0, n): + for j in range(0, len(edge_list[i])): + f.write(struct.pack('<q', edge_list[i][j][0])) + for i in range(0, n): + for j in range(0, len(edge_list[i])): + f.write(struct.pack('<d', factor*edge_list[i][j][1])) + + +def load_barcode(fname, top_dim = None): + ret = dict() + + with open(fname, "rb") as f: + if struct.unpack('<q', f.read(8))[0] != DIPHA_MAGIC: + raise IOError("File %s is not a valid DIPHA file." %(fname)) + if struct.unpack('<q', f.read(8))[0] != DIPHA_PERSISTENCE_DIAGRAM: + raise IOError("File %s is not a valid DIPHA barcode file." %(fname)) + + n = struct.unpack('<q', f.read(8))[0] + + for i in range(0, n): + (d, birth, death) = struct.unpack('<qdd', f.read(3*8)) + if d < 0: + dim = -d -1 + else: + dim = d + + if top_dim is None or dim <= top_dim: + if d < 0: + ret.setdefault(dim, []).append(bc.Interval(birth)) + else: + ret.setdefault(dim, []).append(bc.Interval(birth, death)) + + return ret + +def save_text_barcode(fname, barcode): + with open(fname, "w") as f: + for dim in barcode.keys(): + for interval in barcode[dim]: + if interval.is_finite(): + f.write("%d %g %g\n" %(dim, interval.birth, interval.death)) + else: + f.write("%d %g inf\n" %(dim, interval.birth)) + + +class DiphaRunner: + def __init__(self, top_dim, cpu_count = None, quiet = True, mpirun = None, dipha = None): + self.top_dim = int(top_dim) + if top_dim <= 0: + raise ValueError("Top dimension must be postitive.") + + if cpu_count is None: + try: + self.cpu_count = multiprocessing.cpu_count() + except NotImplementedError as e: + self.cpu_count = 1 + else: + self.cpu_count = cpu_count + + self.quiet = quiet + + if mpirun is None: + self.mpirun = os.environ.get("MPIRUN") + paths = os.environ.get("PATH", "") + if self.mpirun is None: + for path in paths.split(":"): + x = os.path.join(path, "mpirun") + if os.path.isfile(x): + self.mpirun = x + break + else: + self.mpirun = mpirun + + if dipha is None: + self.dipha = os.environ.get("DIPHA") + paths = os.environ.get("PATH", "") + if self.dipha is None: + for path in paths.split(":"): + x = os.path.join(path, "dipha") + if os.path.isfile(x): + self.dipha = x + break + else: + self.dipha = dipha + + if self.mpirun is None: + raise RuntimeError("Could not find mpirun exectuable.") + if self.dipha is None: + raise RuntimeError("Could not find dipha executable.") + + self.tmpdir = tempfile.mkdtemp() + self.ifile = os.path.join(self.tmpdir, "input.dipha") + self.ofile = os.path.join(self.tmpdir, "output.dipha") + self.ready = False + + + def weight_matrix(self, weights): + save_weight_matrix(self.ifile, weights) + self.ready = True + + def masked_weight_matrix(self, weights): + save_masked_weight_matrix(self.ifile, weights) + self.ready = True + + def edge_list(self, edge_list): + save_edge_list(self.ifile, edge_list) + self.ready = True + + def run(self): + if not self.ready: + self.cleanup() + raise RuntimeError("Not ready to run. Maybe you haven't given me data, or I've already been run?") + + if self.quiet: + opipe = subprocess.PIPE + else: + opipe = None + + if not self.quiet: + print("Spawning DIPHA.") + + proc = subprocess.Popen([self.mpirun, "-np", "%d" %(self.cpu_count), self.dipha, "--upper_dim", "%d" %(self.top_dim), self.ifile, self.ofile], stdout=opipe) + + self.out_message = proc.communicate()[0] + self.code = proc.returncode + + if not self.quiet and self.code != 0: + print("DIPHA exited abnormally with code %d." %(self.code)) + self.cleanup() + + if self.code == 0: + self.barcode = load_barcode(self.ofile) + + self.ready = False + self.cleanup() + + + def cleanup(self): + os.remove(self.ifile) + os.remove(self.ofile) + os.rmdir(self.tmpdir) + self.ifile = None + self.ofile = None + self.tmpdir = None |