summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGard Spreemann <gspreemann@gmail.com>2016-03-22 16:14:10 +0100
committerGard Spreemann <gspreemann@gmail.com>2016-03-22 16:14:10 +0100
commitf95c4cd479caabf54d425b3f62901f977098abe0 (patch)
tree5e8ccae84112b5194fe6ef2ac9a831682e737463
Initial commit with basic stuff from messy scripts.
-rw-r--r--README.md48
-rw-r--r--phstuff/__init__.py0
-rw-r--r--phstuff/barcode.py68
-rw-r--r--phstuff/diphawrapper.py281
4 files changed, 397 insertions, 0 deletions
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..7bb5c22
--- /dev/null
+++ b/README.md
@@ -0,0 +1,48 @@
+Introduction
+=====
+
+This package is a cleaned up subset of the hogdepodge of the Python
+glue that I regularly use to massage data into and out of persistent
+homology and other TDA computations. It does *not* compute anything
+itself, and exists primarily to marshal data into and out of the
+formats used by the excellent [DIPHA](https://github.com/DIPHA/dipha)
+software, in order to make it easier to pre- and post-process data in
+Python.
+
+Caveats
+-----
+
+I decided to clean up and release a subset of these scripts to make
+life slightly easier for those who compute persistent homology and
+prefer to manipulate data in Python. The scripts come with the
+following caveats:
+
+- The scripts were too messy to release as they were, and the cleaning
+ up process means that this is now essentially untested software
+ again. Beware.
+
+- There is nothing of substance here. This is just glue around
+ [DIPHA](https://github.com/DIPHA/dipha).
+
+- I use Python as modern Perl, and my experience with it is limited to
+ quickly manipulating data without too much thought for writing
+ structured software. Beware.
+
+- I make no attempt to accurately reflect the full capabilities of
+ [DIPHA](https://github.com/DIPHA/dipha).
+
+- Since Python will let you write `Interval("banana", 5)`, so will I,
+ and you and your persistence generator "from banana to 5" can go
+ solve problems down the road.
+
+
+Installation
+=====
+
+FIXME.
+
+
+Use
+=====
+
+FIXME.
diff --git a/phstuff/__init__.py b/phstuff/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/phstuff/__init__.py
diff --git a/phstuff/barcode.py b/phstuff/barcode.py
new file mode 100644
index 0000000..9c7b05e
--- /dev/null
+++ b/phstuff/barcode.py
@@ -0,0 +1,68 @@
+# -*- coding: utf-8 -*-
+
+import numpy as np
+
+class Interval:
+ def __init__(self, birth, death = None):
+ if death is not None and death < birth:
+ raise ValueError("Death must be at least birth.")
+ self.birth = birth
+ self.death = death
+
+ def is_finite(self):
+ return self.death is not None
+
+ def __str__(self):
+ if self.is_finite():
+ return "(%s, %s)" %(str(self.birth), str(self.death))
+ else:
+ return "(%s, ∞)" %(str(self.birth))
+
+ def __repr__(self):
+ if self.is_finite():
+ return "Interval(%s, %s)" %(repr(self.birth), repr(self.death))
+ else:
+ return "Interval(%s)" %(repr(self.birth))
+
+ def size(self):
+ if self.is_finite():
+ return self.death - self.birth
+ else:
+ return float("inf")
+
+
+def finitize(barcode, max_scale):
+ ret = []
+ for bar in barcode:
+ if bar.is_finite():
+ ret.append(bar)
+ else:
+ ret.append(Interval(bar.birth, max_scale))
+ return ret
+
+def betti_curve(barcode, min_scale, max_scale):
+ xs = []
+ for interval in pd:
+ xs.append((interval.l, 1))
+ if interval.is_finite():
+ xs.append((interval.u, -1))
+
+ if not xs:
+ return np.zeros((2,1))
+
+ xs.sort()
+ scales = [min_scale]
+ bettis = [0]
+
+ for (t, delta) in xs:
+ if scales[-1] == t:
+ bettis[-1] += delta
+ else:
+ scales.append(t)
+ bettis.append(bettis[-1] + delta)
+
+ if scales[-1] < max_scale:
+ scales.append(max_scale)
+ bettis.append(bettis[-1])
+
+ return (np.array(scales), np.array(bettis, dtype=int))
diff --git a/phstuff/diphawrapper.py b/phstuff/diphawrapper.py
new file mode 100644
index 0000000..ac3983f
--- /dev/null
+++ b/phstuff/diphawrapper.py
@@ -0,0 +1,281 @@
+# -*- coding: utf-8 -*-
+
+import struct
+import numpy as np
+import multiprocessing # To get CPU count.
+import os
+import tempfile
+import subprocess
+
+import phstuff.barcode as bc
+
+# FIXME, TODO: These magic numbers should really be read from the
+# DIPHA header files.
+DIPHA_MAGIC = 8067171840
+DIPHA_WEIGHTED_BOUNDARY_MATRIX = 0
+DIPHA_PERSISTENCE_DIAGRAM = 2
+DIPHA_DISTANCE_MATRIX = 7
+DIPHA_SPARSE_DISTANCE_MATRIX = 8
+
+
+def save_weight_matrix(fname, weights):
+ """Write a NumPy weight matrix to a DIPHA full distance matrix
+ file. Attention: DIPHA uses a convention wherein an edge's
+ filtration value is *half* its weight. This function compensates
+ by multiplying every weight by 2. Pay attention in case DIPHA's
+ behavior changes in the future!
+
+ Parameters:
+ -----------
+
+ fname: Name of file to write.
+
+ weights: NumPy array. Elements will be converted to IEEE-754
+ doubles and used as weights. The entire array is passed on to
+ DIPHA, whose behvior is undefined if it's not symmetric.
+
+ """
+
+ # override_dipha_half: DIPHA currently (commit 0081862) divides all
+ # entries by 2 when loading the file. If this argument is `True`,
+ # we compensate by multiplying all entries by 2. Be sure to pay
+ # attention if DIPHA's behavior changes!
+
+ factor = 2.0
+
+ m = weights.shape[0]
+ if weights.shape[0] != weights.shape[1]:
+ raise ValueError("Matrix is not square.")
+
+ with open(fname, "wb") as f:
+ f.write(struct.pack("<q", DIPHA_MAGIC))
+ f.write(struct.pack("<q", DIPHA_DISTANCE_MATRIX))
+ f.write(struct.pack("<q", m))
+
+ # Why write in a loop? I heard rumors that struct.pack will
+ # allocate a lot of temporary heap space if given lots of
+ # data. I haven't actually tested.
+ for i in range(0, m):
+ for j in range(0, m):
+ f.write(struct.pack("<d", factor*weights[i,j]))
+
+def save_masked_weight_matrix(fname, weights):
+ """Write a masked NumPy weight matrix to a DIPHA sparse distance
+ matrix file, keeping edges only for unmasked entries. Attention:
+ DIPHA uses a convention wherein an edge's filtration value is
+ *half* its weight. This function compensates by multiplying every
+ weight by 2. Pay attention in case DIPHA's behavior changes in the
+ future!
+
+ Parameters:
+ -----------
+
+ fname: Name of file to write.
+
+ weights: Masked NumPy array. Unmasked elements will be converted
+ to IEEE-754 and used as weights. Must be symmetric (not
+ checked).
+
+ """
+
+ factor = 2.0
+
+ n = weights.shape[1]
+
+ with open(fname, "wb") as f:
+ f.write(struct.pack("<q", DIPHA_MAGIC))
+ f.write(struct.pack("<q", DIPHA_SPARSE_DISTANCE_MATRIX))
+ f.write(struct.pack("<q", n))
+
+ for i in range(0, n):
+ f.write(struct.pack('<q', n - np.sum(weights.mask[i, :])))
+ for i in range(0, n):
+ for j in np.arange(0, n)[-(weights.mask[i, :])]:
+ f.write(struct.pack('<q', j))
+ for i in range(0, n):
+ for j in np.arange(0, n)[-(weights.mask[i, :])]:
+ f.write(struct.pack('<d', factor*weights[i, j]))
+
+
+
+def save_edge_list(fname, edge_list):
+ """Write a possibly non-complete weighted graph represented as an edge
+ list to a DIPHA sparse distance file. Attention: DIPHA uses a
+ convention wherein an edge's filtration value is *half* its
+ weight. This function compensates by multiplying every weight by
+ 2. Pay attention in case DIPHA's behavior changes in the future!
+
+ Parameters:
+ -----------
+
+ fname: Name of file to write.
+
+ edge_list: A list of the edges and weights of each node, in node
+ order. If node `i` has edges to nodes `a_{i,0}, …¸ a_{i,k_i}`
+ with weights `w_{i,0}, …, w_{i,k_i}`, then this argument should
+ be
+
+ `[[(a_{0,0}, w_{0,0}), …, (a_{0,k_0}, w_{0,k_0})],
+ [(a_{1,0}, w_{1,0}), …, (a_{1,k_1}, w_{1,k_1})],
+ …,
+ [(a_{n,0}, w_{n,0}), …, (a_{n,k_n}, w_{n,k_n})]]`
+
+ Remember that an isolated node corresponds to an empty (inner)
+ list.
+
+ """
+
+ factor = 2.0
+
+ n = len(edge_list)
+
+ with open(fname, "wb") as f:
+ f.write(struct.pack("<q", DIPHA_MAGIC))
+ f.write(struct.pack("<q", DIPHA_SPARSE_DISTANCE_MATRIX))
+ f.write(struct.pack("<q", n))
+
+ for i in range(0, n):
+ f.write(struct.pack('<q', len(edge_list[i])))
+ for i in range(0, n):
+ for j in range(0, len(edge_list[i])):
+ f.write(struct.pack('<q', edge_list[i][j][0]))
+ for i in range(0, n):
+ for j in range(0, len(edge_list[i])):
+ f.write(struct.pack('<d', factor*edge_list[i][j][1]))
+
+
+def load_barcode(fname, top_dim = None):
+ ret = dict()
+
+ with open(fname, "rb") as f:
+ if struct.unpack('<q', f.read(8))[0] != DIPHA_MAGIC:
+ raise IOError("File %s is not a valid DIPHA file." %(fname))
+ if struct.unpack('<q', f.read(8))[0] != DIPHA_PERSISTENCE_DIAGRAM:
+ raise IOError("File %s is not a valid DIPHA barcode file." %(fname))
+
+ n = struct.unpack('<q', f.read(8))[0]
+
+ for i in range(0, n):
+ (d, birth, death) = struct.unpack('<qdd', f.read(3*8))
+ if d < 0:
+ dim = -d -1
+ else:
+ dim = d
+
+ if top_dim is None or dim <= top_dim:
+ if d < 0:
+ ret.setdefault(dim, []).append(bc.Interval(birth))
+ else:
+ ret.setdefault(dim, []).append(bc.Interval(birth, death))
+
+ return ret
+
+def save_text_barcode(fname, barcode):
+ with open(fname, "w") as f:
+ for dim in barcode.keys():
+ for interval in barcode[dim]:
+ if interval.is_finite():
+ f.write("%d %g %g\n" %(dim, interval.birth, interval.death))
+ else:
+ f.write("%d %g inf\n" %(dim, interval.birth))
+
+
+class DiphaRunner:
+ def __init__(self, top_dim, cpu_count = None, quiet = True, mpirun = None, dipha = None):
+ self.top_dim = int(top_dim)
+ if top_dim <= 0:
+ raise ValueError("Top dimension must be postitive.")
+
+ if cpu_count is None:
+ try:
+ self.cpu_count = multiprocessing.cpu_count()
+ except NotImplementedError as e:
+ self.cpu_count = 1
+ else:
+ self.cpu_count = cpu_count
+
+ self.quiet = quiet
+
+ if mpirun is None:
+ self.mpirun = os.environ.get("MPIRUN")
+ paths = os.environ.get("PATH", "")
+ if self.mpirun is None:
+ for path in paths.split(":"):
+ x = os.path.join(path, "mpirun")
+ if os.path.isfile(x):
+ self.mpirun = x
+ break
+ else:
+ self.mpirun = mpirun
+
+ if dipha is None:
+ self.dipha = os.environ.get("DIPHA")
+ paths = os.environ.get("PATH", "")
+ if self.dipha is None:
+ for path in paths.split(":"):
+ x = os.path.join(path, "dipha")
+ if os.path.isfile(x):
+ self.dipha = x
+ break
+ else:
+ self.dipha = dipha
+
+ if self.mpirun is None:
+ raise RuntimeError("Could not find mpirun exectuable.")
+ if self.dipha is None:
+ raise RuntimeError("Could not find dipha executable.")
+
+ self.tmpdir = tempfile.mkdtemp()
+ self.ifile = os.path.join(self.tmpdir, "input.dipha")
+ self.ofile = os.path.join(self.tmpdir, "output.dipha")
+ self.ready = False
+
+
+ def weight_matrix(self, weights):
+ save_weight_matrix(self.ifile, weights)
+ self.ready = True
+
+ def masked_weight_matrix(self, weights):
+ save_masked_weight_matrix(self.ifile, weights)
+ self.ready = True
+
+ def edge_list(self, edge_list):
+ save_edge_list(self.ifile, edge_list)
+ self.ready = True
+
+ def run(self):
+ if not self.ready:
+ self.cleanup()
+ raise RuntimeError("Not ready to run. Maybe you haven't given me data, or I've already been run?")
+
+ if self.quiet:
+ opipe = subprocess.PIPE
+ else:
+ opipe = None
+
+ if not self.quiet:
+ print("Spawning DIPHA.")
+
+ proc = subprocess.Popen([self.mpirun, "-np", "%d" %(self.cpu_count), self.dipha, "--upper_dim", "%d" %(self.top_dim), self.ifile, self.ofile], stdout=opipe)
+
+ self.out_message = proc.communicate()[0]
+ self.code = proc.returncode
+
+ if not self.quiet and self.code != 0:
+ print("DIPHA exited abnormally with code %d." %(self.code))
+ self.cleanup()
+
+ if self.code == 0:
+ self.barcode = load_barcode(self.ofile)
+
+ self.ready = False
+ self.cleanup()
+
+
+ def cleanup(self):
+ os.remove(self.ifile)
+ os.remove(self.ofile)
+ os.rmdir(self.tmpdir)
+ self.ifile = None
+ self.ofile = None
+ self.tmpdir = None