diff options
author | tlacombe <lacombe1993@gmail.com> | 2020-03-17 09:28:57 +0100 |
---|---|---|
committer | tlacombe <lacombe1993@gmail.com> | 2020-03-17 09:28:57 +0100 |
commit | 2de9709b63045c484aa1c53f72c870eb210880d9 (patch) | |
tree | 6350e1a4b45634747c4cc49ea53c4eb97af1493c /src/python/gudhi | |
parent | 80513805a453fadd137d606f501239de3e1ab12a (diff) | |
parent | bf5873f5dbef29ff9990131c00bdd6d32b7a2f85 (diff) |
merging master (including wasserstein distance) into wbary ; modified CMakeList to solve conflicts
Diffstat (limited to 'src/python/gudhi')
-rw-r--r-- | src/python/gudhi/point_cloud/__init__.py | 0 | ||||
-rw-r--r-- | src/python/gudhi/point_cloud/timedelay.py | 95 | ||||
-rw-r--r-- | src/python/gudhi/simplex_tree.pxd | 26 | ||||
-rw-r--r-- | src/python/gudhi/simplex_tree.pyx | 52 | ||||
-rw-r--r-- | src/python/gudhi/wasserstein.py | 57 |
5 files changed, 192 insertions, 38 deletions
diff --git a/src/python/gudhi/point_cloud/__init__.py b/src/python/gudhi/point_cloud/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/python/gudhi/point_cloud/__init__.py diff --git a/src/python/gudhi/point_cloud/timedelay.py b/src/python/gudhi/point_cloud/timedelay.py new file mode 100644 index 00000000..f01df442 --- /dev/null +++ b/src/python/gudhi/point_cloud/timedelay.py @@ -0,0 +1,95 @@ +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +# Author(s): Martin Royer, Yuichi Ike, Masatoshi Takenouchi +# +# Copyright (C) 2020 Inria, Copyright (C) 2020 Fujitsu Laboratories Ltd. +# Modification(s): +# - YYYY/MM Author: Description of the modification + +import numpy as np + + +class TimeDelayEmbedding: + """Point cloud transformation class. + Embeds time-series data in the R^d according to [Takens' Embedding Theorem] + (https://en.wikipedia.org/wiki/Takens%27s_theorem) and obtains the + coordinates of each point. + + Parameters + ---------- + dim : int, optional (default=3) + `d` of R^d to be embedded. + delay : int, optional (default=1) + Time-Delay embedding. + skip : int, optional (default=1) + How often to skip embedded points. + + Example + ------- + + Given delay=3 and skip=2, a point cloud which is obtained by embedding + a scalar time-series into R^3 is as follows:: + + time-series = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + point cloud = [[1, 4, 7], + [3, 6, 9]] + + Given delay=1 and skip=1, a point cloud which is obtained by embedding + a 2D vector time-series data into R^4 is as follows:: + + time-series = [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]] + point cloud = [[0, 1, 2, 3], + [2, 3, 4, 5], + [4, 5, 6, 7], + [6, 7, 8, 9]] + """ + + def __init__(self, dim=3, delay=1, skip=1): + self._dim = dim + self._delay = delay + self._skip = skip + + def __call__(self, ts): + """Transform method for single time-series data. + + Parameters + ---------- + ts : Iterable[float] or Iterable[Iterable[float]] + A single time-series data, with scalar or vector values. + + Returns + ------- + point cloud : n x dim numpy arrays + Makes point cloud from a single time-series data. + """ + return self._transform(np.array(ts)) + + def fit(self, ts, y=None): + return self + + def _transform(self, ts): + """Guts of transform method.""" + if ts.ndim == 1: + repeat = self._dim + else: + assert self._dim % ts.shape[1] == 0 + repeat = self._dim // ts.shape[1] + end = len(ts) - self._delay * (repeat - 1) + short = np.arange(0, end, self._skip) + vertical = np.arange(0, repeat * self._delay, self._delay) + return ts[np.add.outer(short, vertical)].reshape(len(short), -1) + + def transform(self, ts): + """Transform method for multiple time-series data. + + Parameters + ---------- + ts : Iterable[Iterable[float]] or Iterable[Iterable[Iterable[float]]] + Multiple time-series data, with scalar or vector values. + + Returns + ------- + point clouds : list of n x dim numpy arrays + Makes point cloud from each time-series data. + """ + return [self._transform(np.array(s)) for s in ts] diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd index 96d14079..82f155de 100644 --- a/src/python/gudhi/simplex_tree.pxd +++ b/src/python/gudhi/simplex_tree.pxd @@ -21,6 +21,22 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi": cdef cppclass Simplex_tree_options_full_featured: pass + cdef cppclass Simplex_tree_simplex_handle "Gudhi::Simplex_tree_interface<Gudhi::Simplex_tree_options_full_featured>::Simplex_handle": + pass + + cdef cppclass Simplex_tree_simplices_iterator "Gudhi::Simplex_tree_interface<Gudhi::Simplex_tree_options_full_featured>::Complex_simplex_iterator": + Simplex_tree_simplices_iterator() + Simplex_tree_simplex_handle& operator*() + Simplex_tree_simplices_iterator operator++() + bint operator!=(Simplex_tree_simplices_iterator) + + cdef cppclass Simplex_tree_skeleton_iterator "Gudhi::Simplex_tree_interface<Gudhi::Simplex_tree_options_full_featured>::Skeleton_simplex_iterator": + Simplex_tree_skeleton_iterator() + Simplex_tree_simplex_handle& operator*() + Simplex_tree_skeleton_iterator operator++() + bint operator!=(Simplex_tree_skeleton_iterator) + + cdef cppclass Simplex_tree_interface_full_featured "Gudhi::Simplex_tree_interface<Gudhi::Simplex_tree_options_full_featured>": Simplex_tree() double simplex_filtration(vector[int] simplex) @@ -34,8 +50,6 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi": bool find_simplex(vector[int] simplex) bool insert_simplex_and_subfaces(vector[int] simplex, double filtration) - vector[pair[vector[int], double]] get_filtration() - vector[pair[vector[int], double]] get_skeleton(int dimension) vector[pair[vector[int], double]] get_star(vector[int] simplex) vector[pair[vector[int], double]] get_cofaces(vector[int] simplex, int dimension) @@ -43,6 +57,14 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi": void remove_maximal_simplex(vector[int] simplex) bool prune_above_filtration(double filtration) bool make_filtration_non_decreasing() + # Iterators over Simplex tree + pair[vector[int], double] get_simplex_and_filtration(Simplex_tree_simplex_handle f_simplex) + Simplex_tree_simplices_iterator get_simplices_iterator_begin() + Simplex_tree_simplices_iterator get_simplices_iterator_end() + vector[Simplex_tree_simplex_handle].const_iterator get_filtration_iterator_begin() + vector[Simplex_tree_simplex_handle].const_iterator get_filtration_iterator_end() + Simplex_tree_skeleton_iterator get_skeleton_iterator_begin(int dimension) + Simplex_tree_skeleton_iterator get_skeleton_iterator_end(int dimension) cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Simplex_tree_persistence_interface "Gudhi::Persistent_cohomology_interface<Gudhi::Simplex_tree<Gudhi::Simplex_tree_options_full_featured>>": diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index b18627c4..c01cc905 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -7,6 +7,7 @@ # Modification(s): # - YYYY/MM Author: Description of the modification +from cython.operator import dereference, preincrement from libc.stdint cimport intptr_t from numpy import array as np_array cimport simplex_tree @@ -207,22 +208,34 @@ cdef class SimplexTree: return self.get_ptr().insert_simplex_and_subfaces(csimplex, <double>filtration) - def get_filtration(self): - """This function returns a list of all simplices with their given + def get_simplices(self): + """This function returns a generator with simplices and their given filtration values. + :returns: The simplices. + :rtype: generator with tuples(simplex, filtration) + """ + cdef Simplex_tree_simplices_iterator it = self.get_ptr().get_simplices_iterator_begin() + cdef Simplex_tree_simplices_iterator end = self.get_ptr().get_simplices_iterator_end() + cdef Simplex_tree_simplex_handle sh = dereference(it) + + while it != end: + yield self.get_ptr().get_simplex_and_filtration(dereference(it)) + preincrement(it) + + def get_filtration(self): + """This function returns a generator with simplices and their given + filtration values sorted by increasing filtration values. + :returns: The simplices sorted by increasing filtration values. - :rtype: list of tuples(simplex, filtration) + :rtype: generator with tuples(simplex, filtration) """ - cdef vector[pair[vector[int], double]] filtration \ - = self.get_ptr().get_filtration() - ct = [] - for filtered_complex in filtration: - v = [] - for vertex in filtered_complex.first: - v.append(vertex) - ct.append((v, filtered_complex.second)) - return ct + cdef vector[Simplex_tree_simplex_handle].const_iterator it = self.get_ptr().get_filtration_iterator_begin() + cdef vector[Simplex_tree_simplex_handle].const_iterator end = self.get_ptr().get_filtration_iterator_end() + + while it != end: + yield self.get_ptr().get_simplex_and_filtration(dereference(it)) + preincrement(it) def get_skeleton(self, dimension): """This function returns the (simplices of the) skeleton of a maximum @@ -233,15 +246,12 @@ cdef class SimplexTree: :returns: The (simplices of the) skeleton of a maximum dimension. :rtype: list of tuples(simplex, filtration) """ - cdef vector[pair[vector[int], double]] skeleton \ - = self.get_ptr().get_skeleton(<int>dimension) - ct = [] - for filtered_simplex in skeleton: - v = [] - for vertex in filtered_simplex.first: - v.append(vertex) - ct.append((v, filtered_simplex.second)) - return ct + cdef Simplex_tree_skeleton_iterator it = self.get_ptr().get_skeleton_iterator_begin(dimension) + cdef Simplex_tree_skeleton_iterator end = self.get_ptr().get_skeleton_iterator_end(dimension) + + while it != end: + yield self.get_ptr().get_simplex_and_filtration(dereference(it)) + preincrement(it) def get_star(self, simplex): """This function returns the star of a given N-simplex. diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein.py index 13102094..3dd993f9 100644 --- a/src/python/gudhi/wasserstein.py +++ b/src/python/gudhi/wasserstein.py @@ -30,8 +30,10 @@ def _build_dist_matrix(X, Y, order=2., internal_p=2.): :param order: exponent for the Wasserstein metric. :param internal_p: Ground metric (i.e. norm L^p). :returns: (n+1) x (m+1) np.array encoding the cost matrix C. - For 1 <= i <= n, 1 <= j <= m, C[i,j] encodes the distance between X[i] and Y[j], while C[i, m+1] (resp. C[n+1, j]) encodes the distance (to the p) between X[i] (resp Y[j]) and its orthogonal proj onto the diagonal. - note also that C[n+1, m+1] = 0 (it costs nothing to move from the diagonal to the diagonal). + For 0 <= i < n, 0 <= j < m, C[i,j] encodes the distance between X[i] and Y[j], + while C[i, m] (resp. C[n, j]) encodes the distance (to the p) between X[i] (resp Y[j]) + and its orthogonal projection onto the diagonal. + note also that C[n, m] = 0 (it costs nothing to move from the diagonal to the diagonal). ''' Xdiag = _proj_on_diag(X) Ydiag = _proj_on_diag(Y) @@ -62,14 +64,20 @@ def _perstot(X, order, internal_p): return (np.sum(np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order))**(1./order) -def wasserstein_distance(X, Y, order=2., internal_p=2.): +def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2.): ''' - :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate). + :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points + (i.e. with infinite coordinate). :param Y: (m x 2) numpy.array encoding the second diagram. + :param matching: if True, computes and returns the optimal matching between X and Y, encoded as + a (n x 2) np.array [...[i,j]...], meaning the i-th point in X is matched to + the j-th point in Y, with the convention (-1) represents the diagonal. :param order: exponent for Wasserstein; Default value is 2. - :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm). - :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with respect to the internal_p-norm as ground metric. - :rtype: float + :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); + Default value is 2 (Euclidean norm). + :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with + respect to the internal_p-norm as ground metric. + If matching is set to True, also returns the optimal matching between X and Y. ''' n = len(X) m = len(Y) @@ -77,21 +85,40 @@ def wasserstein_distance(X, Y, order=2., internal_p=2.): # handle empty diagrams if X.size == 0: if Y.size == 0: - return 0. + if not matching: + return 0. + else: + return 0., np.array([]) else: - return _perstot(Y, order, internal_p) + if not matching: + return _perstot(Y, order, internal_p) + else: + return _perstot(Y, order, internal_p), np.array([[-1, j] for j in range(m)]) elif Y.size == 0: - return _perstot(X, order, internal_p) + if not matching: + return _perstot(X, order, internal_p) + else: + return _perstot(X, order, internal_p), np.array([[i, -1] for i in range(n)]) M = _build_dist_matrix(X, Y, order=order, internal_p=internal_p) - a = np.full(n+1, 1. / (n + m) ) # weight vector of the input diagram. Uniform here. - a[-1] = a[-1] * m # normalized so that we have a probability measure, required by POT - b = np.full(m+1, 1. / (n + m) ) # weight vector of the input diagram. Uniform here. - b[-1] = b[-1] * n # so that we have a probability measure, required by POT + a = np.ones(n+1) # weight vector of the input diagram. Uniform here. + a[-1] = m + b = np.ones(m+1) # weight vector of the input diagram. Uniform here. + b[-1] = n + + if matching: + P = ot.emd(a=a,b=b,M=M, numItermax=2000000) + ot_cost = np.sum(np.multiply(P,M)) + P[-1, -1] = 0 # Remove matching corresponding to the diagonal + match = np.argwhere(P) + # Now we turn to -1 points encoding the diagonal + match[:,0][match[:,0] >= n] = -1 + match[:,1][match[:,1] >= m] = -1 + return ot_cost ** (1./order) , match # Comptuation of the otcost using the ot.emd2 library. # Note: it is the Wasserstein distance to the power q. # The default numItermax=100000 is not sufficient for some examples with 5000 points, what is a good value? - ot_cost = (n+m) * ot.emd2(a, b, M, numItermax=2000000) + ot_cost = ot.emd2(a, b, M, numItermax=2000000) return ot_cost ** (1./order) |