merging master (including wasserstein distance) into wbary ; modified CMakeList to solve conflicts

author: tlacombe <lacombe1993@gmail.com> 2020-03-17 09:28:57 +0100
committer: tlacombe <lacombe1993@gmail.com> 2020-03-17 09:28:57 +0100
commit: 2de9709b63045c484aa1c53f72c870eb210880d9 (patch)
tree: 6350e1a4b45634747c4cc49ea53c4eb97af1493c /src/python/gudhi
parent: 80513805a453fadd137d606f501239de3e1ab12a (diff)
parent: bf5873f5dbef29ff9990131c00bdd6d32b7a2f85 (diff)
5 files changed, 192 insertions, 38 deletions
diff --git a/src/python/gudhi/point_cloud/__init__.py b/src/python/gudhi/point_cloud/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/python/gudhi/point_cloud/__init__.py
diff --git a/src/python/gudhi/point_cloud/timedelay.py b/src/python/gudhi/point_cloud/timedelay.py
new file mode 100644
index 00000000..f01df442
--- /dev/null
+++ b/src/python/gudhi/point_cloud/timedelay.py
@@ -0,0 +1,95 @@
+# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+# Author(s):       Martin Royer, Yuichi Ike, Masatoshi Takenouchi
+#
+# Copyright (C) 2020 Inria, Copyright (C) 2020 Fujitsu Laboratories Ltd.
+# Modification(s):
+#   - YYYY/MM Author: Description of the modification
+
+import numpy as np
+
+
+class TimeDelayEmbedding:
+    """Point cloud transformation class.
+    Embeds time-series data in the R^d according to [Takens' Embedding Theorem]
+    (https://en.wikipedia.org/wiki/Takens%27s_theorem) and obtains the 
+    coordinates of each point.
+
+    Parameters
+    ----------
+    dim : int, optional (default=3)
+        `d` of R^d to be embedded.
+    delay : int, optional (default=1)
+        Time-Delay embedding.
+    skip : int, optional (default=1)
+        How often to skip embedded points.
+
+    Example
+    -------
+
+    Given delay=3 and skip=2, a point cloud which is obtained by embedding
+    a scalar time-series into R^3 is as follows::
+
+        time-series = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        point cloud = [[1, 4, 7],
+                       [3, 6, 9]]
+
+    Given delay=1 and skip=1, a point cloud which is obtained by embedding
+    a 2D vector time-series data into R^4 is as follows::
+
+        time-series = [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]]
+        point cloud = [[0, 1, 2, 3],
+                       [2, 3, 4, 5],
+                       [4, 5, 6, 7],
+                       [6, 7, 8, 9]]
+    """
+
+    def __init__(self, dim=3, delay=1, skip=1):
+        self._dim = dim
+        self._delay = delay
+        self._skip = skip
+
+    def __call__(self, ts):
+        """Transform method for single time-series data.
+
+        Parameters
+        ----------
+        ts : Iterable[float] or Iterable[Iterable[float]]
+            A single time-series data, with scalar or vector values.
+
+        Returns
+        -------
+        point cloud : n x dim numpy arrays
+            Makes point cloud from a single time-series data.
+        """
+        return self._transform(np.array(ts))
+
+    def fit(self, ts, y=None):
+        return self
+    
+    def _transform(self, ts):
+        """Guts of transform method."""
+        if ts.ndim == 1:
+            repeat = self._dim
+        else:
+            assert self._dim % ts.shape[1] == 0
+            repeat = self._dim // ts.shape[1]
+        end = len(ts) - self._delay * (repeat - 1)
+        short = np.arange(0, end, self._skip)
+        vertical = np.arange(0, repeat * self._delay, self._delay)
+        return ts[np.add.outer(short, vertical)].reshape(len(short), -1)
+
+    def transform(self, ts):
+        """Transform method for multiple time-series data.
+
+        Parameters
+        ----------
+        ts : Iterable[Iterable[float]] or Iterable[Iterable[Iterable[float]]]
+            Multiple time-series data, with scalar or vector values.
+
+        Returns
+        -------
+        point clouds : list of n x dim numpy arrays
+            Makes point cloud from each time-series data.
+        """
+        return [self._transform(np.array(s)) for s in ts]
diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd
index 96d14079..82f155de 100644
--- a/src/python/gudhi/simplex_tree.pxd
+++ b/src/python/gudhi/simplex_tree.pxd
@@ -21,6 +21,22 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi":
     cdef cppclass Simplex_tree_options_full_featured:
         pass
 
+    cdef cppclass Simplex_tree_simplex_handle "Gudhi::Simplex_tree_interface<Gudhi::Simplex_tree_options_full_featured>::Simplex_handle":
+        pass
+
+    cdef cppclass Simplex_tree_simplices_iterator "Gudhi::Simplex_tree_interface<Gudhi::Simplex_tree_options_full_featured>::Complex_simplex_iterator":
+        Simplex_tree_simplices_iterator()
+        Simplex_tree_simplex_handle& operator*()
+        Simplex_tree_simplices_iterator operator++()
+        bint operator!=(Simplex_tree_simplices_iterator)
+
+    cdef cppclass Simplex_tree_skeleton_iterator "Gudhi::Simplex_tree_interface<Gudhi::Simplex_tree_options_full_featured>::Skeleton_simplex_iterator":
+        Simplex_tree_skeleton_iterator()
+        Simplex_tree_simplex_handle& operator*()
+        Simplex_tree_skeleton_iterator operator++()
+        bint operator!=(Simplex_tree_skeleton_iterator)
+
+
     cdef cppclass Simplex_tree_interface_full_featured "Gudhi::Simplex_tree_interface<Gudhi::Simplex_tree_options_full_featured>":
         Simplex_tree()
         double simplex_filtration(vector[int] simplex)
@@ -34,8 +50,6 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi":
         bool find_simplex(vector[int] simplex)
         bool insert_simplex_and_subfaces(vector[int] simplex,
                                          double filtration)
-        vector[pair[vector[int], double]] get_filtration()
-        vector[pair[vector[int], double]] get_skeleton(int dimension)
         vector[pair[vector[int], double]] get_star(vector[int] simplex)
         vector[pair[vector[int], double]] get_cofaces(vector[int] simplex,
                                                           int dimension)
@@ -43,6 +57,14 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi":
         void remove_maximal_simplex(vector[int] simplex)
         bool prune_above_filtration(double filtration)
         bool make_filtration_non_decreasing()
+        # Iterators over Simplex tree
+        pair[vector[int], double] get_simplex_and_filtration(Simplex_tree_simplex_handle f_simplex)
+        Simplex_tree_simplices_iterator get_simplices_iterator_begin()
+        Simplex_tree_simplices_iterator get_simplices_iterator_end()
+        vector[Simplex_tree_simplex_handle].const_iterator get_filtration_iterator_begin()
+        vector[Simplex_tree_simplex_handle].const_iterator get_filtration_iterator_end()
+        Simplex_tree_skeleton_iterator get_skeleton_iterator_begin(int dimension)
+        Simplex_tree_skeleton_iterator get_skeleton_iterator_end(int dimension)
 
 cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi":
     cdef cppclass Simplex_tree_persistence_interface "Gudhi::Persistent_cohomology_interface<Gudhi::Simplex_tree<Gudhi::Simplex_tree_options_full_featured>>":
diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx
index b18627c4..c01cc905 100644
--- a/src/python/gudhi/simplex_tree.pyx
+++ b/src/python/gudhi/simplex_tree.pyx
@@ -7,6 +7,7 @@
 # Modification(s):
 #   - YYYY/MM Author: Description of the modification
 
+from cython.operator import dereference, preincrement
 from libc.stdint cimport intptr_t
 from numpy import array as np_array
 cimport simplex_tree
@@ -207,22 +208,34 @@ cdef class SimplexTree:
         return self.get_ptr().insert_simplex_and_subfaces(csimplex,
                                                         <double>filtration)
 
-    def get_filtration(self):
-        """This function returns a list of all simplices with their given
+    def get_simplices(self):
+        """This function returns a generator with simplices and their given
         filtration values.
 
+        :returns:  The simplices.
+        :rtype:  generator with tuples(simplex, filtration)
+        """
+        cdef Simplex_tree_simplices_iterator it = self.get_ptr().get_simplices_iterator_begin()
+        cdef Simplex_tree_simplices_iterator end = self.get_ptr().get_simplices_iterator_end()
+        cdef Simplex_tree_simplex_handle sh = dereference(it)
+
+        while it != end:
+            yield self.get_ptr().get_simplex_and_filtration(dereference(it))
+            preincrement(it)
+
+    def get_filtration(self):
+        """This function returns a generator with simplices and their given
+        filtration values sorted by increasing filtration values.
+
         :returns:  The simplices sorted by increasing filtration values.
-        :rtype:  list of tuples(simplex, filtration)
+        :rtype:  generator with tuples(simplex, filtration)
         """
-        cdef vector[pair[vector[int], double]] filtration \
-            = self.get_ptr().get_filtration()
-        ct = []
-        for filtered_complex in filtration:
-            v = []
-            for vertex in filtered_complex.first:
-                v.append(vertex)
-            ct.append((v, filtered_complex.second))
-        return ct
+        cdef vector[Simplex_tree_simplex_handle].const_iterator it = self.get_ptr().get_filtration_iterator_begin()
+        cdef vector[Simplex_tree_simplex_handle].const_iterator end = self.get_ptr().get_filtration_iterator_end()
+
+        while it != end:
+            yield self.get_ptr().get_simplex_and_filtration(dereference(it))
+            preincrement(it)
 
     def get_skeleton(self, dimension):
         """This function returns the (simplices of the) skeleton of a maximum
@@ -233,15 +246,12 @@ cdef class SimplexTree:
         :returns:  The (simplices of the) skeleton of a maximum dimension.
         :rtype:  list of tuples(simplex, filtration)
         """
-        cdef vector[pair[vector[int], double]] skeleton \
-            = self.get_ptr().get_skeleton(<int>dimension)
-        ct = []
-        for filtered_simplex in skeleton:
-            v = []
-            for vertex in filtered_simplex.first:
-                v.append(vertex)
-            ct.append((v, filtered_simplex.second))
-        return ct
+        cdef Simplex_tree_skeleton_iterator it = self.get_ptr().get_skeleton_iterator_begin(dimension)
+        cdef Simplex_tree_skeleton_iterator end = self.get_ptr().get_skeleton_iterator_end(dimension)
+
+        while it != end:
+            yield self.get_ptr().get_simplex_and_filtration(dereference(it))
+            preincrement(it)
 
     def get_star(self, simplex):
         """This function returns the star of a given N-simplex.
diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein.py
index 13102094..3dd993f9 100644
--- a/src/python/gudhi/wasserstein.py
+++ b/src/python/gudhi/wasserstein.py
@@ -30,8 +30,10 @@ def _build_dist_matrix(X, Y, order=2., internal_p=2.):
     :param order: exponent for the Wasserstein metric.
     :param internal_p: Ground metric (i.e. norm L^p).
     :returns: (n+1) x (m+1) np.array encoding the cost matrix C. 
-                For 1 <= i <= n, 1 <= j <= m, C[i,j] encodes the distance between X[i] and Y[j], while C[i, m+1] (resp. C[n+1, j]) encodes the distance (to the p) between X[i] (resp Y[j]) and its orthogonal proj onto the diagonal.
-                note also that C[n+1, m+1] = 0  (it costs nothing to move from the diagonal to the diagonal).
+                For 0 <= i < n, 0 <= j < m, C[i,j] encodes the distance between X[i] and Y[j], 
+                while C[i, m] (resp. C[n, j]) encodes the distance (to the p) between X[i] (resp Y[j]) 
+                and its orthogonal projection onto the diagonal.
+                note also that C[n, m] = 0  (it costs nothing to move from the diagonal to the diagonal).
     '''
     Xdiag = _proj_on_diag(X)
     Ydiag = _proj_on_diag(Y)
@@ -62,14 +64,20 @@ def _perstot(X, order, internal_p):
     return (np.sum(np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order))**(1./order)
 
 
-def wasserstein_distance(X, Y, order=2., internal_p=2.):
+def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2.):
     '''
-    :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate).
+    :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points 
+                (i.e. with infinite coordinate).
     :param Y: (m x 2) numpy.array encoding the second diagram.
+    :param matching: if True, computes and returns the optimal matching between X and Y, encoded as
+                     a (n x 2) np.array  [...[i,j]...], meaning the i-th point in X is matched to
+                     the j-th point in Y, with the convention (-1) represents the diagonal.
     :param order: exponent for Wasserstein; Default value is 2.
-    :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm).
-    :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with respect to the internal_p-norm as ground metric.
-    :rtype: float
+    :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); 
+                       Default value is 2 (Euclidean norm).
+    :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with 
+              respect to the internal_p-norm as ground metric.
+              If matching is set to True, also returns the optimal matching between X and Y.
     '''
     n = len(X)
     m = len(Y)
@@ -77,21 +85,40 @@ def wasserstein_distance(X, Y, order=2., internal_p=2.):
     # handle empty diagrams
     if X.size == 0:
         if Y.size == 0:
-            return 0.
+            if not matching:
+                return 0.
+            else:
+                return 0., np.array([])
         else:
-            return _perstot(Y, order, internal_p)
+            if not matching:
+                return _perstot(Y, order, internal_p)
+            else:
+                return _perstot(Y, order, internal_p), np.array([[-1, j] for j in range(m)])
     elif Y.size == 0:
-        return _perstot(X, order, internal_p)
+        if not matching:
+            return _perstot(X, order, internal_p)
+        else:
+            return _perstot(X, order, internal_p), np.array([[i, -1] for i in range(n)])
 
     M = _build_dist_matrix(X, Y, order=order, internal_p=internal_p)
-    a = np.full(n+1, 1. / (n + m) )  # weight vector of the input diagram. Uniform here.
-    a[-1] = a[-1] * m                # normalized so that we have a probability measure, required by POT
-    b = np.full(m+1, 1. / (n + m) )  # weight vector of the input diagram. Uniform here.
-    b[-1] = b[-1] * n                # so that we have a probability measure, required by POT
+    a = np.ones(n+1) # weight vector of the input diagram. Uniform here.
+    a[-1] = m
+    b = np.ones(m+1) # weight vector of the input diagram. Uniform here.
+    b[-1] = n
+
+    if matching:
+        P = ot.emd(a=a,b=b,M=M, numItermax=2000000)
+        ot_cost = np.sum(np.multiply(P,M))
+        P[-1, -1] = 0  # Remove matching corresponding to the diagonal
+        match = np.argwhere(P)
+        # Now we turn to -1 points encoding the diagonal
+        match[:,0][match[:,0] >= n] = -1
+        match[:,1][match[:,1] >= m] = -1
+        return ot_cost ** (1./order) , match
 
     # Comptuation of the otcost using the ot.emd2 library.
     # Note: it is the Wasserstein distance to the power q.
     # The default numItermax=100000 is not sufficient for some examples with 5000 points, what is a good value?
-    ot_cost = (n+m) * ot.emd2(a, b, M, numItermax=2000000)
+    ot_cost = ot.emd2(a, b, M, numItermax=2000000)
 
     return ot_cost ** (1./order)
author	tlacombe <lacombe1993@gmail.com>	2020-03-17 09:28:57 +0100
committer	tlacombe <lacombe1993@gmail.com>	2020-03-17 09:28:57 +0100
commit	2de9709b63045c484aa1c53f72c870eb210880d9 (patch)
tree	6350e1a4b45634747c4cc49ea53c4eb97af1493c /src/python/gudhi
parent	80513805a453fadd137d606f501239de3e1ab12a (diff)
parent	bf5873f5dbef29ff9990131c00bdd6d32b7a2f85 (diff)