author     Rémi Flamary <remi.flamary@gmail.com>  2018-02-20 16:11:56 +0100
committer  Rémi Flamary <remi.flamary@gmail.com>  2018-02-20 16:11:56 +0100
commit     6d9b281271167d3676538f2ef8518abea82ef9c8 (patch)
tree       7d1fae1d15a0ec70e229819a68b9f3a1ceea8f02 /ot
parent     806a406e1ca2e9ca0bfdfe0516c75865e8098205 (diff)
parent     5ff8030ce300f3d066e1edba2b36e60709b023b8 (diff)
Merge branch 'master' of github.com:rflamary/POT
Diffstat (limited to 'ot')
-rw-r--r--  ot/__init__.py      7
-rw-r--r--  ot/da.py           26
-rw-r--r--  ot/gpu/__init__.py  2
-rw-r--r--  ot/gpu/da.py        3
-rw-r--r--  ot/gromov.py      554
-rw-r--r--  ot/utils.py         2
6 files changed, 474 insertions, 120 deletions
diff --git a/ot/__init__.py b/ot/__init__.py
index a5df43d..1500e59 100644
--- a/ot/__init__.py
+++ b/ot/__init__.py
@@ -16,7 +16,6 @@ from . import bregman
from . import optim
from . import utils
from . import datasets
-from . import plot
from . import da
from . import gromov
@@ -24,7 +23,6 @@ from . import gromov
from .lp import emd, emd2
from .bregman import sinkhorn, sinkhorn2, barycenter
from .da import sinkhorn_lpl1_mm
-from .gromov import gromov_wasserstein, gromov_wasserstein2
# utils functions
from .utils import dist, unif, tic, toc, toq
@@ -32,6 +30,5 @@ from .utils import dist, unif, tic, toc, toq
__version__ = "0.4.0"
__all__ = ["emd", "emd2", "sinkhorn", "sinkhorn2", "utils", 'datasets',
- 'bregman', 'lp', 'plot', 'tic', 'toc', 'toq',
- 'dist', 'unif', 'barycenter', 'sinkhorn_lpl1_mm', 'da', 'optim',
- 'gromov_wasserstein','gromov_wasserstein2']
+ 'bregman', 'lp', 'tic', 'toc', 'toq', 'gromov',
+ 'dist', 'unif', 'barycenter', 'sinkhorn_lpl1_mm', 'da', 'optim']
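With this merge, importing ot no longer pulls in ot.plot (and therefore matplotlib), and the Gromov-Wasserstein solvers are no longer re-exported at the package top level: gromov is exposed as a submodule instead. A minimal sketch of the updated access path, assuming POT 0.4.0 with this change applied:

import ot
from ot.gromov import gromov_wasserstein  # was: ot.gromov_wasserstein

# the submodule is imported with the package, so attribute access works too
solver = ot.gromov.gromov_wasserstein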
diff --git a/ot/da.py b/ot/da.py
index 1d3d0ba..c688654 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -330,17 +330,17 @@ def joint_OT_mapping_linear(xs, xt, mu=1, eta=0.001, bias=False, verbose=False,
if bias:
xs1 = np.hstack((xs, np.ones((ns, 1))))
xstxs = xs1.T.dot(xs1)
- I = np.eye(d + 1)
- I[-1] = 0
- I0 = I[:, :-1]
+ Id = np.eye(d + 1)
+ Id[-1] = 0
+ I0 = Id[:, :-1]
def sel(x):
return x[:-1, :]
else:
xs1 = xs
xstxs = xs1.T.dot(xs1)
- I = np.eye(d)
- I0 = I
+ Id = np.eye(d)
+ I0 = Id
def sel(x):
return x
@@ -361,7 +361,7 @@ def joint_OT_mapping_linear(xs, xt, mu=1, eta=0.001, bias=False, verbose=False,
def solve_L(G):
""" solve L problem with fixed G (least square)"""
xst = ns * G.dot(xt)
- return np.linalg.solve(xstxs + eta * I, xs1.T.dot(xst) + eta * I0)
+ return np.linalg.solve(xstxs + eta * Id, xs1.T.dot(xst) + eta * I0)
def solve_G(L, G0):
"""Update G with CG algorithm"""
@@ -520,8 +520,8 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian',
K = kernel(xs, xs, method=kerneltype, sigma=sigma)
if bias:
K1 = np.hstack((K, np.ones((ns, 1))))
- I = np.eye(ns + 1)
- I[-1] = 0
+ Id = np.eye(ns + 1)
+ Id[-1] = 0
Kp = np.eye(ns + 1)
Kp[:ns, :ns] = K
@@ -535,14 +535,14 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian',
else:
K1 = K
- I = np.eye(ns)
+ Id = np.eye(ns)
# ls regul
# K0 = K1.T.dot(K1)+eta*I
# Kreg=I
# proper kernel ridge
- K0 = K + eta * I
+ K0 = K + eta * Id
Kreg = K
if log:
@@ -933,6 +933,7 @@ def distribution_estimation_uniform(X):
class BaseTransport(BaseEstimator):
+
"""Base class for OTDA objects
Notes
@@ -1180,6 +1181,7 @@ class BaseTransport(BaseEstimator):
class SinkhornTransport(BaseTransport):
+
"""Domain Adapatation OT method based on Sinkhorn Algorithm
Parameters
@@ -1289,6 +1291,7 @@ class SinkhornTransport(BaseTransport):
class EMDTransport(BaseTransport):
+
"""Domain Adapatation OT method based on Earth Mover's Distance
Parameters
@@ -1377,6 +1380,7 @@ class EMDTransport(BaseTransport):
class SinkhornLpl1Transport(BaseTransport):
+
"""Domain Adapatation OT method based on sinkhorn algorithm +
LpL1 class regularization.
@@ -1486,6 +1490,7 @@ class SinkhornLpl1Transport(BaseTransport):
class SinkhornL1l2Transport(BaseTransport):
+
"""Domain Adapatation OT method based on sinkhorn algorithm +
l1l2 class regularization.
@@ -1608,6 +1613,7 @@ class SinkhornL1l2Transport(BaseTransport):
class MappingTransport(BaseEstimator):
+
"""MappingTransport: DA methods that aims at jointly estimating a optimal
transport coupling and the associated mapping
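Beyond the PEP8 blank lines after class statements, these hunks rename the ambiguous single-letter identifier I to Id (flake8 E741); the logic is unchanged. In the bias=True branch, a ones column is appended to xs and the last row of Id is zeroed, so the ridge term eta * Id regularizes the linear map but leaves the intercept free. A self-contained sketch of that solve, with synthetic stand-in data (shapes and values are illustrative, not POT API):

import numpy as np

ns, d = 50, 3
xs = np.random.randn(ns, d)
xst = np.random.randn(ns, d)  # stand-in for ns * G.dot(xt) in solve_L
eta = 0.001

xs1 = np.hstack((xs, np.ones((ns, 1))))  # append the bias column
Id = np.eye(d + 1)
Id[-1] = 0                               # do not regularize the bias row
I0 = Id[:, :-1]

# normal equations, as in joint_OT_mapping_linear's solve_L
L = np.linalg.solve(xs1.T.dot(xs1) + eta * Id, xs1.T.dot(xst) + eta * I0)
# L[:d] is the d x d linear map, L[-1] the unregularized bias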
diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py
index c8f9433..a2fdd3d 100644
--- a/ot/gpu/__init__.py
+++ b/ot/gpu/__init__.py
@@ -5,7 +5,7 @@ from . import da
from .bregman import sinkhorn
# Author: Remi Flamary <remi.flamary@unice.fr>
-# Leo Gautheron <https://github.com/aje>
+# Leo Gautheron <https://github.com/aje>
#
# License: MIT License
diff --git a/ot/gpu/da.py b/ot/gpu/da.py
index 05c580f..71a485a 100644
--- a/ot/gpu/da.py
+++ b/ot/gpu/da.py
@@ -188,6 +188,7 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
class OTDA_GPU(OTDA):
+
def normalizeM(self, norm):
if norm == "median":
self.M_GPU.divide(float(np.median(self.M_GPU.asarray())))
@@ -204,6 +205,7 @@ class OTDA_GPU(OTDA):
class OTDA_sinkhorn(OTDA_GPU):
+
def fit(self, xs, xt, reg=1, ws=None, wt=None, norm=None, **kwargs):
cudamat.init()
xs = np.asarray(xs, dtype=np.float64)
@@ -228,6 +230,7 @@ class OTDA_sinkhorn(OTDA_GPU):
class OTDA_lpl1(OTDA_GPU):
+
def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, norm=None,
**kwargs):
cudamat.init()
diff --git a/ot/gromov.py b/ot/gromov.py
index 20bf7ee..2a23873 100644
--- a/ot/gromov.py
+++ b/ot/gromov.py
@@ -9,6 +9,7 @@ Gromov-Wasserstein transport method
# Author: Erwan Vautier <erwan.vautier@gmail.com>
# Nicolas Courty <ncourty@irisa.fr>
+# Rémi Flamary <remi.flamary@unice.fr>
#
# License: MIT License
@@ -16,40 +17,35 @@ import numpy as np
from .bregman import sinkhorn
from .utils import dist
+from .optim import cg
-def square_loss(a, b):
- """
- Returns the value of L(a,b)=(1/2)*|a-b|^2
- """
-
- return 0.5 * (a - b)**2
+def init_matrix(C1, C2, T, p, q, loss_fun='square_loss'):
+ """ Return loss matrices and tensors for Gromov-Wasserstein fast computation
-
-def kl_loss(a, b):
- """
- Returns the value of L(a,b)=a*log(a/b)-a+b
- """
-
- return a * np.log(a / b) - a + b
-
-
-def tensor_square_loss(C1, C2, T):
- """
- Returns the value of \mathcal{L}(C1,C2) \otimes T with the square loss
+ Returns the value of \mathcal{L}(C1,C2) \otimes T with the selected loss
function as the loss function of Gromov-Wasserstein discrepancy.
+ The matrices are computed as described in Proposition 1 in [12]
+
Where :
- C1 : Metric cost matrix in the source space
- C2 : Metric cost matrix in the target space
- T : A coupling between those two spaces
+ * C1 : Metric cost matrix in the source space
+ * C2 : Metric cost matrix in the target space
+ * T : A coupling between those two spaces
The square-loss function L(a,b)=(1/2)*|a-b|^2 is read as :
L(a,b) = f1(a)+f2(b)-h1(a)*h2(b) with :
- f1(a)=(a^2)/2
- f2(b)=(b^2)/2
- h1(a)=a
- h2(b)=b
+ * f1(a)=(a^2)/2
+ * f2(b)=(b^2)/2
+ * h1(a)=a
+ * h2(b)=b
+
+ The kl-loss function L(a,b)=a*log(a/b)-a+b is read as :
+ L(a,b) = f1(a)+f2(b)-h1(a)*h2(b) with :
+ * f1(a)=a*log(a)-a
+ * f2(b)=b
+ * h1(a)=a
+ * h2(b)=log(b)
Parameters
----------
@@ -57,66 +53,83 @@ def tensor_square_loss(C1, C2, T):
Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
Metric cost matrix in the target space
- T : ndarray, shape (ns, nt)
+ T : ndarray, shape (ns, nt)
Coupling between source and target spaces
+ p : ndarray, shape (ns,)
+ distribution in the source space
+ q : ndarray, shape (nt,)
+ distribution in the target space
+ loss_fun : string, optional
+ loss function used for the solver, either 'square_loss' or 'kl_loss'
+
Returns
-------
- tens : ndarray, shape (ns, nt)
- \mathcal{L}(C1,C2) \otimes T tensor-matrix multiplication result
+
+ constC : ndarray, shape (ns, nt)
+ Constant C matrix in Eq. (6)
+ hC1 : ndarray, shape (ns, ns)
+ h1(C1) matrix in Eq. (6)
+ hC2 : ndarray, shape (nt, nt)
+ h2(C2) matrix in Eq. (6)
+
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
+
"""
- C1 = np.asarray(C1, dtype=np.float64)
- C2 = np.asarray(C2, dtype=np.float64)
- T = np.asarray(T, dtype=np.float64)
+ if loss_fun == 'square_loss':
+ def f1(a):
+ return (a**2) / 2
- def f1(a):
- return (a**2) / 2
+ def f2(b):
+ return (b**2) / 2
- def f2(b):
- return (b**2) / 2
+ def h1(a):
+ return a
- def h1(a):
- return a
+ def h2(b):
+ return b
+ elif loss_fun == 'kl_loss':
+ def f1(a):
+ return a * np.log(a + 1e-15) - a
- def h2(b):
- return b
+ def f2(b):
+ return b
- tens = -np.dot(h1(C1), T).dot(h2(C2).T)
- tens -= tens.min()
+ def h1(a):
+ return a
- return tens
+ def h2(b):
+ return np.log(b + 1e-15)
+ constC1 = np.dot(np.dot(f1(C1), p.reshape(-1, 1)),
+ np.ones(len(q)).reshape(1, -1))
+ constC2 = np.dot(np.ones(len(p)).reshape(-1, 1),
+ np.dot(q.reshape(1, -1), f2(C2).T))
+ constC = constC1 + constC2
+ hC1 = h1(C1)
+ hC2 = h2(C2)
-def tensor_kl_loss(C1, C2, T):
- """
- Returns the value of \mathcal{L}(C1,C2) \otimes T with the square loss
- function as the loss function of Gromow-Wasserstein discrepancy.
+ return constC, hC1, hC2
- Where :
- C1 : Metric cost matrix in the source space
- C2 : Metric cost matrix in the target space
- T : A coupling between those two spaces
+def tensor_product(constC, hC1, hC2, T):
+ """ Return the tensor for Gromov-Wasserstein fast computation
- The square-loss function L(a,b)=(1/2)*|a-b|^2 is read as :
- L(a,b) = f1(a)+f2(b)-h1(a)*h2(b) with :
- f1(a)=a*log(a)-a
- f2(b)=b
- h1(a)=a
- h2(b)=log(b)
+ The tensor is computed as described in Proposition 1 Eq. (6) in [12].
Parameters
----------
- C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
- C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
- T : ndarray, shape (ns, nt)
- Coupling between source and target spaces
+ constC : ndarray, shape (ns, nt)
+ Constant C matrix in Eq. (6)
+ hC1 : ndarray, shape (ns, ns)
+ h1(C1) matrix in Eq. (6)
+ hC2 : ndarray, shape (nt, nt)
+ h2(C2) matrix in Eq. (6)
+
Returns
-------
+
tens : ndarray, shape (ns, nt)
\mathcal{L}(C1,C2) \otimes T tensor-matrix multiplication result
@@ -127,27 +140,78 @@ def tensor_kl_loss(C1, C2, T):
International Conference on Machine Learning (ICML). 2016.
"""
+ A = -np.dot(hC1, T).dot(hC2.T)
+ tens = constC + A
+ # tens -= tens.min()
+ return tens
- C1 = np.asarray(C1, dtype=np.float64)
- C2 = np.asarray(C2, dtype=np.float64)
- T = np.asarray(T, dtype=np.float64)
- def f1(a):
- return a * np.log(a + 1e-15) - a
+def gwloss(constC, hC1, hC2, T):
+ """ Return the Loss for Gromov-Wasserstein
- def f2(b):
- return b
+ The loss is computed as described in Proposition 1 Eq. (6) in [12].
- def h1(a):
- return a
+ Parameters
+ ----------
+ constC : ndarray, shape (ns, nt)
+ Constant C matrix in Eq. (6)
+ hC1 : ndarray, shape (ns, ns)
+ h1(C1) matrix in Eq. (6)
+ hC2 : ndarray, shape (nt, nt)
+ h2(C2) matrix in Eq. (6)
+ T : ndarray, shape (ns, nt)
+ Current value of transport matrix T
- def h2(b):
- return np.log(b + 1e-15)
+ Returns
+ -------
- tens = -np.dot(h1(C1), T).dot(h2(C2).T)
- tens -= tens.min()
+ loss : float
+ Gromov Wasserstein loss
- return tens
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
+
+ """
+
+ tens = tensor_product(constC, hC1, hC2, T)
+
+ return np.sum(tens * T)
+
+
+def gwggrad(constC, hC1, hC2, T):
+ """ Return the gradient for Gromov-Wasserstein
+
+ The gradient is computed as described in Proposition 2 in [12].
+
+ Parameters
+ ----------
+ constC : ndarray, shape (ns, nt)
+ Constant C matrix in Eq. (6)
+ hC1 : ndarray, shape (ns, ns)
+ h1(C1) matrix in Eq. (6)
+ hC2 : ndarray, shape (nt, nt)
+ h2(C2) matrix in Eq. (6)
+ T : ndarray, shape (ns, nt)
+ Current value of transport matrix T
+
+ Returns
+ -------
+
+ grad : ndarray, shape (ns, nt)
+ Gromov Wasserstein gradient
+
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
+
+ """
+ return 2 * tensor_product(constC, hC1, hC2,
+ T) # [12] Prop. 2 misses a 2 factor
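Together, init_matrix, tensor_product, gwloss and gwggrad implement the factorization of Proposition 1 in [12]: writing L(a,b) = f1(a) + f2(b) - h1(a)*h2(b), the product \mathcal{L}(C1,C2) \otimes T reduces to constC - h1(C1) T h2(C2)^T for any coupling T with marginals (p, q). A sanity sketch on tiny inputs, checking the fast form against the naive quadruple sum for the square loss (assumes the function names introduced in this diff):

import numpy as np
from ot.gromov import init_matrix, tensor_product

ns, nt = 4, 5
rng = np.random.RandomState(0)
C1 = rng.rand(ns, ns)
C2 = rng.rand(nt, nt)
p = np.full(ns, 1.0 / ns)
q = np.full(nt, 1.0 / nt)
T = np.outer(p, q)  # any coupling with marginals (p, q) works

constC, hC1, hC2 = init_matrix(C1, C2, T, p, q, 'square_loss')
fast = tensor_product(constC, hC1, hC2, T)

# naive (L(C1,C2) x T)_{i,j} = sum_{k,l} 0.5*(C1[i,k]-C2[j,l])^2 * T[k,l]
naive = np.zeros((ns, nt))
for i in range(ns):
    for j in range(nt):
        for k in range(ns):
            for l in range(nt):
                naive[i, j] += 0.5 * (C1[i, k] - C2[j, l]) ** 2 * T[k, l]

assert np.allclose(fast, naive)  # O(ns^2*nt + ns*nt^2) vs O(ns^2*nt^2)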
def update_square_loss(p, lambdas, T, Cs):
@@ -205,10 +269,169 @@ def update_kl_loss(p, lambdas, T, Cs):
return np.exp(np.divide(tmpsum, ppt))
-def gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
- max_iter=1000, tol=1e-9, verbose=False, log=False):
+def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, **kwargs):
"""
- Returns the gromov-wasserstein coupling between the two measured similarity matrices
+ Returns the gromov-wasserstein transport between (C1,p) and (C2,q)
+
+ The function solves the following optimization problem:
+
+ .. math::
+ GW = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+
+ Where :
+ C1 : Metric cost matrix in the source space
+ C2 : Metric cost matrix in the target space
+ p : distribution in the source space
+ q : distribution in the target space
+ L : loss function to account for the misfit between the similarity matrices
+
+ Parameters
+ ----------
+ C1 : ndarray, shape (ns, ns)
+ Metric cost matrix in the source space
+ C2 : ndarray, shape (nt, nt)
+ Metric cost matrix in the target space
+ p : ndarray, shape (ns,)
+ distribution in the source space
+ q : ndarray, shape (nt,)
+ distribution in the target space
+ loss_fun : string
+ loss function used for the solver either 'square_loss' or 'kl_loss'
+
+ numItermax : int, optional
+ Max number of iterations (forwarded to the ot.optim.cg solver)
+ stopThr : float, optional
+ Stop threshold on error (>0) (forwarded to the ot.optim.cg solver)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+ **kwargs : dict
+ parameters can be directly passed to the ot.optim.cg solver
+
+ Returns
+ -------
+ T : ndarray, shape (ns, nt)
+ coupling between the two spaces that minimizes :
+ \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+ log : dict
+ convergence information and loss
+
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
+
+ .. [13] Mémoli, Facundo. Gromov–Wasserstein distances and the
+ metric approach to object matching. Foundations of computational
+ mathematics 11.4 (2011): 417-487.
+
+ """
+
+ T = np.eye(len(p), len(q))
+
+ constC, hC1, hC2 = init_matrix(C1, C2, T, p, q, loss_fun)
+
+ G0 = p[:, None] * q[None, :]
+
+ def f(G):
+ return gwloss(constC, hC1, hC2, G)
+
+ def df(G):
+ return gwggrad(constC, hC1, hC2, G)
+
+ if log:
+ res, log = cg(p, q, 0, 1, f, df, G0, log=True, **kwargs)
+ log['gw_dist'] = gwloss(constC, hC1, hC2, res)
+ return res, log
+ else:
+ return cg(p, q, 0, 1, f, df, G0, **kwargs)
+
+
+def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, **kwargs):
+ """
+ Returns the gromov-wasserstein discrepancy between (C1,p) and (C2,q)
+
+ The function solves the following optimization problem:
+
+ .. math::
+ GW = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+
+ Where :
+ C1 : Metric cost matrix in the source space
+ C2 : Metric cost matrix in the target space
+ p : distribution in the source space
+ q : distribution in the target space
+ L : loss function to account for the misfit between the similarity matrices
+
+ Parameters
+ ----------
+ C1 : ndarray, shape (ns, ns)
+ Metric cost matrix in the source space
+ C2 : ndarray, shape (nt, nt)
+ Metric cost matrix in the target space
+ p : ndarray, shape (ns,)
+ distribution in the source space
+ q : ndarray, shape (nt,)
+ distribution in the target space
+ loss_fun : string
+ loss function used for the solver either 'square_loss' or 'kl_loss'
+
+ numItermax : int, optional
+ Max number of iterations (forwarded to the ot.optim.cg solver)
+ stopThr : float, optional
+ Stop threshold on error (>0) (forwarded to the ot.optim.cg solver)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+ **kwargs : dict
+ parameters can be directly passed to the ot.optim.cg solver
+
+ Returns
+ -------
+ gw_dist : float
+ Gromov-Wasserstein distance
+ log : dict
+ convergence information and coupling matrix
+
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
+
+ .. [13] Mémoli, Facundo. Gromov–Wasserstein distances and the
+ metric approach to object matching. Foundations of computational
+ mathematics 11.4 (2011): 417-487.
+
+ """
+
+ T = np.eye(len(p), len(q))
+
+ constC, hC1, hC2 = init_matrix(C1, C2, T, p, q, loss_fun)
+
+ G0 = p[:, None] * q[None, :]
+
+ def f(G):
+ return gwloss(constC, hC1, hC2, G)
+
+ def df(G):
+ return gwggrad(constC, hC1, hC2, G)
+ res, logv = cg(p, q, 0, 1, f, df, G0, log=True, **kwargs)
+ logv['gw_dist'] = gwloss(constC, hC1, hC2, res)
+ logv['T'] = res
+ if log:
+ return logv['gw_dist'], logv
+ else:
+ return logv['gw_dist']
+
+
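Usage changes accordingly: the exact solvers take no epsilon, and extra keyword arguments are forwarded to the ot.optim.cg conditional-gradient solver (e.g. numItermax and stopThr, as in the gromov_barycenters call further down). A minimal sketch with synthetic point clouds (sizes and seeds are illustrative):

import numpy as np
import ot
from ot.gromov import gromov_wasserstein, gromov_wasserstein2

rng = np.random.RandomState(0)
xs = rng.randn(30, 2)
xt = rng.randn(40, 3)  # GW can compare spaces of different dimension

C1, C2 = ot.dist(xs, xs), ot.dist(xt, xt)
C1, C2 = C1 / C1.max(), C2 / C2.max()
p, q = ot.unif(30), ot.unif(40)

# coupling; the extra kwargs go to ot.optim.cg
T = gromov_wasserstein(C1, C2, p, q, 'square_loss',
                       numItermax=500, stopThr=1e-6)
# discrepancy value, with the coupling stored in the log
gw_dist, log = gromov_wasserstein2(C1, C2, p, q, 'square_loss', log=True)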
+def entropic_gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
+ max_iter=1000, tol=1e-9, verbose=False, log=False):
+ """
+ Returns the entropic gromov-wasserstein transport between (C1,p) and (C2,q)
@@ -259,6 +482,13 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
T : ndarray, shape (ns, nt)
coupling between the two spaces that minimizes :
\sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
+
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
+
"""
C1 = np.asarray(C1, dtype=np.float64)
@@ -266,18 +496,20 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
T = np.outer(p, q) # Initialization
+ constC, hC1, hC2 = init_matrix(C1, C2, T, p, q, loss_fun)
+
cpt = 0
err = 1
+ if log:
+ log = {'err': []}
+
while (err > tol and cpt < max_iter):
Tprev = T
- if loss_fun == 'square_loss':
- tens = tensor_square_loss(C1, C2, T)
-
- elif loss_fun == 'kl_loss':
- tens = tensor_kl_loss(C1, C2, T)
+ # compute the gradient
+ tens = gwggrad(constC, hC1, hC2, T)
T = sinkhorn(p, q, tens, epsilon)
@@ -298,15 +530,16 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
cpt += 1
if log:
+ log['gw_dist'] = gwloss(constC, hC1, hC2, T)
return T, log
else:
return T
-def gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
- max_iter=1000, tol=1e-9, verbose=False, log=False):
+def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
+ max_iter=1000, tol=1e-9, verbose=False, log=False):
"""
- Returns the gromov-wasserstein discrepancy between the two measured similarity matrices
+ Returns the entropic gromov-wasserstein discrepancy between the two measured similarity matrices
(C1,p) and (C2,q)
@@ -350,29 +583,28 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
-------
gw_dist : float
Gromov-Wasserstein distance
- """
- if log:
- gw, logv = gromov_wasserstein(
- C1, C2, p, q, loss_fun, epsilon, max_iter, tol, verbose, log)
- else:
- gw = gromov_wasserstein(C1, C2, p, q, loss_fun,
- epsilon, max_iter, tol, verbose, log)
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
- if loss_fun == 'square_loss':
- gw_dist = np.sum(gw * tensor_square_loss(C1, C2, gw))
+ """
- elif loss_fun == 'kl_loss':
- gw_dist = np.sum(gw * tensor_kl_loss(C1, C2, gw))
+ gw, logv = entropic_gromov_wasserstein(
+ C1, C2, p, q, loss_fun, epsilon, max_iter, tol, verbose, log=True)
+
+ logv['T'] = gw
if log:
- return gw_dist, logv
+ return logv['gw_dist'], logv
else:
- return gw_dist
+ return logv['gw_dist']
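The Sinkhorn-based solvers keep the previous behavior under the entropic_* names: each iteration evaluates the gradient with gwggrad and projects it with sinkhorn at regularization epsilon. A short sketch, reusing C1, C2, p, q from the previous sketch (epsilon is illustrative):

from ot.gromov import (entropic_gromov_wasserstein,
                       entropic_gromov_wasserstein2)

T_ent = entropic_gromov_wasserstein(C1, C2, p, q, 'square_loss',
                                    epsilon=5e-2, max_iter=1000, tol=1e-9)
gw_ent, log_ent = entropic_gromov_wasserstein2(C1, C2, p, q, 'square_loss',
                                               epsilon=5e-2, log=True)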
-def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
- max_iter=1000, tol=1e-9, verbose=False, log=False, init_C=None):
+def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
+ max_iter=1000, tol=1e-9, verbose=False, log=False, init_C=None):
"""
Returns the gromov-wasserstein barycenters of S measured similarity matrices
@@ -421,6 +653,120 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
-------
C : ndarray, shape (N, N)
Similarity matrix in the barycenter space (permuted arbitrarily)
+
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
+
+ """
+
+ S = len(Cs)
+
+ Cs = [np.asarray(Cs[s], dtype=np.float64) for s in range(S)]
+ lambdas = np.asarray(lambdas, dtype=np.float64)
+
+ # Initialization of C : random SPD matrix (if not provided by user)
+ if init_C is None:
+ xalea = np.random.randn(N, 2)
+ C = dist(xalea, xalea)
+ C /= C.max()
+ else:
+ C = init_C
+
+ cpt = 0
+ err = 1
+
+ error = []
+
+ if log:
+ log = {'err': []}
+
+ while(err > tol and cpt < max_iter):
+ Cprev = C
+
+ T = [entropic_gromov_wasserstein(Cs[s], C, ps[s], p, loss_fun, epsilon,
+ max_iter, 1e-5, verbose) for s in range(S)]
+ if loss_fun == 'square_loss':
+ C = update_square_loss(p, lambdas, T, Cs)
+
+ elif loss_fun == 'kl_loss':
+ C = update_kl_loss(p, lambdas, T, Cs)
+
+ if cpt % 10 == 0:
+ # we can speed up the process by checking for the error only every
+ # 10th iteration
+ err = np.linalg.norm(C - Cprev)
+ error.append(err)
+
+ if log:
+ log['err'].append(err)
+
+ if verbose:
+ if cpt % 200 == 0:
+ print('{:5s}|{:12s}'.format(
+ 'It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(cpt, err))
+
+ cpt += 1
+
+ if log:
+ return C, log
+ else:
+ return C
+
+
+def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun,
+ max_iter=1000, tol=1e-9, verbose=False, log=False, init_C=None):
+ """
+ Returns the gromov-wasserstein barycenters of S measured similarity matrices
+
+ (Cs)_{s=1}^{s=S}
+
+ The function solves the following optimization problem with block
+ coordinate descent:
+
+ .. math::
+ C = \arg\min_{C\in \mathbb{R}^{N\times N}} \sum_s \lambda_s GW(C,C_s,p,p_s)
+
+
+ Where :
+
+ Cs : metric cost matrix
+ ps : distribution
+
+ Parameters
+ ----------
+ N : Integer
+ Size of the targeted barycenter
+ Cs : list of S np.ndarray(ns,ns)
+ Metric cost matrices
+ ps : list of S np.ndarray(ns,)
+ sample weights in the S spaces
+ p : ndarray, shape(N,)
+ weights in the targeted barycenter
+ lambdas : list of float
+ list of the S spaces' weights
+ loss_fun : string
+ loss function used for the solver, either 'square_loss' or 'kl_loss'
+ max_iter : int, optional
+ Max number of iterations
+ tol : float, optional
+ Stop threshold on error (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+ init_C : ndarray, shape (N, N), optional
+ initial value for the C matrix provided by the user (random if None)
+
+ Returns
+ -------
+ C : ndarray, shape (N, N)
+ Similarity matrix in the barycenter space (permuted arbitrarily)
+
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
+
"""
S = len(Cs)
@@ -444,8 +790,8 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
while(err > tol and cpt < max_iter):
Cprev = C
- T = [gromov_wasserstein(Cs[s], C, ps[s], p, loss_fun, epsilon,
- max_iter, 1e-5, verbose, log) for s in range(S)]
+ T = [gromov_wasserstein(Cs[s], C, ps[s], p, loss_fun,
+ numItermax=max_iter, stopThr=1e-5, verbose=verbose) for s in range(S)]
if loss_fun == 'square_loss':
C = update_square_loss(p, lambdas, T, Cs)
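The barycenter routines follow the same split: gromov_barycenters drops epsilon and solves each inner GW problem with the cg solver, while entropic_gromov_barycenters keeps the Sinkhorn path. A sketch computing an 8-point barycenter of two spaces (sizes, weights and epsilon are illustrative):

import numpy as np
import ot
from ot.gromov import gromov_barycenters, entropic_gromov_barycenters

rng = np.random.RandomState(0)
C1 = ot.dist(rng.randn(10, 2))  # dist(x) computes the matrix of x with itself
C2 = ot.dist(rng.randn(15, 2))
Cs = [C1 / C1.max(), C2 / C2.max()]
ps = [ot.unif(10), ot.unif(15)]

N = 8
p = ot.unif(N)
lambdas = [0.5, 0.5]

C = gromov_barycenters(N, Cs, ps, p, lambdas, 'square_loss',
                       max_iter=100, tol=1e-5)
C_ent = entropic_gromov_barycenters(N, Cs, ps, p, lambdas, 'square_loss',
                                    epsilon=5e-2, max_iter=100, tol=1e-5)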
diff --git a/ot/utils.py b/ot/utils.py
index 31a002b..9eab3fc 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -223,6 +223,7 @@ def check_params(**kwargs):
class deprecated(object):
+
"""Decorator to mark a function or class as deprecated.
deprecated class from scikit-learn package
@@ -320,6 +321,7 @@ def _is_deprecated(func):
class BaseEstimator(object):
+
"""Base class for most objects in POT
adapted from sklearn BaseEstimator class