author     Rémi Flamary <remi.flamary@gmail.com>  2018-02-20 16:11:56 +0100
committer  Rémi Flamary <remi.flamary@gmail.com>  2018-02-20 16:11:56 +0100
commit     6d9b281271167d3676538f2ef8518abea82ef9c8 (patch)
tree       7d1fae1d15a0ec70e229819a68b9f3a1ceea8f02 /ot
parent     806a406e1ca2e9ca0bfdfe0516c75865e8098205 (diff)
parent     5ff8030ce300f3d066e1edba2b36e60709b023b8 (diff)
Merge branch 'master' of github.com:rflamary/POT
Diffstat (limited to 'ot')
-rw-r--r--  ot/__init__.py      7
-rw-r--r--  ot/da.py           26
-rw-r--r--  ot/gpu/__init__.py  2
-rw-r--r--  ot/gpu/da.py        3
-rw-r--r--  ot/gromov.py      554
-rw-r--r--  ot/utils.py         2
6 files changed, 474 insertions, 120 deletions
diff --git a/ot/__init__.py b/ot/__init__.py
index a5df43d..1500e59 100644
--- a/ot/__init__.py
+++ b/ot/__init__.py
@@ -16,7 +16,6 @@ from . import bregman
from . import optim
from . import utils
from . import datasets
-from . import plot
from . import da
from . import gromov
@@ -24,7 +23,6 @@ from . import gromov
from .lp import emd, emd2
from .bregman import sinkhorn, sinkhorn2, barycenter
from .da import sinkhorn_lpl1_mm
-from .gromov import gromov_wasserstein, gromov_wasserstein2
# utils functions
from .utils import dist, unif, tic, toc, toq
@@ -32,6 +30,5 @@ from .utils import dist, unif, tic, toc, toq
__version__ = "0.4.0"
__all__ = ["emd", "emd2", "sinkhorn", "sinkhorn2", "utils", 'datasets',
- 'bregman', 'lp', 'plot', 'tic', 'toc', 'toq',
- 'dist', 'unif', 'barycenter', 'sinkhorn_lpl1_mm', 'da', 'optim',
- 'gromov_wasserstein','gromov_wasserstein2']
+ 'bregman', 'lp', 'tic', 'toc', 'toq', 'gromov',
+ 'dist', 'unif', 'barycenter', 'sinkhorn_lpl1_mm', 'da', 'optim']
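With this merge, importing ot no longer pulls in ot.plot (and therefore matplotlib), and the Gromov-Wasserstein solvers are no longer re-exported at the package top level: gromov is exposed as a submodule instead. A minimal sketch of the updated access path, assuming POT 0.4.0 with this change applied:

import ot
from ot.gromov import gromov_wasserstein  # was: ot.gromov_wasserstein

# the submodule is imported with the package, so attribute access works too
solver = ot.gromov.gromov_wasserstein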
diff --git a/ot/da.py b/ot/da.py
index 1d3d0ba..c688654 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -330,17 +330,17 @@ def joint_OT_mapping_linear(xs, xt, mu=1, eta=0.001, bias=False, verbose=False,
if bias:
xs1 = np.hstack((xs, np.ones((ns, 1))))
xstxs = xs1.T.dot(xs1)
- I = np.eye(d + 1)
- I[-1] = 0
- I0 = I[:, :-1]
+ Id = np.eye(d + 1)
+ Id[-1] = 0
+ I0 = Id[:, :-1]
def sel(x):
return x[:-1, :]
else:
xs1 = xs
xstxs = xs1.T.dot(xs1)
- I = np.eye(d)
- I0 = I
+ Id = np.eye(d)
+ I0 = Id
def sel(x):
return x
@@ -361,7 +361,7 @@ def joint_OT_mapping_linear(xs, xt, mu=1, eta=0.001, bias=False, verbose=False,
def solve_L(G):
""" solve L problem with fixed G (least square)"""
xst = ns * G.dot(xt)
- return np.linalg.solve(xstxs + eta * I, xs1.T.dot(xst) + eta * I0)
+ return np.linalg.solve(xstxs + eta * Id, xs1.T.dot(xst) + eta * I0)
def solve_G(L, G0):
"""Update G with CG algorithm"""
@@ -520,8 +520,8 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian',
K = kernel(xs, xs, method=kerneltype, sigma=sigma)
if bias:
K1 = np.hstack((K, np.ones((ns, 1))))
- I = np.eye(ns + 1)
- I[-1] = 0
+ Id = np.eye(ns + 1)
+ Id[-1] = 0
Kp = np.eye(ns + 1)
Kp[:ns, :ns] = K
@@ -535,14 +535,14 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian',
else:
K1 = K
- I = np.eye(ns)
+ Id = np.eye(ns)
# ls regul
# K0 = K1.T.dot(K1)+eta*I
# Kreg=I
# proper kernel ridge
- K0 = K + eta * I
+ K0 = K + eta * Id
Kreg = K
if log:
@@ -933,6 +933,7 @@ def distribution_estimation_uniform(X):
class BaseTransport(BaseEstimator):
+
"""Base class for OTDA objects
Notes
@@ -1180,6 +1181,7 @@ class BaseTransport(BaseEstimator):
class SinkhornTransport(BaseTransport):
+
"""Domain Adapatation OT method based on Sinkhorn Algorithm
Parameters
@@ -1289,6 +1291,7 @@ class SinkhornTransport(BaseTransport):
class EMDTransport(BaseTransport):
+
"""Domain Adapatation OT method based on Earth Mover's Distance
Parameters
@@ -1377,6 +1380,7 @@ class EMDTransport(BaseTransport):
class SinkhornLpl1Transport(BaseTransport):
+
"""Domain Adapatation OT method based on sinkhorn algorithm +
LpL1 class regularization.
@@ -1486,6 +1490,7 @@ class SinkhornLpl1Transport(BaseTransport):
class SinkhornL1l2Transport(BaseTransport):
+
"""Domain Adapatation OT method based on sinkhorn algorithm +
l1l2 class regularization.
@@ -1608,6 +1613,7 @@ class SinkhornL1l2Transport(BaseTransport):
class MappingTransport(BaseEstimator):
+
"""MappingTransport: DA methods that aims at jointly estimating a optimal
transport coupling and the associated mapping
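Beyond the PEP8 blank lines after class statements, these hunks rename the ambiguous single-letter identifier I to Id (flake8 E741); the logic is unchanged. In the bias=True branch, a ones column is appended to xs and the last row of Id is zeroed, so the ridge term eta * Id regularizes the linear map but leaves the intercept free. A self-contained sketch of that solve, with synthetic stand-in data (shapes and values are illustrative, not POT API):

import numpy as np

ns, d = 50, 3
xs = np.random.randn(ns, d)
xst = np.random.randn(ns, d)  # stand-in for ns * G.dot(xt) in solve_L
eta = 0.001

xs1 = np.hstack((xs, np.ones((ns, 1))))  # append the bias column
Id = np.eye(d + 1)
Id[-1] = 0                               # do not regularize the bias row
I0 = Id[:, :-1]

# normal equations, as in joint_OT_mapping_linear's solve_L
L = np.linalg.solve(xs1.T.dot(xs1) + eta * Id, xs1.T.dot(xst) + eta * I0)
# L[:d] is the d x d linear map, L[-1] the unregularized bias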
diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py
index c8f9433..a2fdd3d 100644
--- a/ot/gpu/__init__.py
+++ b/ot/gpu/__init__.py
@@ -5,7 +5,7 @@ from . import da
from .bregman import sinkhorn
# Author: Remi Flamary <remi.flamary@unice.fr>
-# Leo Gautheron <https://github.com/aje>
+# Leo Gautheron <https://github.com/aje>
#
# License: MIT License
diff --git a/ot/gpu/da.py b/ot/gpu/da.py
index 05c580f..71a485a 100644
--- a/ot/gpu/da.py
+++ b/ot/gpu/da.py
@@ -188,6 +188,7 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
class OTDA_GPU(OTDA):
+
def normalizeM(self, norm):
if norm == "median":
self.M_GPU.divide(float(np.median(self.M_GPU.asarray())))
@@ -204,6 +205,7 @@ class OTDA_GPU(OTDA):
class OTDA_sinkhorn(OTDA_GPU):
+
def fit(self, xs, xt, reg=1, ws=None, wt=None, norm=None, **kwargs):
cudamat.init()
xs = np.asarray(xs, dtype=np.float64)
@@ -228,6 +230,7 @@ class OTDA_sinkhorn(OTDA_GPU):
class OTDA_lpl1(OTDA_GPU):
+
def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, norm=None,
**kwargs):
cudamat.init()
diff --git a/ot/gromov.py b/ot/gromov.py
index 20bf7ee..2a23873 100644
--- a/ot/gromov.py
+++ b/ot/gromov.py
@@ -9,6 +9,7 @@ Gromov-Wasserstein transport method
# Author: Erwan Vautier <erwan.vautier@gmail.com>
# Nicolas Courty <ncourty@irisa.fr>
+# Rémi Flamary <remi.flamary@unice.fr>
#
# License: MIT License
@@ -16,40 +17,35 @@ import numpy as np
from .bregman import sinkhorn
from .utils import dist
+from .optim import cg
-def square_loss(a, b):
- """
- Returns the value of L(a,b)=(1/2)*|a-b|^2
- """
-
- return 0.5 * (a - b)**2
+def init_matrix(C1, C2, T, p, q, loss_fun='square_loss'):
+ """ Return loss matrices and tensors for Gromov-Wasserstein fast computation
-
-def kl_loss(a, b):
- """
- Returns the value of L(a,b)=a*log(a/b)-a+b
- """
-
- return a * np.log(a / b) - a + b
-
-
-def tensor_square_loss(C1, C2, T):
- """
- Returns the value of \mathcal{L}(C1,C2) \otimes T with the square loss
+ Returns the value of \mathcal{L}(C1,C2) \otimes T with the selected loss
function as the loss function of Gromov-Wasserstein discrepancy.
+ The matrices are computed as described in Proposition 1 in [12]
+
Where :
- C1 : Metric cost matrix in the source space
- C2 : Metric cost matrix in the target space
- T : A coupling between those two spaces
+ * C1 : Metric cost matrix in the source space
+ * C2 : Metric cost matrix in the target space
+ * T : A coupling between those two spaces
The square-loss function L(a,b)=(1/2)*|a-b|^2 is read as :
L(a,b) = f1(a)+f2(b)-h1(a)*h2(b) with :
- f1(a)=(a^2)/2
- f2(b)=(b^2)/2
- h1(a)=a
- h2(b)=b
+ * f1(a)=(a^2)/2
+ * f2(b)=(b^2)/2
+ * h1(a)=a
+ * h2(b)=b
+
+ The kl-loss function L(a,b)=a*log(a/b)-a+b is read as :
+ L(a,b) = f1(a)+f2(b)-h1(a)*h2(b) with :
+ * f1(a)=a*log(a)-a
+ * f2(b)=b
+ * h1(a)=a
+ * h2(b)=log(b)
Parameters
----------
@@ -57,66 +53,83 @@ def tensor_square_loss(C1, C2, T):
Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
Metric cost matrix in the target space
- T : ndarray, shape (ns, nt)
+ T : ndarray, shape (ns, nt)
Coupling between source and target spaces
+ p : ndarray, shape (ns,)
+ distribution in the source space
+ q : ndarray, shape (nt,)
+ distribution in the target space
+ loss_fun : string, optional
+ loss function used for the solver, either 'square_loss' or 'kl_loss'
+
Returns
-------
- tens : ndarray, shape (ns, nt)
- \mathcal{L}(C1,C2) \otimes T tensor-matrix multiplication result
+
+ constC : ndarray, shape (ns, nt)
+ Constant C matrix in Eq. (6)
+ hC1 : ndarray, shape (ns, ns)
+ h1(C1) matrix in Eq. (6)
+ hC2 : ndarray, shape (nt, nt)
+ h2(C2) matrix in Eq. (6)
+
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
+
"""
- C1 = np.asarray(C1, dtype=np.float64)
- C2 = np.asarray(C2, dtype=np.float64)
- T = np.asarray(T, dtype=np.float64)
+ if loss_fun == 'square_loss':
+ def f1(a):
+ return (a**2) / 2
- def f1(a):
- return (a**2) / 2
+ def f2(b):
+ return (b**2) / 2
- def f2(b):
- return (b**2) / 2
+ def h1(a):
+ return a
- def h1(a):
- return a
+ def h2(b):
+ return b
+ elif loss_fun == 'kl_loss':
+ def f1(a):
+ return a * np.log(a + 1e-15) - a
- def h2(b):
- return b
+ def f2(b):
+ return b
- tens = -np.dot(h1(C1), T).dot(h2(C2).T)
- tens -= tens.min()
+ def h1(a):
+ return a
- return tens
+ def h2(b):
+ return np.log(b + 1e-15)
+ constC1 = np.dot(np.dot(f1(C1), p.reshape(-1, 1)),
+ np.ones(len(q)).reshape(1, -1))
+ constC2 = np.dot(np.ones(len(p)).reshape(-1, 1),
+ np.dot(q.reshape(1, -1), f2(C2).T))
+ constC = constC1 + constC2
+ hC1 = h1(C1)
+ hC2 = h2(C2)
-def tensor_kl_loss(C1, C2, T):
- """
- Returns the value of \mathcal{L}(C1,C2) \otimes T with the square loss
- function as the loss function of Gromow-Wasserstein discrepancy.
+ return constC, hC1, hC2
- Where :
- C1 : Metric cost matrix in the source space
- C2 : Metric cost matrix in the target space
- T : A coupling between those two spaces
+def tensor_product(constC, hC1, hC2, T):
+ """ Return the tensor for Gromov-Wasserstein fast computation
- The square-loss function L(a,b)=(1/2)*|a-b|^2 is read as :
- L(a,b) = f1(a)+f2(b)-h1(a)*h2(b) with :
- f1(a)=a*log(a)-a
- f2(b)=b
- h1(a)=a
- h2(b)=log(b)
+ The tensor is computed as described in Proposition 1 Eq. (6) in [12].
Parameters
----------
- C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
- C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
- T : ndarray, shape (ns, nt)
- Coupling between source and target spaces
+ constC : ndarray, shape (ns, nt)
+ Constant C matrix in Eq. (6)
+ hC1 : ndarray, shape (ns, ns)
+ h1(C1) matrix in Eq. (6)
+ hC2 : ndarray, shape (nt, nt)
+ h2(C2) matrix in Eq. (6)
+
Returns
-------
+
tens : ndarray, shape (ns, nt)
\mathcal{L}(C1,C2) \otimes T tensor-matrix multiplication result
@@ -127,27 +140,78 @@ def tensor_kl_loss(C1, C2, T):
International Conference on Machine Learning (ICML). 2016.
"""
+ A = -np.dot(hC1, T).dot(hC2.T)
+ tens = constC + A
+ # tens -= tens.min()
+ return tens
- C1 = np.asarray(C1, dtype=np.float64)
- C2 = np.asarray(C2, dtype=np.float64)
- T = np.asarray(T, dtype=np.float64)
- def f1(a):
- return a * np.log(a + 1e-15) - a
+def gwloss(constC, hC1, hC2, T):
+ """ Return the Loss for Gromov-Wasserstein
- def f2(b):
- return b
+ The loss is computed as described in Proposition 1 Eq. (6) in [12].
- def h1(a):
- return a
+ Parameters
+ ----------
+ constC : ndarray, shape (ns, nt)
+ Constant C matrix in Eq. (6)
+ hC1 : ndarray, shape (ns, ns)
+ h1(C1) matrix in Eq. (6)
+ hC2 : ndarray, shape (nt, nt)
+ h2(C2) matrix in Eq. (6)
+ T : ndarray, shape (ns, nt)
+ Current value of transport matrix T
- def h2(b):
- return np.log(b + 1e-15)
+ Returns
+ -------
- tens = -np.dot(h1(C1), T).dot(h2(C2).T)
- tens -= tens.min()
+ loss : float
+ Gromov Wasserstein loss
- return tens
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
+
+ """
+
+ tens = tensor_product(constC, hC1, hC2, T)
+
+ return np.sum(tens * T)
+
+
+def gwggrad(constC, hC1, hC2, T):
+ """ Return the gradient for Gromov-Wasserstein
+
+ The gradient is computed as described in Proposition 2 in [12].
+
+ Parameters
+ ----------
+ constC : ndarray, shape (ns, nt)
+ Constant C matrix in Eq. (6)
+ hC1 : ndarray, shape (ns, ns)
+ h1(C1) matrix in Eq. (6)
+ hC2 : ndarray, shape (nt, nt)
+ h2(C2) matrix in Eq. (6)
+ T : ndarray, shape (ns, nt)
+ Current value of transport matrix T
+
+ Returns
+ -------
+
+ grad : ndarray, shape (ns, nt)
+ Gromov Wasserstein gradient
+
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
+
+ """
+ return 2 * tensor_product(constC, hC1, hC2,
+ T) # [12] Prop. 2 misses a 2 factor
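Together, init_matrix, tensor_product, gwloss and gwggrad implement the factorization of Proposition 1 in [12]: writing L(a,b) = f1(a) + f2(b) - h1(a)*h2(b), the product \mathcal{L}(C1,C2) \otimes T reduces to constC - h1(C1) T h2(C2)^T for any coupling T with marginals (p, q). A sanity sketch on tiny inputs, checking the fast form against the naive quadruple sum for the square loss (assumes the function names introduced in this diff):

import numpy as np
from ot.gromov import init_matrix, tensor_product

ns, nt = 4, 5
rng = np.random.RandomState(0)
C1 = rng.rand(ns, ns)
C2 = rng.rand(nt, nt)
p = np.full(ns, 1.0 / ns)
q = np.full(nt, 1.0 / nt)
T = np.outer(p, q)  # any coupling with marginals (p, q) works

constC, hC1, hC2 = init_matrix(C1, C2, T, p, q, 'square_loss')
fast = tensor_product(constC, hC1, hC2, T)

# naive (L(C1,C2) x T)_{i,j} = sum_{k,l} 0.5*(C1[i,k]-C2[j,l])^2 * T[k,l]
naive = np.zeros((ns, nt))
for i in range(ns):
    for j in range(nt):
        for k in range(ns):
            for l in range(nt):
                naive[i, j] += 0.5 * (C1[i, k] - C2[j, l]) ** 2 * T[k, l]

assert np.allclose(fast, naive)  # O(ns^2*nt + ns*nt^2) vs O(ns^2*nt^2)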
def update_square_loss(p, lambdas, T, Cs):
@@ -205,10 +269,169 @@ def update_kl_loss(p, lambdas, T, Cs):
return np.exp(np.divide(tmpsum, ppt))
-def gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
- max_iter=1000, tol=1e-9, verbose=False, log=False):
+def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, **kwargs):
"""
- Returns the gromov-wasserstein coupling between the two measured similarity matrices
+ Returns the gromov-wasserstein transport between (C1,p) and (C2,q)
+
+ The function solves the following optimization problem:
+
+ .. math::
+ GW = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+
+ Where :
+ C1 : Metric cost matrix in the source space
+ C2 : Metric cost matrix in the target space
+ p : distribution in the source space
+ q : distribution in the target space
+ L : loss function to account for the misfit between the similarity matrices
+
+ Parameters
+ ----------
+ C1 : ndarray, shape (ns, ns)
+ Metric cost matrix in the source space
+ C2 : ndarray, shape (nt, nt)
+ Metric cost matrix in the target space
+ p : ndarray, shape (ns,)
+ distribution in the source space
+ q : ndarray, shape (nt,)
+ distribution in the target space
+ loss_fun : string
+ loss function used for the solver either 'square_loss' or 'kl_loss'
+
+ numItermax : int, optional
+ Max number of iterations (forwarded to the ot.optim.cg solver)
+ stopThr : float, optional
+ Stop threshold on error (>0) (forwarded to the ot.optim.cg solver)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+ **kwargs : dict
+ parameters can be directly passed to the ot.optim.cg solver
+
+ Returns
+ -------
+ T : ndarray, shape (ns, nt)
+ coupling between the two spaces that minimizes :
+ \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+ log : dict
+ convergence information and loss
+
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
+
+ .. [13] Mémoli, Facundo. Gromov–Wasserstein distances and the
+ metric approach to object matching. Foundations of computational
+ mathematics 11.4 (2011): 417-487.
+
+ """
+
+ T = np.eye(len(p), len(q))
+
+ constC, hC1, hC2 = init_matrix(C1, C2, T, p, q, loss_fun)
+
+ G0 = p[:, None] * q[None, :]
+
+ def f(G):
+ return gwloss(constC, hC1, hC2, G)
+
+ def df(G):
+ return gwggrad(constC, hC1, hC2, G)
+
+ if log:
+ res, log = cg(p, q, 0, 1, f, df, G0, log=True, **kwargs)
+ log['gw_dist'] = gwloss(constC, hC1, hC2, res)
+ return res, log
+ else:
+ return cg(p, q, 0, 1, f, df, G0, **kwargs)
+
+
+def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, **kwargs):
+ """
+ Returns the gromov-wasserstein discrepancy between (C1,p) and (C2,q)
+
+ The function solves the following optimization problem:
+
+ .. math::
+ GW = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+
+ Where :
+ C1 : Metric cost matrix in the source space
+ C2 : Metric cost matrix in the target space
+ p : distribution in the source space
+ q : distribution in the target space
+ L : loss function to account for the misfit between the similarity matrices
+
+ Parameters
+ ----------
+ C1 : ndarray, shape (ns, ns)
+ Metric cost matrix in the source space
+ C2 : ndarray, shape (nt, nt)
+ Metric cost matrix in the target space
+ p : ndarray, shape (ns,)
+ distribution in the source space
+ q : ndarray, shape (nt,)
+ distribution in the target space
+ loss_fun : string
+ loss function used for the solver either 'square_loss' or 'kl_loss'
+
+ numItermax : int, optional
+ Max number of iterations (forwarded to the ot.optim.cg solver)
+ stopThr : float, optional
+ Stop threshold on error (>0) (forwarded to the ot.optim.cg solver)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+ **kwargs : dict
+ parameters can be directly passed to the ot.optim.cg solver
+
+ Returns
+ -------
+ gw_dist : float
+ Gromov-Wasserstein distance
+ log : dict
+ convergence information and coupling matrix
+
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
+
+ .. [13] Mémoli, Facundo. Gromov–Wasserstein distances and the
+ metric approach to object matching. Foundations of computational
+ mathematics 11.4 (2011): 417-487.
+
+ """
+
+ T = np.eye(len(p), len(q))
+
+ constC, hC1, hC2 = init_matrix(C1, C2, T, p, q, loss_fun)
+
+ G0 = p[:, None] * q[None, :]
+
+ def f(G):
+ return gwloss(constC, hC1, hC2, G)
+
+ def df(G):
+ return gwggrad(constC, hC1, hC2, G)
+ res, logv = cg(p, q, 0, 1, f, df, G0, log=True, **kwargs)
+ logv['gw_dist'] = gwloss(constC, hC1, hC2, res)
+ logv['T'] = res
+ if log:
+ return logv['gw_dist'], logv
+ else:
+ return logv['gw_dist']
+
+
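Usage changes accordingly: the exact solvers take no epsilon, and extra keyword arguments are forwarded to the ot.optim.cg conditional-gradient solver (e.g. numItermax and stopThr, as in the gromov_barycenters call further down). A minimal sketch with synthetic point clouds (sizes and seeds are illustrative):

import numpy as np
import ot
from ot.gromov import gromov_wasserstein, gromov_wasserstein2

rng = np.random.RandomState(0)
xs = rng.randn(30, 2)
xt = rng.randn(40, 3)  # GW can compare spaces of different dimension

C1, C2 = ot.dist(xs, xs), ot.dist(xt, xt)
C1, C2 = C1 / C1.max(), C2 / C2.max()
p, q = ot.unif(30), ot.unif(40)

# coupling; the extra kwargs go to ot.optim.cg
T = gromov_wasserstein(C1, C2, p, q, 'square_loss',
                       numItermax=500, stopThr=1e-6)
# discrepancy value, with the coupling stored in the log
gw_dist, log = gromov_wasserstein2(C1, C2, p, q, 'square_loss', log=True)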
+def entropic_gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
+ max_iter=1000, tol=1e-9, verbose=False, log=False):
+ """
+ Returns the entropic gromov-wasserstein transport between (C1,p) and (C2,q)
@@ -259,6 +482,13 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
T : ndarray, shape (ns, nt)
coupling between the two spaces that minimizes :
\sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
+
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
+
"""
C1 = np.asarray(C1, dtype=np.float64)
@@ -266,18 +496,20 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
T = np.outer(p, q) # Initialization
+ constC, hC1, hC2 = init_matrix(C1, C2, T, p, q, loss_fun)
+
cpt = 0
err = 1
+ if log:
+ log = {'err': []}
+
while (err > tol and cpt < max_iter):
Tprev = T
- if loss_fun == 'square_loss':
- tens = tensor_square_loss(C1, C2, T)
-
- elif loss_fun == 'kl_loss':
- tens = tensor_kl_loss(C1, C2, T)
+ # compute the gradient
+ tens = gwggrad(constC, hC1, hC2, T)
T = sinkhorn(p, q, tens, epsilon)
@@ -298,15 +530,16 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
cpt += 1
if log:
+ log['gw_dist'] = gwloss(constC, hC1, hC2, T)
return T, log
else:
return T
-def gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
- max_iter=1000, tol=1e-9, verbose=False, log=False):
+def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
+ max_iter=1000, tol=1e-9, verbose=False, log=False):
"""
- Returns the gromov-wasserstein discrepancy between the two measured similarity matrices
+ Returns the entropic gromov-wasserstein discrepancy between the two measured similarity matrices
(C1,p) and (C2,q)
@@ -350,29 +583,28 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
-------
gw_dist : float
Gromov-Wasserstein distance
- """
- if log:
- gw, logv = gromov_wasserstein(
- C1, C2, p, q, loss_fun, epsilon, max_iter, tol, verbose, log)
- else:
- gw = gromov_wasserstein(C1, C2, p, q, loss_fun,
- epsilon, max_iter, tol, verbose, log)
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
- if loss_fun == 'square_loss':
- gw_dist = np.sum(gw * tensor_square_loss(C1, C2, gw))
+ """
- elif loss_fun == 'kl_loss':
- gw_dist = np.sum(gw * tensor_kl_loss(C1, C2, gw))
+ gw, logv = entropic_gromov_wasserstein(
+ C1, C2, p, q, loss_fun, epsilon, max_iter, tol, verbose, log=True)
+
+ logv['T'] = gw
if log:
- return gw_dist, logv
+ return logv['gw_dist'], logv
else:
- return gw_dist
+ return logv['gw_dist']
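The Sinkhorn-based solvers keep the previous behavior under the entropic_* names: each iteration evaluates the gradient with gwggrad and projects it with sinkhorn at regularization epsilon. A short sketch, reusing C1, C2, p, q from the previous sketch (epsilon is illustrative):

from ot.gromov import (entropic_gromov_wasserstein,
                       entropic_gromov_wasserstein2)

T_ent = entropic_gromov_wasserstein(C1, C2, p, q, 'square_loss',
                                    epsilon=5e-2, max_iter=1000, tol=1e-9)
gw_ent, log_ent = entropic_gromov_wasserstein2(C1, C2, p, q, 'square_loss',
                                               epsilon=5e-2, log=True)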
-def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
- max_iter=1000, tol=1e-9, verbose=False, log=False, init_C=None):
+def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
+ max_iter=1000, tol=1e-9, verbose=False, log=False, init_C=None):
"""
Returns the gromov-wasserstein barycenters of S measured similarity matrices
@@ -421,6 +653,120 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
-------
C : ndarray, shape (N, N)
Similarity matrix in the barycenter space (permuted arbitrarily)
+
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
+
+ """
+
+ S = len(Cs)
+
+ Cs = [np.asarray(Cs[s], dtype=np.float64) for s in range(S)]
+ lambdas = np.asarray(lambdas, dtype=np.float64)
+
+ # Initialization of C : random SPD matrix (if not provided by user)
+ if init_C is None:
+ xalea = np.random.randn(N, 2)
+ C = dist(xalea, xalea)
+ C /= C.max()
+ else:
+ C = init_C
+
+ cpt = 0
+ err = 1
+
+ error = []
+
+ if log:
+ log = {'err': []}
+
+ while(err > tol and cpt < max_iter):
+ Cprev = C
+
+ T = [entropic_gromov_wasserstein(Cs[s], C, ps[s], p, loss_fun, epsilon,
+ max_iter, 1e-5, verbose) for s in range(S)]
+ if loss_fun == 'square_loss':
+ C = update_square_loss(p, lambdas, T, Cs)
+
+ elif loss_fun == 'kl_loss':
+ C = update_kl_loss(p, lambdas, T, Cs)
+
+ if cpt % 10 == 0:
+ # we can speed up the process by checking for the error only every
+ # 10th iteration
+ err = np.linalg.norm(C - Cprev)
+ error.append(err)
+
+ if log:
+ log['err'].append(err)
+
+ if verbose:
+ if cpt % 200 == 0:
+ print('{:5s}|{:12s}'.format(
+ 'It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(cpt, err))
+
+ cpt += 1
+
+ if log:
+ return C, log
+ else:
+ return C
+
+
+def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun,
+ max_iter=1000, tol=1e-9, verbose=False, log=False, init_C=None):
+ """
+ Returns the gromov-wasserstein barycenters of S measured similarity matrices
+
+ (Cs)_{s=1}^{s=S}
+
+ The function solves the following optimization problem with block
+ coordinate descent:
+
+ .. math::
+ C = \arg\min_{C\in \mathbb{R}^{N\times N}} \sum_s \lambda_s GW(C,C_s,p,p_s)
+
+
+ Where :
+
+ Cs : metric cost matrix
+ ps : distribution
+
+ Parameters
+ ----------
+ N : Integer
+ Size of the targeted barycenter
+ Cs : list of S np.ndarray(ns,ns)
+ Metric cost matrices
+ ps : list of S np.ndarray(ns,)
+ sample weights in the S spaces
+ p : ndarray, shape(N,)
+ weights in the targeted barycenter
+ lambdas : list of float
+ list of the S spaces' weights
+ loss_fun : string
+ loss function used for the solver, either 'square_loss' or 'kl_loss'
+ max_iter : int, optional
+ Max number of iterations
+ tol : float, optional
+ Stop threshold on error (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+ init_C : ndarray, shape (N, N), optional
+ initial value for the C matrix provided by the user (random if None)
+
+ Returns
+ -------
+ C : ndarray, shape (N, N)
+ Similarity matrix in the barycenter space (permuted arbitrarily)
+
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
+
"""
S = len(Cs)
@@ -444,8 +790,8 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
while(err > tol and cpt < max_iter):
Cprev = C
- T = [gromov_wasserstein(Cs[s], C, ps[s], p, loss_fun, epsilon,
- max_iter, 1e-5, verbose, log) for s in range(S)]
+ T = [gromov_wasserstein(Cs[s], C, ps[s], p, loss_fun,
+ numItermax=max_iter, stopThr=1e-5, verbose=verbose) for s in range(S)]
if loss_fun == 'square_loss':
C = update_square_loss(p, lambdas, T, Cs)
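The barycenter routines follow the same split: gromov_barycenters drops epsilon and solves each inner GW problem with the cg solver, while entropic_gromov_barycenters keeps the Sinkhorn path. A sketch computing an 8-point barycenter of two spaces (sizes, weights and epsilon are illustrative):

import numpy as np
import ot
from ot.gromov import gromov_barycenters, entropic_gromov_barycenters

rng = np.random.RandomState(0)
C1 = ot.dist(rng.randn(10, 2))  # dist(x) computes the matrix of x with itself
C2 = ot.dist(rng.randn(15, 2))
Cs = [C1 / C1.max(), C2 / C2.max()]
ps = [ot.unif(10), ot.unif(15)]

N = 8
p = ot.unif(N)
lambdas = [0.5, 0.5]

C = gromov_barycenters(N, Cs, ps, p, lambdas, 'square_loss',
                       max_iter=100, tol=1e-5)
C_ent = entropic_gromov_barycenters(N, Cs, ps, p, lambdas, 'square_loss',
                                    epsilon=5e-2, max_iter=100, tol=1e-5)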
diff --git a/ot/utils.py b/ot/utils.py
index 31a002b..9eab3fc 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -223,6 +223,7 @@ def check_params(**kwargs):
class deprecated(object):
+
"""Decorator to mark a function or class as deprecated.
deprecated class from scikit-learn package
@@ -320,6 +321,7 @@ def _is_deprecated(func):
class BaseEstimator(object):
+
"""Base class for most objects in POT
adapted from sklearn BaseEstimator class